1、爬虫任务类中url常量抽出一个常量类

2、其他类注解相关修改
pull/254/head
xjs 4 years ago
parent fba7ae528e
commit 899de00d9f

@ -1,9 +1,9 @@
package com.xjs.consts;
/**
* api
* @author xiejs
* @desc api
* @create 2022-01-07
* @since 2022-01-07
*/
public class ApiWarnHandleConst {
//已处理

@ -1,9 +1,9 @@
package com.xjs.consts;
/**
*
* @author xiejs
* @desc
* @create 2021-12-28
* @since 2021-12-28
*/
public class CopyWritingConst {

@ -1,9 +1,9 @@
package com.xjs.consts;
/**
*
* @author xiejs
* @desc
* @create 2021-12-31
* @since 2021-12-31
*/
public class EnglishWordConst {

@ -27,4 +27,9 @@ public class RegexConst {
* IPv4 dotted-quad address (each octet 0-255, leading zeros allowed); this pattern has no IPv6 branch
*/
public static final String IP_REGEX ="^((2[0-4]\\d|25[0-5]|[01]?\\d\\d?)\\.){3}(2[0-4]\\d|25[0-5]|[01]?\\d\\d?)$";
/**
* Regex for a run of zero or more ASCII digits ({@code [0-9]*}).
* Note that, because of the {@code *} quantifier, it also matches the empty string.
*/
public static final String NUMBER_REGEX= "[0-9]*";
}

@ -0,0 +1,24 @@
package com.xjs.consts;
/**
 * Base URLs of the web sites fetched by the crawler ("reptile") tasks.
 *
 * <p>This is a constants-only holder: it is {@code final} and has a private
 * constructor, so it can be neither subclassed nor instantiated. Use the
 * fields directly or through a static import.
 *
 * @author xiejs
 * @since 2022-02-16
 */
public final class ReptileUrlConst {

    /**
     * Home page of Sina News.
     */
    public static final String SINA_NEWS_URL = "https://news.sina.com.cn/";

    /**
     * Home page of the copywriting site wenanwang.com.
     * Keep the trailing slash: hrefs are appended directly to this value
     * to build the URLs of the pages to fetch.
     */
    public static final String COPY_WRITING_NETWORK_URL = "https://www.wenanwang.com/";

    /** Not instantiable: this class only groups constants. */
    private ReptileUrlConst() {
    }
}

@ -1,9 +1,9 @@
package com.xjs.consts;
/**
*
* @author xiejs
* @desc
* @create 2021-12-26
* @since 2021-12-26
*/
public class ReqConst {
public static final Integer SUCCESS = 1;

@ -18,6 +18,9 @@ import java.util.HashMap;
import java.util.Map;
import java.util.regex.Pattern;
import static com.xjs.consts.RegexConst.NUMBER_REGEX;
import static com.xjs.consts.ReptileUrlConst.COPY_WRITING_NETWORK_URL;
/**
* url:https://www.wenanwang.com/
*
@ -34,23 +37,21 @@ public class CopyWritingNetworkTask {
private CopyWritingNetworkService copyWritingNetworkService;
public static final String URL = "https://www.wenanwang.com/";
private static Pattern pattern = Pattern.compile("[0-9]*");
private static final Pattern pattern = Pattern.compile(NUMBER_REGEX);
@Scheduled(fixedDelay = 1000 * 5)
@Scheduled(fixedDelay = 1000 * 5 * 60 * 10)
public void reptileCopyWriting() {
try {
String html = httpUtils.doGetHtml(URL);
String html = httpUtils.doGetHtml(COPY_WRITING_NETWORK_URL);
Document document = Jsoup.parse(html);
this.parseHtmlGetUrl(document);
} catch (Exception e) {
e.printStackTrace();
}finally {
} finally {
int i = copyWritingNetworkService.deleteRepeatData();
log.info("删除文案网数据重复数:"+i);
log.info("删除文案网数据重复数:" + i);
}
}
@ -92,7 +93,7 @@ public class CopyWritingNetworkTask {
for (Element element : a) {
String href = element.attr("href");
String newUrl = URL + href;
String newUrl = COPY_WRITING_NETWORK_URL + href;
String cw = httpUtils.doGetHtml(newUrl);
Document cwDocument = Jsoup.parse(cw);

@ -16,6 +16,8 @@ import org.springframework.stereotype.Component;
import java.util.*;
import java.util.stream.Collectors;
import static com.xjs.consts.ReptileUrlConst.SINA_NEWS_URL;
/**
*
* @author xiejs
@ -30,11 +32,12 @@ public class SinaNewsTask {
@Autowired
private SinaNewsService sinaNewsService;
public void reptileSinaNews() {
try {
String url = "https://news.sina.com.cn/";
String html = httpUtils.doGetHtml(url);
String html = httpUtils.doGetHtml(SINA_NEWS_URL);
Document document = Jsoup.parse(html);

Loading…
Cancel
Save