diff --git a/xjs-business/xjs-business-common/src/main/java/com/xjs/consts/ApiWarnHandleConst.java b/xjs-business/xjs-business-common/src/main/java/com/xjs/consts/ApiWarnHandleConst.java index db02f8d7..44b61208 100644 --- a/xjs-business/xjs-business-common/src/main/java/com/xjs/consts/ApiWarnHandleConst.java +++ b/xjs-business/xjs-business-common/src/main/java/com/xjs/consts/ApiWarnHandleConst.java @@ -1,9 +1,9 @@ package com.xjs.consts; /** + * api预警处理常量 * @author xiejs - * @desc api预警处理常量 - * @create 2022-01-07 + * @since 2022-01-07 */ public class ApiWarnHandleConst { //已处理 diff --git a/xjs-business/xjs-business-common/src/main/java/com/xjs/consts/CopyWritingConst.java b/xjs-business/xjs-business-common/src/main/java/com/xjs/consts/CopyWritingConst.java index 663d6b3d..f504156a 100644 --- a/xjs-business/xjs-business-common/src/main/java/com/xjs/consts/CopyWritingConst.java +++ b/xjs-business/xjs-business-common/src/main/java/com/xjs/consts/CopyWritingConst.java @@ -1,9 +1,9 @@ package com.xjs.consts; /** + * 各个数据平台常量类 * @author xiejs - * @desc 各个数据平台常量类 - * @create 2021-12-28 + * @since 2021-12-28 */ public class CopyWritingConst { diff --git a/xjs-business/xjs-business-common/src/main/java/com/xjs/consts/EnglishWordConst.java b/xjs-business/xjs-business-common/src/main/java/com/xjs/consts/EnglishWordConst.java index 5052ad15..1c2151e3 100644 --- a/xjs-business/xjs-business-common/src/main/java/com/xjs/consts/EnglishWordConst.java +++ b/xjs-business/xjs-business-common/src/main/java/com/xjs/consts/EnglishWordConst.java @@ -1,9 +1,9 @@ package com.xjs.consts; /** + * 英语单词常量类 * @author xiejs - * @desc 英语单词常量类 - * @create 2021-12-31 + * @since 2021-12-31 */ public class EnglishWordConst { diff --git a/xjs-business/xjs-business-common/src/main/java/com/xjs/consts/RegexConst.java b/xjs-business/xjs-business-common/src/main/java/com/xjs/consts/RegexConst.java index a982dbda..ed10dded 100644 --- 
a/xjs-business/xjs-business-common/src/main/java/com/xjs/consts/RegexConst.java +++ b/xjs-business/xjs-business-common/src/main/java/com/xjs/consts/RegexConst.java @@ -27,4 +27,9 @@ public class RegexConst { * ip地址v4、v6正则 */ public static final String IP_REGEX ="^((2[0-4]\\d|25[0-5]|[01]?\\d\\d?)\\.){3}(2[0-4]\\d|25[0-5]|[01]?\\d\\d?)$"; + + /** + * 数字校验正则 + */ + public static final String NUMBER_REGEX= "[0-9]*"; } diff --git a/xjs-business/xjs-business-common/src/main/java/com/xjs/consts/ReptileUrlConst.java b/xjs-business/xjs-business-common/src/main/java/com/xjs/consts/ReptileUrlConst.java new file mode 100644 index 00000000..799412a0 --- /dev/null +++ b/xjs-business/xjs-business-common/src/main/java/com/xjs/consts/ReptileUrlConst.java @@ -0,0 +1,24 @@ +package com.xjs.consts; + +/** + * Crawler URL constants: base addresses of the sites fetched by the crawler tasks. + * @author xiejs + * @since 2022-02-16 + */ +public final class ReptileUrlConst { + + /** + * Sina News website. + */ + public static final String SINA_NEWS_URL = "https://news.sina.com.cn/"; + + /** + * Copywriting site (wenanwang) URL. + */ + public static final String COPY_WRITING_NETWORK_URL = "https://www.wenanwang.com/"; + + /** + * Not instantiable: this class only holds constants. + */ + private ReptileUrlConst() { } +} diff --git a/xjs-business/xjs-business-common/src/main/java/com/xjs/consts/ReqConst.java b/xjs-business/xjs-business-common/src/main/java/com/xjs/consts/ReqConst.java index 85183716..5c4f3dfe 100644 --- a/xjs-business/xjs-business-common/src/main/java/com/xjs/consts/ReqConst.java +++ b/xjs-business/xjs-business-common/src/main/java/com/xjs/consts/ReqConst.java @@ -1,9 +1,9 @@ package com.xjs.consts; /** + * 请求是否成功常量 * @author xiejs - * @desc 请求是否成功常量 - * @create 2021-12-26 + * @since 2021-12-26 */ public class ReqConst { public static final Integer SUCCESS = 1; diff --git a/xjs-business/xjs-business-webmagic/src/main/java/com/xjs/copywritingNetwork/task/CopyWritingNetworkTask.java b/xjs-business/xjs-business-webmagic/src/main/java/com/xjs/copywritingNetwork/task/CopyWritingNetworkTask.java index a5f60d02..b100348d 100644 --- 
a/xjs-business/xjs-business-webmagic/src/main/java/com/xjs/copywritingNetwork/task/CopyWritingNetworkTask.java +++ b/xjs-business/xjs-business-webmagic/src/main/java/com/xjs/copywritingNetwork/task/CopyWritingNetworkTask.java @@ -18,6 +18,9 @@ import java.util.HashMap; import java.util.Map; import java.util.regex.Pattern; +import static com.xjs.consts.RegexConst.NUMBER_REGEX; +import static com.xjs.consts.ReptileUrlConst.COPY_WRITING_NETWORK_URL; + /** * 文案网爬虫任务 url:https://www.wenanwang.com/ * @@ -34,23 +37,21 @@ public class CopyWritingNetworkTask { private CopyWritingNetworkService copyWritingNetworkService; - public static final String URL = "https://www.wenanwang.com/"; - - private static Pattern pattern = Pattern.compile("[0-9]*"); + private static final Pattern pattern = Pattern.compile(NUMBER_REGEX); - @Scheduled(fixedDelay = 1000 * 5) + @Scheduled(fixedDelay = 1000 * 5 * 60 * 10) public void reptileCopyWriting() { try { - String html = httpUtils.doGetHtml(URL); + String html = httpUtils.doGetHtml(COPY_WRITING_NETWORK_URL); Document document = Jsoup.parse(html); this.parseHtmlGetUrl(document); } catch (Exception e) { - e.printStackTrace(); + log.error("文案网爬虫任务执行失败", e); - }finally { + } finally { int i = copyWritingNetworkService.deleteRepeatData(); - log.info("删除文案网数据重复数:"+i); + log.info("删除文案网数据重复数:" + i); } } @@ -92,7 +93,7 @@ public class CopyWritingNetworkTask { for (Element element : a) { String href = element.attr("href"); - String newUrl = URL + href; + String newUrl = COPY_WRITING_NETWORK_URL + href; String cw = httpUtils.doGetHtml(newUrl); Document cwDocument = Jsoup.parse(cw); diff --git a/xjs-business/xjs-business-webmagic/src/main/java/com/xjs/sina/task/SinaNewsTask.java b/xjs-business/xjs-business-webmagic/src/main/java/com/xjs/sina/task/SinaNewsTask.java index ae139f04..358ae506 100644 --- a/xjs-business/xjs-business-webmagic/src/main/java/com/xjs/sina/task/SinaNewsTask.java +++ b/xjs-business/xjs-business-webmagic/src/main/java/com/xjs/sina/task/SinaNewsTask.java @@ -16,6 
+16,8 @@ import org.springframework.stereotype.Component; import java.util.*; import java.util.stream.Collectors; +import static com.xjs.consts.ReptileUrlConst.SINA_NEWS_URL; + /** * 新浪新闻爬虫任务 * @author xiejs @@ -30,11 +32,12 @@ public class SinaNewsTask { @Autowired private SinaNewsService sinaNewsService; + + public void reptileSinaNews() { try { - String url = "https://news.sina.com.cn/"; - String html = httpUtils.doGetHtml(url); + String html = httpUtils.doGetHtml(SINA_NEWS_URL); Document document = Jsoup.parse(html);