diff --git a/xjs-business/xjs-business-common/src/main/java/com/xjs/consts/ApiWarnHandleConst.java b/xjs-business/xjs-business-common/src/main/java/com/xjs/consts/ApiWarnHandleConst.java index 44b61208..db02f8d7 100644 --- a/xjs-business/xjs-business-common/src/main/java/com/xjs/consts/ApiWarnHandleConst.java +++ b/xjs-business/xjs-business-common/src/main/java/com/xjs/consts/ApiWarnHandleConst.java @@ -1,9 +1,9 @@ package com.xjs.consts; /** - * api预警处理常量 * @author xiejs - * @since 2022-01-07 + * @desc api预警处理常量 + * @create 2022-01-07 */ public class ApiWarnHandleConst { //已处理 diff --git a/xjs-business/xjs-business-common/src/main/java/com/xjs/consts/CopyWritingConst.java b/xjs-business/xjs-business-common/src/main/java/com/xjs/consts/CopyWritingConst.java index f504156a..663d6b3d 100644 --- a/xjs-business/xjs-business-common/src/main/java/com/xjs/consts/CopyWritingConst.java +++ b/xjs-business/xjs-business-common/src/main/java/com/xjs/consts/CopyWritingConst.java @@ -1,9 +1,9 @@ package com.xjs.consts; /** - * 各个数据平台常量类 * @author xiejs - * @since 2021-12-28 + * @desc 各个数据平台常量类 + * @create 2021-12-28 */ public class CopyWritingConst { diff --git a/xjs-business/xjs-business-common/src/main/java/com/xjs/consts/EnglishWordConst.java b/xjs-business/xjs-business-common/src/main/java/com/xjs/consts/EnglishWordConst.java index 1c2151e3..5052ad15 100644 --- a/xjs-business/xjs-business-common/src/main/java/com/xjs/consts/EnglishWordConst.java +++ b/xjs-business/xjs-business-common/src/main/java/com/xjs/consts/EnglishWordConst.java @@ -1,9 +1,9 @@ package com.xjs.consts; /** - * 英语单词常量类 * @author xiejs - * @since 2021-12-31 + * @desc 英语单词常量类 + * @create 2021-12-31 */ public class EnglishWordConst { diff --git a/xjs-business/xjs-business-common/src/main/java/com/xjs/consts/RegexConst.java b/xjs-business/xjs-business-common/src/main/java/com/xjs/consts/RegexConst.java index ed10dded..a982dbda 100644 --- a/xjs-business/xjs-business-common/src/main/java/com/xjs/consts/RegexConst.java +++ b/xjs-business/xjs-business-common/src/main/java/com/xjs/consts/RegexConst.java @@ -27,9 +27,4 @@ public class RegexConst { * ip地址v4、v6正则 */ public static final String IP_REGEX ="^((2[0-4]\\d|25[0-5]|[01]?\\d\\d?)\\.){3}(2[0-4]\\d|25[0-5]|[01]?\\d\\d?)$"; - - /** - * 数字校验正则 - */ - public static final String NUMBER_REGEX= "[0-9]*"; } diff --git a/xjs-business/xjs-business-common/src/main/java/com/xjs/consts/ReptileUrlConst.java b/xjs-business/xjs-business-common/src/main/java/com/xjs/consts/ReptileUrlConst.java deleted file mode 100644 index 799412a0..00000000 --- a/xjs-business/xjs-business-common/src/main/java/com/xjs/consts/ReptileUrlConst.java +++ /dev/null @@ -1,24 +0,0 @@ -package com.xjs.consts; - -/** - * 爬虫网址常量类 - * @author xiejs - * @since 2022-02-16 - */ -public class ReptileUrlConst { - - /** - * 新浪新闻网站 - */ - public static final String SINA_NEWS_URL = "https://news.sina.com.cn/"; - - /** - * 文案网网址 - */ - public static final String COPY_WRITING_NETWORK_URL = "https://www.wenanwang.com/"; - - - - - -} diff --git a/xjs-business/xjs-business-common/src/main/java/com/xjs/consts/ReqConst.java b/xjs-business/xjs-business-common/src/main/java/com/xjs/consts/ReqConst.java index 5c4f3dfe..85183716 100644 --- a/xjs-business/xjs-business-common/src/main/java/com/xjs/consts/ReqConst.java +++ b/xjs-business/xjs-business-common/src/main/java/com/xjs/consts/ReqConst.java @@ -1,9 +1,9 @@ package com.xjs.consts; /** - * 请求是否成功常量 * @author xiejs - * @since 2021-12-26 + * @desc 请求是否成功常量 + * @create 2021-12-26 */ public class ReqConst { public static final Integer SUCCESS = 1; diff --git a/xjs-business/xjs-business-webmagic/src/main/java/com/xjs/copywritingNetwork/task/CopyWritingNetworkTask.java b/xjs-business/xjs-business-webmagic/src/main/java/com/xjs/copywritingNetwork/task/CopyWritingNetworkTask.java index b100348d..a5f60d02 100644 --- a/xjs-business/xjs-business-webmagic/src/main/java/com/xjs/copywritingNetwork/task/CopyWritingNetworkTask.java +++ b/xjs-business/xjs-business-webmagic/src/main/java/com/xjs/copywritingNetwork/task/CopyWritingNetworkTask.java @@ -18,9 +18,6 @@ import java.util.HashMap; import java.util.Map; import java.util.regex.Pattern; -import static com.xjs.consts.RegexConst.NUMBER_REGEX; -import static com.xjs.consts.ReptileUrlConst.COPY_WRITING_NETWORK_URL; - /** * 文案网爬虫任务 url:https://www.wenanwang.com/ * @@ -37,21 +34,23 @@ public class CopyWritingNetworkTask { private CopyWritingNetworkService copyWritingNetworkService; - private static final Pattern pattern = Pattern.compile(NUMBER_REGEX); + public static final String URL = "https://www.wenanwang.com/"; + + private static Pattern pattern = Pattern.compile("[0-9]*"); - @Scheduled(fixedDelay = 1000 * 5 * 60 * 10) + @Scheduled(fixedDelay = 1000 * 5) public void reptileCopyWriting() { try { - String html = httpUtils.doGetHtml(COPY_WRITING_NETWORK_URL); + String html = httpUtils.doGetHtml(URL); Document document = Jsoup.parse(html); this.parseHtmlGetUrl(document); } catch (Exception e) { e.printStackTrace(); - } finally { + }finally { int i = copyWritingNetworkService.deleteRepeatData(); - log.info("删除文案网数据重复数:" + i); + log.info("删除文案网数据重复数:"+i); } } @@ -93,7 +92,7 @@ public class CopyWritingNetworkTask { for (Element element : a) { String href = element.attr("href"); - String newUrl = COPY_WRITING_NETWORK_URL + href; + String newUrl = URL + href; String cw = httpUtils.doGetHtml(newUrl); Document cwDocument = Jsoup.parse(cw); diff --git a/xjs-business/xjs-business-webmagic/src/main/java/com/xjs/sina/task/SinaNewsTask.java b/xjs-business/xjs-business-webmagic/src/main/java/com/xjs/sina/task/SinaNewsTask.java index 358ae506..ae139f04 100644 --- a/xjs-business/xjs-business-webmagic/src/main/java/com/xjs/sina/task/SinaNewsTask.java +++ b/xjs-business/xjs-business-webmagic/src/main/java/com/xjs/sina/task/SinaNewsTask.java @@ -16,8 +16,6 @@ import org.springframework.stereotype.Component; import java.util.*; import java.util.stream.Collectors; -import static com.xjs.consts.ReptileUrlConst.SINA_NEWS_URL; - /** * 新浪新闻爬虫任务 * @author xiejs @@ -32,12 +30,11 @@ public class SinaNewsTask { @Autowired private SinaNewsService sinaNewsService; - - public void reptileSinaNews() { try { + String url = "https://news.sina.com.cn/"; - String html = httpUtils.doGetHtml(SINA_NEWS_URL); + String html = httpUtils.doGetHtml(url); Document document = Jsoup.parse(html);