From bf1561d0926e68d673eeda4d2a219613bc323a6c Mon Sep 17 00:00:00 2001 From: xjs <1294405880@qq.com> Date: Sat, 18 Jun 2022 03:46:26 +0800 Subject: [PATCH] =?UTF-8?q?=E5=BC=80=E5=85=B3=E8=8E=B7=E5=8F=96=E6=89=80?= =?UTF-8?q?=E6=9C=89=E9=A1=B5=E9=9D=A2=202048=E7=A4=BE=E5=8C=BA=20?= =?UTF-8?q?=E6=A0=B9=E6=8D=AE=E6=AF=8F=E5=A4=A9=E6=97=A5=E6=9C=9F=E8=8E=B7?= =?UTF-8?q?=E5=8F=96=E6=9C=80=E6=96=B0=202048=E7=A4=BE=E5=8C=BA=20?= =?UTF-8?q?=E7=88=AC=E5=8F=96=E6=89=80=E6=9C=89=E5=88=86=E9=A1=B5=E6=95=B0?= =?UTF-8?q?=E6=8D=AE=202048=E7=A4=BE=E5=8C=BA?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../com/ruoyi/gateway/filter/AuthFilter.java | 8 +- .../java/com/xjs/consts/ReptileConst.java | 4 +- .../com/xjs/weixin/consts/WeiXinConst.java | 5 +- .../xjs/y2048community/consts/InitConst.java | 28 + .../xjs/y2048community/{ => html}/detail.html | 336 +--- .../xjs/y2048community/{ => html}/index.html | 0 .../xjs/y2048community/{ => html}/list.html | 4 +- .../java/com/xjs/y2048community/html/ttt.html | 11 + .../webmagic/Y2048communityPipeline.java | 175 ++ .../webmagic/Y2048communityProcessor.java | 183 +- .../com/xjs/y2048community/webmagic/ddd.html | 1739 +++++++++++++++++ 11 files changed, 2128 insertions(+), 365 deletions(-) create mode 100644 xjs-business/xjs-business-webmagic/src/main/java/com/xjs/y2048community/consts/InitConst.java rename xjs-business/xjs-business-webmagic/src/main/java/com/xjs/y2048community/{ => html}/detail.html (72%) rename xjs-business/xjs-business-webmagic/src/main/java/com/xjs/y2048community/{ => html}/index.html (100%) rename xjs-business/xjs-business-webmagic/src/main/java/com/xjs/y2048community/{ => html}/list.html (99%) create mode 100644 xjs-business/xjs-business-webmagic/src/main/java/com/xjs/y2048community/html/ttt.html create mode 100644 xjs-business/xjs-business-webmagic/src/main/java/com/xjs/y2048community/webmagic/ddd.html diff --git a/ruoyi-gateway/src/main/java/com/ruoyi/gateway/filter/AuthFilter.java b/ruoyi-gateway/src/main/java/com/ruoyi/gateway/filter/AuthFilter.java index 101de638..50cc1cdd 100644 --- a/ruoyi-gateway/src/main/java/com/ruoyi/gateway/filter/AuthFilter.java +++ b/ruoyi-gateway/src/main/java/com/ruoyi/gateway/filter/AuthFilter.java @@ -23,7 +23,7 @@ import reactor.core.publisher.Mono; /** * 网关鉴权 - * + * * @author ruoyi */ @Component @@ -51,6 +51,10 @@ public class AuthFilter implements GlobalFilter, Ordered { return chain.filter(exchange); } + + // todo 自定义注解跳过登录验证 + + String token = getToken(request); if (StringUtils.isEmpty(token)) { @@ -132,4 +136,4 @@ public class AuthFilter implements GlobalFilter, Ordered { return -200; } -} \ No newline at end of file +} diff --git a/xjs-business/xjs-business-common/src/main/java/com/xjs/consts/ReptileConst.java b/xjs-business/xjs-business-common/src/main/java/com/xjs/consts/ReptileConst.java index 9300d90b..28017ecf 100644 --- a/xjs-business/xjs-business-common/src/main/java/com/xjs/consts/ReptileConst.java +++ b/xjs-business/xjs-business-common/src/main/java/com/xjs/consts/ReptileConst.java @@ -52,9 +52,9 @@ public class ReptileConst { public static final String ZOL_PHONE_URL= "https://detail.zol.com.cn"; /** - * 2048社区rul + * 2048社区rul--https://vb.haowenzhi.com/2048/ --https://bbs9.qs2m.live/2048/ */ - public static final String Y_2048_COMMUNITY_URL = "https://bbs9.qs2m.live/2048/"; + public static final String Y_2048_COMMUNITY_URL = "https://vb.haowenzhi.com/2048/"; diff --git a/xjs-business/xjs-business-webmagic/src/main/java/com/xjs/weixin/consts/WeiXinConst.java b/xjs-business/xjs-business-webmagic/src/main/java/com/xjs/weixin/consts/WeiXinConst.java index 76f49205..a542274d 100644 --- a/xjs-business/xjs-business-webmagic/src/main/java/com/xjs/weixin/consts/WeiXinConst.java +++ b/xjs-business/xjs-business-webmagic/src/main/java/com/xjs/weixin/consts/WeiXinConst.java @@ -19,13 +19,16 @@ public class WeiXinConst { public static final String REDIS_KEY_OFFICIAL = "sys_config:xjs.webmagic.official_accounts"; + public static final String REDIS_KEY_Y_2048 = "sys_config:xjs.webmagic.y2048"; + /** * 系统配置表中的key */ public static final String CONFIG_KEY = "xjs.webmagic.wechatPicture"; + public static final String CONFIG_KEY_OFFICIAL = "xjs.webmagic.official_accounts"; - public static final String CONFIG_KEY_OFFICIAL = "xjs:webmagic:official_accounts"; + public static final String CONFIG_KEY_Y_2048 = "xjs.webmagic.y2048"; diff --git a/xjs-business/xjs-business-webmagic/src/main/java/com/xjs/y2048community/consts/InitConst.java b/xjs-business/xjs-business-webmagic/src/main/java/com/xjs/y2048community/consts/InitConst.java new file mode 100644 index 00000000..26492887 --- /dev/null +++ b/xjs-business/xjs-business-webmagic/src/main/java/com/xjs/y2048community/consts/InitConst.java @@ -0,0 +1,28 @@ +package com.xjs.y2048community.consts; + +/** + * 爬虫初始化常量 + * + * @author xiejs + * @since 2022-06-18 + */ +public class InitConst { + + /** + * 关 + */ + public static final String OFF = "off"; + + /** + * 开 + */ + public static final String ON = "on"; + + + /** + * 控制开关,true开 false关 + */ + public static final Boolean CONTROL = true; + + +} diff --git a/xjs-business/xjs-business-webmagic/src/main/java/com/xjs/y2048community/detail.html b/xjs-business/xjs-business-webmagic/src/main/java/com/xjs/y2048community/html/detail.html similarity index 72% rename from xjs-business/xjs-business-webmagic/src/main/java/com/xjs/y2048community/detail.html rename to xjs-business/xjs-business-webmagic/src/main/java/com/xjs/y2048community/html/detail.html index 5c967328..af9ccbee 100644 --- a/xjs-business/xjs-business-webmagic/src/main/java/com/xjs/y2048community/detail.html +++ b/xjs-business/xjs-business-webmagic/src/main/java/com/xjs/y2048community/html/detail.html @@ -64,322 +64,9 @@
- - -
a { margin-right: 10px } -
威尼斯人亚博赌场银河贵宾会91原创视频同城约炮皇冠体育凤凰娱乐聚合直播牛牛三公

必赢棋牌开元棋牌皇冠赌场王者棋牌澳门新葡京约炮大群兼职少妇真人娱乐必博体育 +
-
[06-16] - 诱惑黑丝玉足5【10P】  丝情话欲 @@ -503,7 +502,8 @@ 丝袜美脚3【12p】  - 稳定控制 + + 稳定控制
2022-06-16
0 diff --git a/xjs-business/xjs-business-webmagic/src/main/java/com/xjs/y2048community/html/ttt.html b/xjs-business/xjs-business-webmagic/src/main/java/com/xjs/y2048community/html/ttt.html new file mode 100644 index 00000000..97e08262 --- /dev/null +++ b/xjs-business/xjs-business-webmagic/src/main/java/com/xjs/y2048community/html/ttt.html @@ -0,0 +1,11 @@ + diff --git a/xjs-business/xjs-business-webmagic/src/main/java/com/xjs/y2048community/webmagic/Y2048communityPipeline.java b/xjs-business/xjs-business-webmagic/src/main/java/com/xjs/y2048community/webmagic/Y2048communityPipeline.java index 17988b35..299a25d4 100644 --- a/xjs-business/xjs-business-webmagic/src/main/java/com/xjs/y2048community/webmagic/Y2048communityPipeline.java +++ b/xjs-business/xjs-business-webmagic/src/main/java/com/xjs/y2048community/webmagic/Y2048communityPipeline.java @@ -1,22 +1,197 @@ package com.xjs.y2048community.webmagic; +import cn.hutool.core.collection.CollUtil; +import cn.hutool.core.date.DatePattern; +import cn.hutool.core.date.DateUtil; +import cn.hutool.core.util.RandomUtil; +import com.ruoyi.common.core.constant.HttpStatus; +import com.ruoyi.common.core.utils.StringUtils; +import com.ruoyi.common.redis.service.RedisService; +import com.ruoyi.system.api.RemoteConfigService; +import com.xjs.common.util.WeiXinUtils; +import com.xjs.utils.RandomUtils; +import com.xjs.weixin.consts.WeiXinConst; +import com.xjs.y2048community.consts.InitConst; import lombok.extern.log4j.Log4j2; +import org.apache.http.HttpResponse; +import org.apache.http.client.methods.HttpGet; +import org.apache.http.impl.client.CloseableHttpClient; +import org.apache.http.impl.client.HttpClients; +import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Component; import us.codecraft.webmagic.ResultItems; import us.codecraft.webmagic.Task; import us.codecraft.webmagic.pipeline.Pipeline; +import javax.annotation.Resource; +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.util.Date; +import java.util.List; + +import static com.xjs.weixin.consts.WeiXinConst.*; + /** * 2048爬虫数据处理 + * * @author xiejs * @since 2022-06-17 */ @Component @Log4j2 public class Y2048communityPipeline implements Pipeline { + @Autowired + private RedisService redisService; + @Resource + private RemoteConfigService remoteConfigService; + @Override public void process(ResultItems resultItems, Task task) { + List srcs = resultItems.get("srcs"); + + String title = resultItems.get("title"); + + String type = resultItems.get("type"); + + if (CollUtil.isNotEmpty(srcs) && StringUtils.isNotEmpty(title) && StringUtils.isNotEmpty(type)) { + String appendPath = this.getAppendPath(title, type); + + File file = new File(appendPath); + if (file.exists()) { + return; + } + + for (String link : srcs) { + InputStream inputStream = null; + + // 创建GET请求 + CloseableHttpClient httpClient = HttpClients.createDefault(); + HttpGet httpGet = null; + try { + httpGet = new HttpGet(link); + HttpResponse response = httpClient.execute(httpGet); + if (response.getStatusLine().getStatusCode() == HttpStatus.SUCCESS) { + inputStream = response.getEntity().getContent(); + + //文件小于30kb则不写入 + long contentLength = response.getEntity().getContentLength(); + long kb = contentLength / 1024; + if (SIZE_KB > kb) { + continue; + } + + //拼接文件后缀 + String suffix; + if (link.contains(JPEG)) { + suffix = JPEG; + } else if (link.contains(JPG)) { + suffix = JPG; + } else if (link.contains(PNG)) { + suffix = PNG; + } else if (link.contains(GIF)) { + continue; + } else { + suffix = JPG; + } + + String chars = "ABCDEFGHIZKLMNOPQRSTUVWXYZ"; + char c = chars.charAt((int) (Math.random() * 1)); + + + String fileName = RandomUtils.randomZm() + RandomUtil.randomLong(100000, 1000000) + DOT + suffix; + + this.downloadPicture(inputStream, this.getPath(), fileName, title, type); + } + } catch (Exception e) { + e.printStackTrace(); + } finally { + try { + if (httpGet != null) { + httpGet.clone(); + } + } catch (CloneNotSupportedException e) { + e.printStackTrace(); + } + try { + httpClient.close(); + } catch (IOException e) { + e.printStackTrace(); + } + try { + if (inputStream != null) { + inputStream.close(); + } + } catch (IOException e) { + log.error(e.getMessage()); + } + } + } + } + + + } + + /** + * 链接url下载图片 + * + * @param inputStream 输入流 + * @param path 磁盘地址 + * @param fileName 文件名称 + * @param title 标题名称 + */ + private void downloadPicture(InputStream inputStream, String path, String fileName, String title, String type) { + String appendPath = this.getAppendPath(title, type); + WeiXinUtils.downloadPicture(inputStream, path, fileName, title, appendPath); + } + + + /** + * 从缓存 -> 数据库 -> 内存 中获取磁盘地址 + * + * @return 地址 + */ + private String getPath() { + //磁盘路径 + String path; + //判断redis中是否存在 + Boolean hasKey = redisService.hasKey(REDIS_KEY_Y_2048); + if (hasKey) { + path = redisService.getCacheObject(REDIS_KEY_Y_2048); + } else { + String data = remoteConfigService.getConfigKeyForRPC(CONFIG_KEY_Y_2048).getData(); + if (StringUtils.isNotEmpty(data)) { + path = data; + } else { + path = WeiXinConst.PATH; + } + } + return path; + } + + /** + * 获取拼接后的磁盘路径 + * + * @param title 拼接的最后的文件夹 + * @return str + */ + private String getAppendPath(String title, String type) { + title = WeiXinUtils.filterTitle(title); + + String path = this.getPath() + File.separator + DateUtil.format(new Date(), + DatePattern.NORM_MONTH_PATTERN) + File.separator + type + File.separator + title; + + if (InitConst.CONTROL) { + path = this.getPath() + File.separator + type; + } + + return path; } } + + + + + diff --git a/xjs-business/xjs-business-webmagic/src/main/java/com/xjs/y2048community/webmagic/Y2048communityProcessor.java b/xjs-business/xjs-business-webmagic/src/main/java/com/xjs/y2048community/webmagic/Y2048communityProcessor.java index fc3a5c2a..c67f84b0 100644 --- a/xjs-business/xjs-business-webmagic/src/main/java/com/xjs/y2048community/webmagic/Y2048communityProcessor.java +++ b/xjs-business/xjs-business-webmagic/src/main/java/com/xjs/y2048community/webmagic/Y2048communityProcessor.java @@ -1,7 +1,11 @@ package com.xjs.y2048community.webmagic; +import cn.hutool.core.collection.CollUtil; +import cn.hutool.core.date.DateUtil; +import com.ruoyi.common.core.utils.StringUtils; import com.ruoyi.common.redis.service.RedisService; import com.xjs.consts.ReptileConst; +import com.xjs.y2048community.consts.InitConst; import lombok.extern.log4j.Log4j2; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Component; @@ -10,7 +14,9 @@ import us.codecraft.webmagic.Site; import us.codecraft.webmagic.processor.PageProcessor; import us.codecraft.webmagic.selector.Selectable; +import java.util.Arrays; import java.util.List; +import java.util.Optional; import java.util.concurrent.TimeUnit; import static com.xjs.consts.RedisConst.REPTILE_2048_COMMUNITY_COUNT; @@ -18,6 +24,7 @@ import static com.xjs.consts.ReptileConst.Y_2048_COMMUNITY_URL; /** * 社区 爬虫处理 + * * @author xiejs * @since 2022-06-17 */ @@ -31,6 +38,13 @@ public class Y2048communityProcessor implements PageProcessor { @Override public void process(Page page) { + // 开关获取所有页面 + + // 根据每天日期获取最新 + + // todo 网络情况好的时候爬取所有页面 + + try { Integer count = redisService.getCacheObject(REPTILE_2048_COMMUNITY_COUNT); if (count == null) { @@ -38,34 +52,34 @@ public class Y2048communityProcessor implements PageProcessor { } List trs = page.getHtml().css("#content .tr3").nodes(); - for (Selectable tr : trs) { - List allText = tr.css("th > span > a", "text").all(); - for (String text : allText) { - if ("COSPLAY".equalsIgnoreCase(text)) { - String href = tr.css("th > span > a", "href").get(); + if (CollUtil.isNotEmpty(trs)) { + for (Selectable tr : trs) { + List selectables = tr.css("th > span > a").nodes(); - //获取需要爬取的路径 - page.addTargetRequest(Y_2048_COMMUNITY_URL+href); - } - if ("高跟絲襪".equalsIgnoreCase(text)) { - String href = tr.css("th > span > a", "href").get(); + for (Selectable selectable : selectables) { + if (selectable.css("a", "text").get().equals("COSPLAY") || + selectable.css("a", "text").get().equals("高跟絲襪") + ) { - //获取需要爬取的路径 - page.addTargetRequest(Y_2048_COMMUNITY_URL+href); - } - } + //String href = selectable.css("a", "href").get(); - count += allText.size(); + String href = selectable.links().get(); - } + //获取需要爬取的路径 + page.addTargetRequest(href); + } + } - this.handlerListPage(page, count); + count += selectables.size(); + } + } + + this.handlerListPage(page, count); - redisService.setCacheObject(REPTILE_2048_COMMUNITY_COUNT, count); } catch (Exception e) { log.error(e.getMessage()); } finally { @@ -74,24 +88,145 @@ public class Y2048communityProcessor implements PageProcessor { } /** - * 处理列表页面 - * @param page 页面 + * 处理列表页面 + * + * @param page 页面 * @param count 总数 */ private void handlerListPage(Page page, Integer count) { - page.getHtml().css("#ajaxtable > .tr3 > .tal >"); + try { + String div = page.getHtml().css("#ajaxtable").get(); + + String bread = page.getHtml().css("#breadCrumb").get(); + + //获取所有分页页面 + if (InitConst.CONTROL) { + if (StringUtils.isNotEmpty(bread)) { + if ((bread.contains("COSPLAY") || bread.contains("高跟絲襪")) && !page.getUrl().get().contains("-page-")) { + String pages = page.getHtml().css(".pagesone > span", "text").get(); + if (StringUtils.isNotEmpty(pages)) { + if (pages.contains("/")) { + String[] split = pages.split("/"); + String num = split[split.length - 1]; + + int numInt = Integer.parseInt(num); + for (int i = 2; i <= numInt; i++) { + String url = page.getUrl().get(); + String[] splitUrl = url.split(".html"); + //String newUrl = splitUrl[0] + "-page-" + i + ".html"; + + StringBuffer sb = new StringBuffer(); + sb.append(splitUrl[0]).append("-page-").append(i).append(".html"); + + page.addTargetRequest(sb.toString()); + } + + } + } + } + } + } else if (StringUtils.isNotEmpty(bread)) { + if ((bread.contains("COSPLAY") || bread.contains("高跟絲襪")) && !page.getUrl().get().contains("-page-")) { + String pages = page.getHtml().css(".pagesone > span", "text").get(); + if (StringUtils.isNotEmpty(pages)) { + String url = page.getUrl().get(); + String[] splitUrl = url.split(".html"); + List asList = Arrays.asList(splitUrl[0] + "-page-2.html", splitUrl[0] + "-page-3.html"); + page.addTargetRequests(asList); + } + } + } + + + if (StringUtils.isNotEmpty(div)) { + + /*List subjects = page.getHtml().css("#ajaxtable .tr3 .tal").nodes(); + + for (int i = 0; i < subjects.size(); i++) { + if (i <= 18) { + continue; + } + + List hrefs = subjects.get(i).css(".subject", "href").all(); + + //subjects.get(i).css() + + List collect = hrefs.stream().map(href -> Y_2048_COMMUNITY_URL + href).collect(Collectors.toList()); + + count += collect.size(); + + page.addTargetRequests(collect); + }*/ + + List trs = page.getHtml().css("#ajaxtable .tr3 ").nodes(); + + for (Selectable tr : trs) { + String date = tr.css("td:nth-child(3) div", "text").get(); + + //不是当天的数据页面跳出 + if (StringUtils.isNotEmpty(date) && !InitConst.CONTROL) { + if (!DateUtil.today().equals(date)) { + continue; + } + } + + String href = tr.css("td .subject", "href").get(); + if (StringUtils.isNotEmpty(href)) { + String url = Y_2048_COMMUNITY_URL + href; + page.addTargetRequest(url); + } + + } + + } + + + //this.handlerDetailPage(page, count); + + } catch (Exception e) { + e.printStackTrace(); + } } + /** + * 处理详情页面 + * + * @param page 页面 + * @param count 总数 + */ + private void handlerDetailPage(Page page, Integer count) { + try { + String div = page.getHtml().css("#read_tpc").get(); + + if (StringUtils.isNotEmpty(div)) { + //获取图片链接 + List srcs = page.getHtml().css("#read_tpc > .att_img > img", "src").all(); + page.putField("srcs", srcs); + + //获取标题 + String title = page.getHtml().css("#subject_tpc", "text").get(); + page.putField("title", title); + + //获取分类 + String type = Optional.ofNullable(page.getHtml().css("#breadCrumb > a:nth-child(3)", "text").get()).orElse("未知"); + page.putField("type", type); + } + + } finally { + redisService.setCacheObject(REPTILE_2048_COMMUNITY_COUNT, count); + } + } @Override public Site getSite() { return Site.me() .addHeader(ReptileConst.headerKey, ReptileConst.headerValue) + .addHeader("Connection", "close") .setCharset("utf8")//设置字符编码 - .setTimeOut(5000)//设置超时时间 - .setRetrySleepTime(500)//设置重试间隔时间 - .setCycleRetryTimes(5)//设置重试次数 + .setTimeOut(10000)//设置超时时间 + .setRetrySleepTime(100)//设置重试间隔时间 + .setCycleRetryTimes(2)//设置重试次数 .setSleepTime(10)//设置两个页面之间的间隔时间 ; } diff --git a/xjs-business/xjs-business-webmagic/src/main/java/com/xjs/y2048community/webmagic/ddd.html b/xjs-business/xjs-business-webmagic/src/main/java/com/xjs/y2048community/webmagic/ddd.html new file mode 100644 index 00000000..433c184f --- /dev/null +++ b/xjs-business/xjs-business-webmagic/src/main/java/com/xjs/y2048community/webmagic/ddd.html @@ -0,0 +1,1739 @@ + + + + + 甜美正妹「jessy」純天然的「極品S曲線」完全是女神等級!超高顏值讓網友暴動 + + + + + + + + + + + + + + + + + + + +
+
+
+ + + + + + +
+
+ + + + + +
主题 : 甜美正妹「jessy」純天然的「極品S曲線」完全是女神等級!超高顏值讓網友暴動 +
+
+
+
+ + + + + + + +
+
+ +
+ + + + + +
+
+
级别: 超級版主
+
+
UID:1016648
精华:0
发帖:229301
威望:257983 點
金币:10043106 個
貢獻值:3 點
米粒:0 個
在线时间: 1588
+ +
+
楼主 发表于: 2022-06-11 23:16 +
  编辑 +
+ +
+
+
+

甜美正妹「jessy」純天然的「極品S曲線」完全是女神等級!超高顏值讓網友暴動

+
+
+ +
威尼斯人亚博赌场银河贵宾会91原创视频同城约炮皇冠体育凤凰娱乐聚合直播牛牛三公

必赢棋牌开元棋牌皇冠赌场王者棋牌澳门新葡京约炮大群兼职少妇真人娱乐必博体育 +
+
+
+ + + +

+ + + +

+ + + +

+ + + +

+ + + +

+ + + +

+ + + +

+ + + +

+ + + +

+ + + +

+
+
+
+
+
+
+
+ 顶端 + +
+
+
+
+ + + +
+
+
+ + + +
+
+
+