开关获取所有页面 2048社区

根据每天日期获取最新 2048社区
爬取所有分页数据 2048社区
pull/254/head
xjs 3 years ago
parent 636ee63f59
commit bf1561d092

@ -51,6 +51,10 @@ public class AuthFilter implements GlobalFilter, Ordered
{
return chain.filter(exchange);
}
// todo 自定义注解跳过登录验证
String token = getToken(request);
if (StringUtils.isEmpty(token))
{

@ -52,9 +52,9 @@ public class ReptileConst {
public static final String ZOL_PHONE_URL= "https://detail.zol.com.cn";
/**
* 2048 url
* 2048 url--https://vb.haowenzhi.com/2048/ --https://bbs9.qs2m.live/2048/
*/
public static final String Y_2048_COMMUNITY_URL = "https://bbs9.qs2m.live/2048/";
public static final String Y_2048_COMMUNITY_URL = "https://vb.haowenzhi.com/2048/";

@ -19,13 +19,16 @@ public class WeiXinConst {
public static final String REDIS_KEY_OFFICIAL = "sys_config:xjs.webmagic.official_accounts";
public static final String REDIS_KEY_Y_2048 = "sys_config:xjs.webmagic.y2048";
/**
* key
*/
public static final String CONFIG_KEY = "xjs.webmagic.wechatPicture";
public static final String CONFIG_KEY_OFFICIAL = "xjs.webmagic.official_accounts";
public static final String CONFIG_KEY_OFFICIAL = "xjs:webmagic:official_accounts";
public static final String CONFIG_KEY_Y_2048 = "xjs.webmagic.y2048";

@ -0,0 +1,28 @@
package com.xjs.y2048community.consts;
/**
 * Switch constants for the 2048 community crawler.
 *
 * <p>This is a pure constants holder and cannot be instantiated.
 *
 * @author xiejs
 * @since 2022-06-18
 */
public final class InitConst {

    /**
     * String form of a disabled switch.
     * NOTE(review): not referenced by the crawler code visible next to this class — confirm usage.
     */
    public static final String OFF = "off";

    /**
     * String form of an enabled switch.
     * NOTE(review): not referenced by the crawler code visible next to this class — confirm usage.
     */
    public static final String ON = "on";

    /**
     * Crawl-mode switch.
     *
     * <p>When {@code true}, the list-page handler queues every pagination page of a board and
     * does not filter posts by date, and the pipeline stores images under
     * {@code <base>/<type>}. When {@code false}, only pages 2 and 3 are queued, posts whose
     * date is not today are skipped, and images are stored under
     * {@code <base>/<month>/<type>/<title>}.
     */
    public static final Boolean CONTROL = true;

    private InitConst() {
        // constants holder, no instances
    }
}

@ -64,322 +64,9 @@
</div>
<div class="main-wrap">
<div id="main">
<style type="text/css">.warning a:hover {
background: #F00;
color: #fff;
padding: 0;
}
.sigline {
margin-top: 10px;
margin-left: 15px;
background: url(images/wind/read/sigline.gif) right bottom no-repeat;
margin-right: 30em;
height: 5px;
}
.signature {
padding: 10px 15px 0;
height: expression(this.scrollHeight>parseInt(this.currentStyle.maxHeight)?this.currentStyle.maxHeight:"auto");
}
.user-pic img {
border: 1px solid #d5e6ed;
background: #fff;
padding: 3px;
}
.small {
font-size: 12px
}
.middle {
font-size: 14px
}
.big {
font-size: 18px
}
.dig {
font-size: 12px;
background-color: #ffffee;
background-position: -22px -57px;
padding: 0 .4em 0 1.6em;
cursor: pointer;
color: #666;
border: 1px solid #ffd0a8;
height: 18px;
line-height: 18px;
margin: 12px 5px 10px 15px;
}
.dig:hover {
text-decoration: none;
border: 1px solid #f5a25c;
color: #ff6600;
}
.readbot {
padding: 0px;
}
.readbot a {
list-style: none;
padding: 0 0 0 1.5em;
margin: 0;
float: left;
cursor: pointer;
background: url(images/wind/read/yin.gif) no-repeat;
width: 3.5em;
height: 16px;
}
.readbot .r-quote:hover {
background-position: 0 0;
}
.readbot .r-reply:hover {
background-position: 0 -20px;
}
.readbot .r-score:hover {
background-position: 0 -40px;
}
.readbot .r-keep:hover {
background-position: 0 -60px;
}
.readbot .r-recommend:hover {
background-position: 0 -160px;
}
.readbot .r-report:hover {
background-position: 0 -200px;
}
.readbot .r-quote {
background-position: 0 -80px;
}
.readbot .r-reply {
background-position: 0 -100px;
}
.readbot .r-score {
background-position: 0 -120px;
}
.readbot .r-keep {
background-position: 0 -140px;
}
.readbot .r-recommend {
background-position: 0 -180px;
}
.readbot .r-report {
background-position: 0 -220px;
}
.down {
background: url(images/post/down.gif) 5px center no-repeat;
padding: 5px 5px 5px 30px;
border: #c5d8e8 1px solid;
margin: 0 1em 0 0;
line-height: 40px;
}
.img-50 {
width: 48px;
height: 48px;
}
.cates {
margin: 0 0 10px;
}
.cates .cate-list li {
padding: 2px 0 1px 10px;
font-weight: 500;
color: #444444;
list-style: none;
}
.cates .cate-list em {
font-style: normal;
width: 100px;
float: left;
}
.cates .cate-list cite {
font-style: normal;
}
.cates input {
vertical-align: middle;
}
.cates .w {
margin-right: 10px;
}
.cates .two {
background: #f3f9fb;
}
.app-post {
background: url(images/app-post.png);
display: block;
float: left;
color: #ffffff;
width: 78px;
height: 26px;
line-height: 25px;
padding-left: 17px;
}
.app-post:hover {
text-decoration: none;
background-position: 0 -50px;
}
.flash {
position: relative;
}
.flash img {
height: 100%;
width: 100%;
}
.flash ul {
position: absolute;
right: 8px;
bottom: 8px;
z-index: 3;
}
.flash ul li {
list-style: none;
float: left;
width: 18px;
height: 13px;
line-height: 13px;
text-align: center;
margin-left: 2px;
background: #ffffff;
}
.flash ul li a {
display: block;
width: 18px;
height: 13px;
font-size: 10px;
font-family: Tahoma;
color: #333333;
}
.flash ul li a:hover, .flash ul li a.sel {
color: #fff;
text-decoration: none;
background: #ffa900;
}
.score {
margin: 10px 15px;
padding-top: 10px;
border-top: 1px dashed #d5e6ed;
line-height: 22px;
}
.score td {
padding: 2px 10px 2px 5px;
}
.score th {
padding: 6px 0 0;
}
.score tr:hover {
background: #f3f9fb;
}
.score a {
color: #3366cc;
}</style>
<div id="breadCrumb" class="cc"><a href="index.php" title="人人为我 我为人人">人人为我 我为人人</a>&raquo;<a
href="thread.php?fid=7">图片专区</a>&raquo;<a href="thread.php?fid=27">高跟絲襪</a>&raquo;<a
href="read.php?tid=6523984">诱惑黑丝玉足5【10P】</a><a href="read.php?fid=27&tid=6523984&toread=1"><font
color="red">&nbsp;(转到动态网页)</font></a>&nbsp;»
</div>
<style>.TOP_PD {
width: 100%;
text-align: center;
display: inline-block !important
}
.TOP_PD2 {
width: 100%;
text-align: center;
display: inline-block !important
}
.PD_TAC_BOX {
display: inline-block;
max-width: 100%;
margin: 0 auto
}
.PD_TAC_BOX li {
padding: 0;
width: auto;
min-width: 100px;
height: 36px;
float: left;
text-align: center;
list-style-type: none;
margin: 0px !important
}
.PD_TAC_BOX {
display: inline-block;
max-width: 100%;
margin: 0 auto;
float: left
}
.PD_TAC_BOX li a {
font-size: 20px;
font-weight: bold
}</style>
<div class="tac" style="margin:.5em 0 0;">
<div class="TOP_PD">
<div class="PD_TAC_BOX">
<li><a href="/htm/a1.htm" target="_BLANK" style="color: red">百家美女赌场</a>&nbsp;&nbsp;</li>
<li><a href="/htm/a4.htm" target="_BLANK" style="color: blue">重金担保皇冠</a>&nbsp;&nbsp;</li>
<li><a href="/htm/a8.htm" target="_BLANK" style="color: green">信誉凤凰娱乐</a>&nbsp;&nbsp;</li>
<li><a href="/htm/a5.htm" target="_BLANK" style="color: red">澳门皇冠赌场</a>&nbsp;&nbsp;</li>
<li><a href="/htm/a6.htm" target="_BLANK" style="color: green">逢赌必赢棋牌</a>&nbsp;&nbsp;</li>
<li><a href="/htm/a7.htm" target="_BLANK" style="color: red">王者信誉棋牌</a>&nbsp;&nbsp;</li>
<li><a href="/htm/b4.htm" target="_BLANK" style="color: blue">开元棋牌官网</a>&nbsp;&nbsp;</li>
<li><a href="/htm/b1.htm" target="_BLANK" style="color: green">澳门威尼斯人</a>&nbsp;&nbsp;</li>
<li><a href="/htm/sg.htm" target="_BLANK" style="color: green">博弈专区棋牌</a>&nbsp;&nbsp;</li>
</div>
</div>
<div class="TOP_PD2">
<div class="PD_TAC_BOX">
<li><a href="/htm/b9.htm" target="_BLANK" style="color: red">威尼斯人赌场</a>&nbsp;&nbsp;</li>
<li><a href="/htm/a2.htm" target="_BLANK" style="color: blue">澳门葡京赌场</a>&nbsp;&nbsp;</li>
<li><a href="/htm/b5.htm" target="_BLANK" style="color: red">9 1 福利视频</a>&nbsp;&nbsp;</li>
<li><a href="/htm/b2.htm" target="_BLANK" style="color: green">银河老牌赌城</a>&nbsp;&nbsp;</li>
<li><a href="/htm/b6.htm" target="_BLANK" style="color: red">同城在线约炮</a>&nbsp;&nbsp;</li>
<li><a href="/htm/b3.htm" target="_BLANK" style="color: blue">加人约炮大群</a>&nbsp;&nbsp;</li>
<li><a href="/htm/b7.htm" target="_BLANK" style="color: green">兼职少妇学生</a>&nbsp;&nbsp;</li>
<li><a href="/htm/b10.htm" target="_BLANK" style="color: red">鲍鱼聚合直播</a>&nbsp;&nbsp;</li>
<li><a href="/htm/c1.htm" target="_BLANK" style="color: blue">世界杯定投站</a></li>
</div>
</div>
</div>
<div class="t3"><span class="fr" style="margin-left:.5em"><a href="post.php?fid=27"><img
@ -458,27 +145,8 @@
.apd > a {
margin-right: 10px
}</style>
<div class="apd"><a href="/htm/b9.htm" target="_blank"><font size="4"
color="blue">威尼斯人</font></a><a
href="/htm/a1.htm" target="_blank"><font size="4" color="red">亚博赌场</font></a><a
href="/htm/b2.htm" target="_blank"><font size="4" color="blue">银河贵宾会</font></a><a
href="/htm/b5.htm" target="_blank"><font size="4" color="red">91原创视频</font></a><a
href="/htm/b7.htm" target="_blank"><font size="4" color="blue">同城约炮</font></a><a
href="/htm/a4.htm" target="_blank"><font size="4" color="red">皇冠体育</font></a><a
href="/htm/a8.htm" target="_blank"><font size="4" color="green">凤凰娱乐</font></a><a
href="/htm/b10.htm" target="_blank"><font size="4" color="red">聚合直播</font></a><a
href="/htm/sg.htm" target="_blank"><font size="4" color="red">牛牛三公</font></a><br><br><a
href="/htm/a6.htm" target="_blank"><font size="4" color="red">必赢棋牌</font></a><a
href="/htm/b4.htm" target="_blank"><font size="4" color="blue">开元棋牌</font></a><a
href="/htm/a5.htm" target="_blank"><font size="4" color="green">皇冠赌场</font></a><a
href="/htm/a7.htm" target="_blank"><font size="4" color="red">王者棋牌</font></a><a
href="/htm/a2.htm" target="_blank"><font size="4" color="blue">澳门新葡京</font></a><a
href="/htm/b6.htm" target="_blank"><font size="4" color="green">约炮大群</font></a><a
href="/htm/b3.htm" target="_blank"><font size="4" color="red">兼职少妇</font></a><a
href="/htm/b1.htm" target="_blank"><font size="4" color="blue">真人娱乐</font></a><a
href="/htm/c1.htm" target="_blank"><font size="4" color="red">必博体育</font></a>
<div class="apd">
<div class="tpc_content">
<div id="p_tpc" class="c"></div>
<div class="f14" id="read_tpc">
<ignore_js_op class="att_img"><img id="aimg_r2dflf1"
src="https://img.picelsb.com/i/2022/06/16/r2dflf.jpg"

@ -395,7 +395,6 @@
<tr align="center" class="tr3 t_one">
<td><a title="开放主题" href="state/p/27/2206/6523984.html" target="_blank"></a></td>
<td class="tal" id="td_6523984">[06-16]
<a href="state/p/27/2206/6523984.html" target="_blank" id="a_ajax_6523984" class="subject">诱惑黑丝玉足5【10P】</a>&nbsp;
</td>
<td class="tal y-style"><a href="u.php?action=show&uid=2783914" class="bl">丝情话欲</a>
@ -503,7 +502,8 @@
<a href="state/p/27/2206/6520570.html" target="_blank" id="a_ajax_6520570" class="subject">丝袜美脚3【12p】</a>&nbsp;
</td>
<td class="tal y-style"><a href="u.php?action=show&uid=2831127" class="bl">稳定控制</a>
<td class="tal y-style">
<a href="u.php?action=show&uid=2831127" class="bl">稳定控制</a>
<div class="f10 gray">2022-06-16</div>
</td>
<td class="tal y-style f10 gray"><span class="s3">0</span></td>

@ -0,0 +1,11 @@
<div id="breadCrumb" class="cc">
<a href="https://vb.haowenzhi.com/2048/index.php?m=bbs" title="人人为我 我为人人">人人为我 我为人人</a> &raquo;
<a href="thread.php?fid-273.html">美图秀秀</a> &raquo;
<a href="thread.php?fid-277.html">COSPLAY</a>
<span id="shortcut">
<a style="cursor:pointer;" onclick="javascript:shortCut();" title="将本版块添加到我的书签"><img src="images/wind/thread/cancel.gif"
align="absbottom"
style="margin-bottom:2px;"/></a>
</span>
</div>

@ -1,22 +1,197 @@
package com.xjs.y2048community.webmagic;
import cn.hutool.core.collection.CollUtil;
import cn.hutool.core.date.DatePattern;
import cn.hutool.core.date.DateUtil;
import cn.hutool.core.util.RandomUtil;
import com.ruoyi.common.core.constant.HttpStatus;
import com.ruoyi.common.core.utils.StringUtils;
import com.ruoyi.common.redis.service.RedisService;
import com.ruoyi.system.api.RemoteConfigService;
import com.xjs.common.util.WeiXinUtils;
import com.xjs.utils.RandomUtils;
import com.xjs.weixin.consts.WeiXinConst;
import com.xjs.y2048community.consts.InitConst;
import lombok.extern.log4j.Log4j2;
import org.apache.http.HttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;
import us.codecraft.webmagic.ResultItems;
import us.codecraft.webmagic.Task;
import us.codecraft.webmagic.pipeline.Pipeline;
import javax.annotation.Resource;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.util.Date;
import java.util.List;
import static com.xjs.weixin.consts.WeiXinConst.*;
/**
* 2048
*
* @author xiejs
* @since 2022-06-17
*/
@Component
@Log4j2
public class Y2048communityPipeline implements Pipeline {
@Autowired
private RedisService redisService;
@Resource
private RemoteConfigService remoteConfigService;
@Override
public void process(ResultItems resultItems, Task task) {
List<String> srcs = resultItems.get("srcs");
String title = resultItems.get("title");
String type = resultItems.get("type");
if (CollUtil.isNotEmpty(srcs) && StringUtils.isNotEmpty(title) && StringUtils.isNotEmpty(type)) {
String appendPath = this.getAppendPath(title, type);
File file = new File(appendPath);
if (file.exists()) {
return;
}
for (String link : srcs) {
InputStream inputStream = null;
// 创建GET请求
CloseableHttpClient httpClient = HttpClients.createDefault();
HttpGet httpGet = null;
try {
httpGet = new HttpGet(link);
HttpResponse response = httpClient.execute(httpGet);
if (response.getStatusLine().getStatusCode() == HttpStatus.SUCCESS) {
inputStream = response.getEntity().getContent();
//文件小于30kb则不写入
long contentLength = response.getEntity().getContentLength();
long kb = contentLength / 1024;
if (SIZE_KB > kb) {
continue;
}
//拼接文件后缀
String suffix;
if (link.contains(JPEG)) {
suffix = JPEG;
} else if (link.contains(JPG)) {
suffix = JPG;
} else if (link.contains(PNG)) {
suffix = PNG;
} else if (link.contains(GIF)) {
continue;
} else {
suffix = JPG;
}
String chars = "ABCDEFGHIZKLMNOPQRSTUVWXYZ";
char c = chars.charAt((int) (Math.random() * 1));
String fileName = RandomUtils.randomZm() + RandomUtil.randomLong(100000, 1000000) + DOT + suffix;
this.downloadPicture(inputStream, this.getPath(), fileName, title, type);
}
} catch (Exception e) {
e.printStackTrace();
} finally {
try {
if (httpGet != null) {
httpGet.clone();
}
} catch (CloneNotSupportedException e) {
e.printStackTrace();
}
try {
httpClient.close();
} catch (IOException e) {
e.printStackTrace();
}
try {
if (inputStream != null) {
inputStream.close();
}
} catch (IOException e) {
log.error(e.getMessage());
}
}
}
}
}
/**
* url
*
* @param inputStream
* @param path
* @param fileName
* @param title
*/
private void downloadPicture(InputStream inputStream, String path, String fileName, String title, String type) {
String appendPath = this.getAppendPath(title, type);
WeiXinUtils.downloadPicture(inputStream, path, fileName, title, appendPath);
}
/**
* -> ->
*
* @return
*/
private String getPath() {
//磁盘路径
String path;
//判断redis中是否存在
Boolean hasKey = redisService.hasKey(REDIS_KEY_Y_2048);
if (hasKey) {
path = redisService.getCacheObject(REDIS_KEY_Y_2048);
} else {
String data = remoteConfigService.getConfigKeyForRPC(CONFIG_KEY_Y_2048).getData();
if (StringUtils.isNotEmpty(data)) {
path = data;
} else {
path = WeiXinConst.PATH;
}
}
return path;
}
/**
*
*
* @param title
* @return str
*/
private String getAppendPath(String title, String type) {
title = WeiXinUtils.filterTitle(title);
String path = this.getPath() + File.separator + DateUtil.format(new Date(),
DatePattern.NORM_MONTH_PATTERN) + File.separator + type + File.separator + title;
if (InitConst.CONTROL) {
path = this.getPath() + File.separator + type;
}
return path;
}
}

@ -1,7 +1,11 @@
package com.xjs.y2048community.webmagic;
import cn.hutool.core.collection.CollUtil;
import cn.hutool.core.date.DateUtil;
import com.ruoyi.common.core.utils.StringUtils;
import com.ruoyi.common.redis.service.RedisService;
import com.xjs.consts.ReptileConst;
import com.xjs.y2048community.consts.InitConst;
import lombok.extern.log4j.Log4j2;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;
@ -10,7 +14,9 @@ import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.processor.PageProcessor;
import us.codecraft.webmagic.selector.Selectable;
import java.util.Arrays;
import java.util.List;
import java.util.Optional;
import java.util.concurrent.TimeUnit;
import static com.xjs.consts.RedisConst.REPTILE_2048_COMMUNITY_COUNT;
@ -18,6 +24,7 @@ import static com.xjs.consts.ReptileConst.Y_2048_COMMUNITY_URL;
/**
*
*
* @author xiejs
* @since 2022-06-17
*/
@ -31,6 +38,13 @@ public class Y2048communityProcessor implements PageProcessor {
@Override
public void process(Page page) {
// 开关获取所有页面
// 根据每天日期获取最新
// todo 网络情况好的时候爬取所有页面
try {
Integer count = redisService.getCacheObject(REPTILE_2048_COMMUNITY_COUNT);
if (count == null) {
@ -38,34 +52,34 @@ public class Y2048communityProcessor implements PageProcessor {
}
List<Selectable> trs = page.getHtml().css("#content .tr3").nodes();
for (Selectable tr : trs) {
List<String> allText = tr.css("th > span > a", "text").all();
for (String text : allText) {
if ("COSPLAY".equalsIgnoreCase(text)) {
String href = tr.css("th > span > a", "href").get();
if (CollUtil.isNotEmpty(trs)) {
for (Selectable tr : trs) {
List<Selectable> selectables = tr.css("th > span > a").nodes();
//获取需要爬取的路径
page.addTargetRequest(Y_2048_COMMUNITY_URL+href);
}
if ("高跟絲襪".equalsIgnoreCase(text)) {
String href = tr.css("th > span > a", "href").get();
for (Selectable selectable : selectables) {
if (selectable.css("a", "text").get().equals("COSPLAY") ||
selectable.css("a", "text").get().equals("高跟絲襪")
) {
//获取需要爬取的路径
page.addTargetRequest(Y_2048_COMMUNITY_URL+href);
}
}
//String href = selectable.css("a", "href").get();
count += allText.size();
String href = selectable.links().get();
}
//获取需要爬取的路径
page.addTargetRequest(href);
}
}
this.handlerListPage(page, count);
count += selectables.size();
}
}
this.handlerListPage(page, count);
redisService.setCacheObject(REPTILE_2048_COMMUNITY_COUNT, count);
} catch (Exception e) {
log.error(e.getMessage());
} finally {
@ -74,24 +88,145 @@ public class Y2048communityProcessor implements PageProcessor {
}
/**
 * Handles a board list page: queues the board's further pagination pages and the detail
 * pages of the posts listed on it.
 *
 * <p>Any exception is logged and swallowed.
 *
 * @param page  the page being processed
 * @param count running counter supplied by the caller; not used by this method
 */
private void handlerListPage(Page page, Integer count) {
    try {
        String div = page.getHtml().css("#ajaxtable").get();
        String bread = page.getHtml().css("#breadCrumb").get();

        this.addPaginationRequests(page, bread);

        if (StringUtils.isNotEmpty(div)) {
            String today = DateUtil.today();
            List<Selectable> trs = page.getHtml().css("#ajaxtable .tr3 ").nodes();
            for (Selectable tr : trs) {
                String date = tr.css("td:nth-child(3) div", "text").get();
                // In "latest only" mode skip rows whose date is not today.
                boolean stale = !InitConst.CONTROL && StringUtils.isNotEmpty(date) && !today.equals(date);
                if (stale) {
                    continue;
                }
                String href = tr.css("td .subject", "href").get();
                if (StringUtils.isNotEmpty(href)) {
                    page.addTargetRequest(Y_2048_COMMUNITY_URL + href);
                }
            }
        }
    } catch (Exception e) {
        log.error("Failed to handle 2048 community list page", e);
    }
}

/**
 * Queues the pagination pages of a COSPLAY / 高跟絲襪 board when the current page is the
 * board's first page (its URL does not contain {@code -page-}).
 *
 * <p>With {@code InitConst.CONTROL} true every page from 2 up to the total shown in the
 * pager is queued; otherwise only pages 2 and 3.
 *
 * @param page  the page being processed
 * @param bread HTML of the breadcrumb element, may be empty
 */
private void addPaginationRequests(Page page, String bread) {
    if (StringUtils.isEmpty(bread)) {
        return;
    }
    boolean targetBoard = bread.contains("COSPLAY") || bread.contains("高跟絲襪");
    String url = page.getUrl().get();
    if (!targetBoard || url.contains("-page-")) {
        return;
    }
    String pages = page.getHtml().css(".pagesone > span", "text").get();
    if (StringUtils.isEmpty(pages)) {
        return;
    }

    // Literal search: String.split(".html") would treat '.' as a regex wildcard.
    int suffixIndex = url.indexOf(".html");
    String prefix = suffixIndex >= 0 ? url.substring(0, suffixIndex) : url;

    if (!InitConst.CONTROL) {
        page.addTargetRequests(Arrays.asList(prefix + "-page-2.html", prefix + "-page-3.html"));
        return;
    }
    if (!pages.contains("/")) {
        return;
    }
    // The pager text ends with "/<total pages>".
    String total = pages.substring(pages.lastIndexOf('/') + 1).trim();
    int lastPage;
    try {
        lastPage = Integer.parseInt(total);
    } catch (NumberFormatException e) {
        // Do not let an unreadable pager prevent the post links from being queued.
        log.error("Cannot parse page total from pager text '{}'", pages, e);
        return;
    }
    for (int i = 2; i <= lastPage; i++) {
        page.addTargetRequest(prefix + "-page-" + i + ".html");
    }
}
/**
 * Extracts the image links, title and category of a post detail page into the page's
 * result fields {@code srcs}, {@code title} and {@code type}.
 *
 * <p>Nothing is extracted when the page has no {@code #read_tpc} element. In every case the
 * counter is written back to Redis before the method returns.
 * NOTE(review): the only call to this method visible in the surrounding code is commented out.
 *
 * @param page  the page being processed
 * @param count counter value stored under {@code REPTILE_2048_COMMUNITY_COUNT}
 */
private void handlerDetailPage(Page page, Integer count) {
    try {
        String content = page.getHtml().css("#read_tpc").get();
        if (StringUtils.isEmpty(content)) {
            return;
        }

        // image links
        List<String> imageLinks = page.getHtml().css("#read_tpc > .att_img > img", "src").all();
        page.putField("srcs", imageLinks);

        // post title
        String postTitle = page.getHtml().css("#subject_tpc", "text").get();
        page.putField("title", postTitle);

        // category taken from the breadcrumb, with a fixed fallback when absent
        String category = page.getHtml().css("#breadCrumb > a:nth-child(3)", "text").get();
        if (category == null) {
            category = "未知";
        }
        page.putField("type", category);
    } finally {
        redisService.setCacheObject(REPTILE_2048_COMMUNITY_COUNT, count);
    }
}
/**
 * Builds the request settings used by this processor.
 *
 * @return the WebMagic site configuration
 */
@Override
public Site getSite() {
    return Site.me()
            .addHeader(ReptileConst.headerKey, ReptileConst.headerValue)
            .addHeader("Connection", "close")
            // character encoding
            .setCharset("utf8")
            // request timeout
            .setTimeOut(10000)
            // wait between retries
            .setRetrySleepTime(100)
            // number of cycle retries
            .setCycleRetryTimes(2)
            // interval between two pages
            .setSleepTime(10);
}

Loading…
Cancel
Save