1、新增webmagic框架爬虫爬取36壁纸网

2、其他细节调整
pull/254/head
xjs 4 years ago
parent 7b058766b5
commit 491e100071

@ -30,7 +30,7 @@
</el-form>
<div ref="historyChart" style="height:350px;width: 100%;"></div>
<div ref="futureChart" style="height: 350px;width: 100%;margin-top: 20px"></div>
<div ref="futureChart" style="height: 400px;width: 100%;margin-top: 20px"></div>
</div>
</template>
@ -38,6 +38,7 @@
<script>
import {getHistoryWeather, getFutureWeather} from "@/api/business/statistics/weatherstatistics";
import {pickerOptions} from "@/layout/mixin/PickerOptions";
// ECharts
var echarts = require('echarts/lib/echarts');
@ -49,6 +50,7 @@ require('echarts/lib/component/title');
export default {
name: "WeatherStatistics",
mixins: [pickerOptions],
data() {
return {
historyWeatherData: {},
@ -63,43 +65,6 @@ export default {
//
loading: false,
//
pickerOptions: {
shortcuts: [{
text: '昨天',
onClick(picker) {
const end = new Date();
const start = new Date();
start.setTime(start.getTime() - 3600 * 1000 * 24);
picker.$emit('pick', [start, end]);
}
}, {
text: '最近一周',
onClick(picker) {
const end = new Date();
const start = new Date();
start.setTime(start.getTime() - 3600 * 1000 * 24 * 7);
picker.$emit('pick', [start, end]);
}
}, {
text: '最近一个月',
onClick(picker) {
const end = new Date();
const start = new Date();
start.setTime(start.getTime() - 3600 * 1000 * 24 * 30);
picker.$emit('pick', [start, end]);
}
}, {
text: '最近三个月',
onClick(picker) {
const end = new Date();
const start = new Date();
start.setTime(start.getTime() - 3600 * 1000 * 24 * 90);
picker.$emit('pick', [start, end]);
}
}]
},
}
},
@ -160,18 +125,7 @@ export default {
trigger: 'axis'
},
legend: {},
toolbox: {
show: true,
feature: {
dataZoom: {
yAxisIndex: 'none'
},
dataView: {readOnly: false},
magicType: {type: ['line', 'bar']},
restore: {},
saveAsImage: {}
}
},
xAxis: {
type: 'category',
boundaryGap: false,
@ -188,44 +142,13 @@ export default {
name: '最高温度',
type: 'line',
data: this.futureWeatherData.maxTemperature,
markPoint: {
data: [
{type: 'max', name: 'Max'},
{type: 'min', name: 'Min'}
]
},
markLine: {
data: [{type: 'average', name: 'Avg'}]
}
},
{
name: '最低温度',
type: 'line',
data: this.futureWeatherData.minTemperature,
markPoint: {
data: [{name: '周最低', value: -2, xAxis: 1, yAxis: -1.5}]
},
markLine: {
data: [
{type: 'average', name: 'Avg'},
[
{
symbol: 'none',
x: '90%',
yAxis: 'max'
},
{
symbol: 'circle',
label: {
position: 'start',
formatter: 'Max'
},
type: 'max',
name: '最高点'
}
]
]
}
}
]
})

@ -2,7 +2,7 @@
<div class="app-container">
<el-form :model="queryParams" ref="queryForm" :inline="true" v-show="showSearch" label-width="68px">
<el-form-item label="文案标签" prop="type">
<el-select v-model="queryParams.type" placeholder="请选择文案标签" clearable size="small">
<el-select v-model="queryParams.type" placeholder="请选择文案标签" @change="handleQuery" clearable size="small">
<el-option
v-for="index in typeList"
:key="index"

@ -3,7 +3,7 @@
<el-form :model="queryParams" ref="queryForm" :inline="true" v-show="showSearch" label-width="68px">
<el-form-item label="新闻分类" prop="category">
<el-select v-model="queryParams.category" placeholder="请输入新闻分类" clearable size="small">
<el-select v-model="queryParams.category" placeholder="请输入新闻分类" @change="handleQuery" clearable size="small">
<el-option
v-for="index in typeList"
:key="index"

@ -0,0 +1,38 @@
package com.xjs.consts;
/**
 * Entry-point URLs of the web sites fetched by the crawler ("reptile") tasks.
 *
 * @author xiejs
 * @since 2022-02-19
 */
public class ReptileConst {

    // ------------------------------- URLs -------------------------------

    /** Sina News home page. */
    public static final String SINA_NEWS_URL = "https://news.sina.com.cn/";

    /** Home page of the copywriting site wenanwang.com. */
    public static final String COPY_WRITING_NETWORK_URL = "https://www.wenanwang.com/";

    /** 51job search-result listing; the path of this URL carries the keyword {@code java}. */
    public static final String _51_JOB_URL = "https://search.51job.com/list/000000,000000,0000,01,9,99,java,2,1.html?lang=c&postchannel=0000&workyear=99&cotype=99&degreefrom=99&jobterm=99&companysize=99&ord_field=0&dibiaoid=0&line=&welfare=";

    /** Root URL of the BOSS job site (zhipin.com). */
    public static final String BOSS_JOB_URL = "https://www.zhipin.com";
}

@ -80,13 +80,15 @@ public class EnglishWordServiceImpl implements IEnglishWordService {
if (Objects.isNull(r.getData().getErrorCode())) {
//指定to为翻译字典转换的内容
englishWord.setContent(r.getData().getTo());
}
}
//添加缓存到redis并设置7天有效时间
//添加缓存到redis并设置1小时有效时间
Map<String, Object> build = new HashMap<>();
build.put(hkey, englishWord);
redisService.setCacheMap(TRAN_DICT, build);
redisService.expire(TRAN_DICT, TRAN_DICT_EXPIRE, TimeUnit.HOURS);
}
}
return englishWord;
}

@ -29,6 +29,12 @@
<groupId>us.codecraft</groupId>
<artifactId>webmagic-core</artifactId>
<version>0.7.5</version>
<exclusions>
<exclusion>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>us.codecraft</groupId>

@ -0,0 +1,74 @@
package com.xjs._36wallpaper.webmagic;
import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.Spider;
import us.codecraft.webmagic.processor.PageProcessor;
/**
 * WebMagic {@link PageProcessor} that crawls the wallpaper site at
 * {@code https://www.3gbizhi.com/}.
 *
 * <p>For every downloaded page it queues the links found in the top menu
 * ({@code .tmenu li}) for further crawling and stores the links found under
 * {@code .imgw} in the result items under the key {@code "url"}.
 *
 * @author xiejs
 * @since 2022-02-19
 */
public class _36wallpaperProcessor implements PageProcessor {

    /** Seed URL the spider starts from. */
    private static final String URL = "https://www.3gbizhi.com/";

    /** Number of worker threads used by the spider. */
    private static final int THREAD_COUNT = 5;

    /** Crawl settings handed to WebMagic through {@link #getSite()}. */
    private Site site = Site.me()
            // Was "uft8", which is not a valid charset name; the response body
            // could not be decoded with it.
            .setCharset("UTF-8")
            .setTimeOut(10000)        // download timeout
            .setRetrySleepTime(2000)  // pause between retries
            .setCycleRetryTimes(3)    // how many times a failed request is retried
            .setSleepTime(10)         // pause between two page downloads
            ;

    /**
     * Replaces the crawl settings used by this processor.
     *
     * @param site the new site configuration
     */
    public void setSite(Site site) {
        this.site = site;
    }

    /**
     * Extracts data and follow-up links from a downloaded page.
     *
     * <p>Selection is done with CSS selectors; the same elements could also be
     * addressed with XPath or regular expressions on {@code page.getHtml()}.
     *
     * @param page the page handed over by the downloader
     */
    @Override
    public void process(Page page) {
        // TODO: download the wallpaper images themselves, not only their links.

        // Follow every link of the top navigation menu.
        page.addTargetRequests(page.getHtml().css(".tmenu li").links().all());

        // Collect the links of the image entries on the current page.
        page.putField("url", page.getHtml().css(".imgw").links().all());
    }

    /**
     * @return the crawl settings currently used by this processor
     */
    @Override
    public Site getSite() {
        return site;
    }

    /**
     * Starts the crawl asynchronously from {@link #URL}.
     *
     * <p>The spider is created from this instance, so a configuration supplied
     * via {@link #setSite(Site)} is honoured (previously a fresh processor with
     * default settings was created here).
     */
    public void run() {
        Spider.create(this).addUrl(URL).thread(THREAD_COUNT).runAsync();
    }

    /**
     * Stand-alone entry point: runs the crawl and blocks until it finishes.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        Spider.create(new _36wallpaperProcessor()).addUrl(URL).thread(THREAD_COUNT).run();
    }
}

@ -19,6 +19,12 @@ public interface CopyWritingNetworkService extends IService<CopyWritingNetwork>
*/
int deleteRepeatData();
/**
*
* @return list
*/
List<Object> getType();
//---------------------------代码生成--------------------------------
/**
@ -45,10 +51,6 @@ public interface CopyWritingNetworkService extends IService<CopyWritingNetwork>
*/
int deleteCopyWritingNetworkById(Long id);
/**
*
* @return list
*/
List<Object> getType();
}

@ -46,6 +46,15 @@ public class CopyWritingNetworkServiceImpl extends ServiceImpl<CopyWritingNetwor
return list;
}
/**
 * Lists the values of the {@code type} column, grouped so that each value
 * is returned once.
 *
 * @return the grouped {@code type} values
 */
@Override
public List<Object> getType() {
    QueryWrapper<CopyWritingNetwork> typeQuery = new QueryWrapper<>();
    typeQuery.groupBy("type").select("type");
    return listObjs(typeQuery);
}
/**
*
@ -108,12 +117,5 @@ public class CopyWritingNetworkServiceImpl extends ServiceImpl<CopyWritingNetwor
return copyWritingNetworkMapper.deleteCopyWritingNetworkById(id);
}
@Override
public List<Object> getType() {
QueryWrapper<CopyWritingNetwork> wrapper = new QueryWrapper<>();
wrapper.groupBy("type");
wrapper.select("type");
return this.listObjs(wrapper);
}
}

@ -19,6 +19,7 @@ import java.util.Map;
import java.util.regex.Pattern;
import static com.xjs.consts.RegexConst.NUMBER_REGEX;
import static com.xjs.consts.ReptileConst.COPY_WRITING_NETWORK_URL;
/**
* url:https://www.wenanwang.com/
@ -36,18 +37,17 @@ public class CopyWritingNetworkTask {
private CopyWritingNetworkService copyWritingNetworkService;
public static final String URL = "https://www.wenanwang.com/";
private static final Pattern pattern = Pattern.compile(NUMBER_REGEX);
@ReptileLog(name = "文案网", url = URL)
@ReptileLog(name = "文案网", url = COPY_WRITING_NETWORK_URL)
public Long reptileCopyWriting() {
//定义循环次数计数器
Long count = 0L;
try {
String html = httpUtils.doGetHtml(URL);
String html = httpUtils.doGetHtml(COPY_WRITING_NETWORK_URL);
Document document = Jsoup.parse(html);
@ -104,7 +104,7 @@ public class CopyWritingNetworkTask {
for (Element element : a) {
String href = element.attr("href");
String newUrl = URL + href;
String newUrl = COPY_WRITING_NETWORK_URL + href;
String cw = httpUtils.doGetHtml(newUrl);
Document cwDocument = Jsoup.parse(cw);

@ -47,8 +47,8 @@ public class SinaNewsController extends MyBaseController {
@GetMapping("taskForPRC")
@ApiOperation("供定时任务服务RPC远程调用")
public R sinaTaskForPRC() {
Long count = sinaNewsTask.reptileSinaNews();
return R.ok(count);
sinaNewsTask.reptileSinaNews();
return R.ok();
}

@ -17,6 +17,12 @@ public interface SinaNewsService extends IService<SinaNews> {
*/
int deleteRepeatData();
/**
*
* @return List
*/
List<Object> getType();
//-------------------------代码生成----------------------------
/**
@ -43,9 +49,5 @@ public interface SinaNewsService extends IService<SinaNews> {
*/
public int deleteSinaNewsById(Long id);
/**
*
* @return List
*/
List<Object> getType();
}

@ -26,6 +26,15 @@ public class SinaNewsServiceImpl extends ServiceImpl<SinaNewsMapper, SinaNews> i
return sinaNewsMapper.deleteRepeatData();
}
/**
 * Lists the values of the {@code category} column, grouped so that each
 * value is returned once.
 *
 * @return the grouped {@code category} values
 */
@Override
public List<Object> getType() {
    QueryWrapper<SinaNews> categoryQuery = new QueryWrapper<>();
    categoryQuery.groupBy("category").select("category");
    return listObjs(categoryQuery);
}
//-------------------------代码生成----------------------------
/**
@ -61,12 +70,5 @@ public class SinaNewsServiceImpl extends ServiceImpl<SinaNewsMapper, SinaNews> i
return sinaNewsMapper.deleteSinaNewsById(id);
}
@Override
public List<Object> getType() {
QueryWrapper<SinaNews> wrapper = new QueryWrapper<>();
wrapper.groupBy("category");
wrapper.select("category");
return this.listObjs(wrapper);
}
}

@ -17,6 +17,8 @@ import org.springframework.stereotype.Component;
import java.util.*;
import java.util.stream.Collectors;
import static com.xjs.consts.ReptileConst.SINA_NEWS_URL;
/**
*
*
@ -32,17 +34,15 @@ public class SinaNewsTask {
@Autowired
private SinaNewsService sinaNewsService;
private static final String URL = "https://news.sina.com.cn/";
//定义循环次数计数器
private Long count = 0L;
@ReptileLog(name = "新浪新闻", url = URL)
@ReptileLog(name = "新浪新闻", url = SINA_NEWS_URL)
public Long reptileSinaNews() {
Long thisCount = 0L;
try {
String html = httpUtils.doGetHtml(URL);
String html = httpUtils.doGetHtml(SINA_NEWS_URL);
Document document = Jsoup.parse(html);

@ -0,0 +1,5 @@
log4j.rootLogger=INFO,A1
log4j.appender.A1=org.apache.log4j.ConsoleAppender
log4j.appender.A1.layout=org.apache.log4j.PatternLayout
log4j.appender.A1.layout.ConversionPattern=%-d{yyyy-MM-dd HH:mm:ss,SSS} {%t} {%c}-{%p} %m%n
Loading…
Cancel
Save