1、36壁纸网爬虫持久到数据库代码优化

pull/254/head
xjs 4 years ago
parent 8f36a77d2b
commit 7c2e1b684d

@ -397,9 +397,9 @@ export default {
this.queryParams.endCreateTime = this.daterangeCreateTime[1]; this.queryParams.endCreateTime = this.daterangeCreateTime[1];
} }
listWord(this.queryParams).then(response => { listWord(this.queryParams).then(response => {
this.loading = false;
this.wordList = response.rows; this.wordList = response.rows;
this.total = response.total; this.total = response.total;
this.loading = false;
}); });
}, },
// //

@ -1,5 +1,6 @@
package com.xjs._36wallpaper.task; package com.xjs._36wallpaper.task;
import com.xjs._36wallpaper.service._36wallpaperService;
import com.xjs._36wallpaper.webmagic._36wallpaperProcessor; import com.xjs._36wallpaper.webmagic._36wallpaperProcessor;
import com.xjs.annotation.ReptileLog; import com.xjs.annotation.ReptileLog;
import lombok.extern.log4j.Log4j2; import lombok.extern.log4j.Log4j2;
@ -20,6 +21,9 @@ public class _36wallpaperTask {
@Autowired @Autowired
private _36wallpaperProcessor wallpaperProcessor; private _36wallpaperProcessor wallpaperProcessor;
@Autowired
private _36wallpaperService wallpaperService;
/** /**
* *
@ -27,7 +31,12 @@ public class _36wallpaperTask {
*/ */
@ReptileLog(name = "36壁纸网", url = _36_WALLPAPER_URL) @ReptileLog(name = "36壁纸网", url = _36_WALLPAPER_URL)
public Long reptileWallpaper() { public Long reptileWallpaper() {
return wallpaperProcessor.run(); Long run = wallpaperProcessor.run();
//删除重复数据
int count = wallpaperService.deleteRepeatData();
log.info("36壁纸删除重复数据数" + count);
return run;
} }
} }

@ -0,0 +1,37 @@
package com.xjs._36wallpaper.webmagic;
import cn.hutool.core.collection.CollUtil;
import com.xjs._36wallpaper.pojo._36wallpaper;
import com.xjs._36wallpaper.service._36wallpaperService;
import lombok.extern.log4j.Log4j2;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;
import us.codecraft.webmagic.ResultItems;
import us.codecraft.webmagic.Task;
import us.codecraft.webmagic.pipeline.Pipeline;
import java.util.List;
/**
 * WebMagic {@link Pipeline} that persists crawled 36 wallpaper records.
 *
 * <p>Looks up the wallpaper list stored in the {@link ResultItems} under the key
 * {@code "_36wallpaperData"} and, when that list is non-empty, passes it to
 * {@code wallpaperService.saveBatch} with a batch size of 25.
 *
 * @author xiejs
 * @since 2022-02-20
 */
@Component
@Log4j2
public class _36wallpaperPipeline implements Pipeline {

    /** Key under which the wallpaper list is read from the result items. */
    private static final String WALLPAPER_DATA_KEY = "_36wallpaperData";

    /** Batch size handed to {@code saveBatch}. */
    private static final int SAVE_BATCH_SIZE = 25;

    @Autowired
    private _36wallpaperService wallpaperService;

    /**
     * Saves the wallpapers carried by the given result items, if any.
     *
     * @param resultItems result items expected to hold the wallpaper list
     * @param task        the crawl task; not used by this pipeline
     */
    @Override
    public void process(ResultItems resultItems, Task task) {
        List<_36wallpaper> wallpapers = resultItems.get(WALLPAPER_DATA_KEY);

        // Nothing to persist when the list is missing or empty.
        boolean hasWallpapers = CollUtil.isNotEmpty(wallpapers);
        if (!hasWallpapers) {
            return;
        }

        wallpaperService.saveBatch(wallpapers, SAVE_BATCH_SIZE);
    }
}

@ -2,7 +2,6 @@ package com.xjs._36wallpaper.webmagic;
import com.ruoyi.common.redis.service.RedisService; import com.ruoyi.common.redis.service.RedisService;
import com.xjs._36wallpaper.pojo._36wallpaper; import com.xjs._36wallpaper.pojo._36wallpaper;
import com.xjs._36wallpaper.service._36wallpaperService;
import lombok.extern.log4j.Log4j2; import lombok.extern.log4j.Log4j2;
import org.jsoup.Jsoup; import org.jsoup.Jsoup;
import org.jsoup.nodes.Document; import org.jsoup.nodes.Document;
@ -44,6 +43,9 @@ public class _36wallpaperProcessor implements PageProcessor {
private static final String headerValue = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.45 Safari/537.36"; private static final String headerValue = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.45 Safari/537.36";
@Autowired
private _36wallpaperPipeline wallpaperPipeline;
private static RedisService redisService; private static RedisService redisService;
@Autowired @Autowired
@ -52,12 +54,12 @@ public class _36wallpaperProcessor implements PageProcessor {
} }
private static _36wallpaperService wallpaperService; /*private static _36wallpaperService wallpaperService;
@Autowired @Autowired
public void setWallpaperService(_36wallpaperService wallpaperService) { public void setWallpaperService(_36wallpaperService wallpaperService) {
_36wallpaperProcessor.wallpaperService = wallpaperService; _36wallpaperProcessor.wallpaperService = wallpaperService;
} }*/
/** /**
* *
@ -159,8 +161,11 @@ public class _36wallpaperProcessor implements PageProcessor {
} }
} }
//持久化 //持久化 --使用Pipeline实现持久化了
wallpaperService.saveBatch(wallpapers, 25); //wallpaperService.saveBatch(wallpapers, 25);
//暂时保存到内存中后续实现Pipeline接口保存到数据库
page.putField("_36wallpaperData",wallpapers);
//循环次数存入redis中 //循环次数存入redis中
Integer count = redisService.getCacheObject(REPTILE_COUNT); Integer count = redisService.getCacheObject(REPTILE_COUNT);
@ -191,11 +196,10 @@ public class _36wallpaperProcessor implements PageProcessor {
public Long run() { public Long run() {
Spider.create(new _36wallpaperProcessor()).addUrl(_36_WALLPAPER_URL).thread(20) Spider.create(new _36wallpaperProcessor()).addUrl(_36_WALLPAPER_URL).thread(20)
.setScheduler(new QueueScheduler().setDuplicateRemover(new BloomFilterDuplicateRemover(110000))) .setScheduler(new QueueScheduler().setDuplicateRemover(new BloomFilterDuplicateRemover(110000)))
.addPipeline(wallpaperPipeline)
.run(); .run();
//删除重复数据
int count = wallpaperService.deleteRepeatData();
log.info("36壁纸删除重复数据数" + count);
//从redis中获取循环次数 //从redis中获取循环次数
Integer cache = redisService.getCacheObject(REPTILE_COUNT); Integer cache = redisService.getCacheObject(REPTILE_COUNT);

@ -112,7 +112,7 @@ public class reptileLogAspect {
* *
*/ */
private void saveData(WebmagicLog webmagicLog) { private void saveData(WebmagicLog webmagicLog) {
if (webmagicLog.getComplexRate() != null && webmagicLog.getComplexRate() == 0L) { if (webmagicLog.getComplexRate() != null && webmagicLog.getComplexRate() == 0L || webmagicLog.getComplexRate() == 1L) {
webmagicLog.setStatus(ERROR); webmagicLog.setStatus(ERROR);
} else { } else {
webmagicLog.setStatus(SUCCESS); webmagicLog.setStatus(SUCCESS);

Loading…
Cancel
Save