1、36壁纸网爬虫持久到数据库代码优化

pull/254/head
xjs 4 years ago
parent 8f36a77d2b
commit 7c2e1b684d

@ -397,9 +397,9 @@ export default {
this.queryParams.endCreateTime = this.daterangeCreateTime[1];
}
listWord(this.queryParams).then(response => {
this.loading = false;
this.wordList = response.rows;
this.total = response.total;
this.loading = false;
});
},
//

@ -1,5 +1,6 @@
package com.xjs._36wallpaper.task;
import com.xjs._36wallpaper.service._36wallpaperService;
import com.xjs._36wallpaper.webmagic._36wallpaperProcessor;
import com.xjs.annotation.ReptileLog;
import lombok.extern.log4j.Log4j2;
@ -20,6 +21,9 @@ public class _36wallpaperTask {
@Autowired
private _36wallpaperProcessor wallpaperProcessor;
@Autowired
private _36wallpaperService wallpaperService;
/**
*
@ -27,7 +31,12 @@ public class _36wallpaperTask {
*/
@ReptileLog(name = "36壁纸网", url = _36_WALLPAPER_URL)
public Long reptileWallpaper() {
return wallpaperProcessor.run();
Long run = wallpaperProcessor.run();
//删除重复数据
int count = wallpaperService.deleteRepeatData();
log.info("36壁纸删除重复数据数" + count);
return run;
}
}

@ -0,0 +1,37 @@
package com.xjs._36wallpaper.webmagic;
import cn.hutool.core.collection.CollUtil;
import com.xjs._36wallpaper.pojo._36wallpaper;
import com.xjs._36wallpaper.service._36wallpaperService;
import lombok.extern.log4j.Log4j2;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;
import us.codecraft.webmagic.ResultItems;
import us.codecraft.webmagic.Task;
import us.codecraft.webmagic.pipeline.Pipeline;
import java.util.List;
/**
 * WebMagic {@link Pipeline} that forwards scraped 36wallpaper records to the
 * wallpaper service so they are persisted outside of the page processor.
 *
 * @author xiejs
 * @since 2022-02-20
 */
@Component
@Log4j2
public class _36wallpaperPipeline implements Pipeline {

    /** Result-item key under which the scraped wallpaper list is looked up. */
    private static final String WALLPAPER_DATA_KEY = "_36wallpaperData";

    /** Batch size handed to {@code saveBatch}. */
    private static final int SAVE_BATCH_SIZE = 25;

    @Autowired
    private _36wallpaperService wallpaperService;

    /**
     * Reads the wallpaper list from the result items and, when it holds at
     * least one element, passes it to the service in batches.
     *
     * @param resultItems fields collected for the page that was just processed
     * @param task        the crawl task that produced the results (not used here)
     */
    @Override
    public void process(ResultItems resultItems, Task task) {
        List<_36wallpaper> scraped = resultItems.get(WALLPAPER_DATA_KEY);

        // Nothing was extracted for this page (null or empty list): skip the service call.
        if (CollUtil.isEmpty(scraped)) {
            return;
        }

        // NOTE(review): saveBatch presumably writes to the database in chunks of
        // SAVE_BATCH_SIZE — confirm against the service implementation.
        wallpaperService.saveBatch(scraped, SAVE_BATCH_SIZE);
    }
}

@ -2,7 +2,6 @@ package com.xjs._36wallpaper.webmagic;
import com.ruoyi.common.redis.service.RedisService;
import com.xjs._36wallpaper.pojo._36wallpaper;
import com.xjs._36wallpaper.service._36wallpaperService;
import lombok.extern.log4j.Log4j2;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
@ -44,6 +43,9 @@ public class _36wallpaperProcessor implements PageProcessor {
private static final String headerValue = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.45 Safari/537.36";
@Autowired
private _36wallpaperPipeline wallpaperPipeline;
private static RedisService redisService;
@Autowired
@ -52,12 +54,12 @@ public class _36wallpaperProcessor implements PageProcessor {
}
private static _36wallpaperService wallpaperService;
/*private static _36wallpaperService wallpaperService;
@Autowired
public void setWallpaperService(_36wallpaperService wallpaperService) {
_36wallpaperProcessor.wallpaperService = wallpaperService;
}
}*/
/**
*
@ -159,8 +161,11 @@ public class _36wallpaperProcessor implements PageProcessor {
}
}
//持久化
wallpaperService.saveBatch(wallpapers, 25);
//持久化 --使用Pipeline实现持久化了
//wallpaperService.saveBatch(wallpapers, 25);
//暂时保存到内存中后续实现Pipeline接口保存到数据库
page.putField("_36wallpaperData",wallpapers);
//循环次数存入redis中
Integer count = redisService.getCacheObject(REPTILE_COUNT);
@ -191,11 +196,10 @@ public class _36wallpaperProcessor implements PageProcessor {
public Long run() {
Spider.create(new _36wallpaperProcessor()).addUrl(_36_WALLPAPER_URL).thread(20)
.setScheduler(new QueueScheduler().setDuplicateRemover(new BloomFilterDuplicateRemover(110000)))
.addPipeline(wallpaperPipeline)
.run();
//删除重复数据
int count = wallpaperService.deleteRepeatData();
log.info("36壁纸删除重复数据数" + count);
//从redis中获取循环次数
Integer cache = redisService.getCacheObject(REPTILE_COUNT);

@ -112,7 +112,7 @@ public class reptileLogAspect {
*
*/
private void saveData(WebmagicLog webmagicLog) {
if (webmagicLog.getComplexRate() != null && webmagicLog.getComplexRate() == 0L) {
if (webmagicLog.getComplexRate() != null && webmagicLog.getComplexRate() == 0L || webmagicLog.getComplexRate() == 1L) {
webmagicLog.setStatus(ERROR);
} else {
webmagicLog.setStatus(SUCCESS);

Loading…
Cancel
Save