From f22a31a33306824237fe058ef3ea4045ffc2c06f Mon Sep 17 00:00:00 2001 From: xjs <1294405880@qq.com> Date: Mon, 18 Apr 2022 11:35:19 +0800 Subject: [PATCH] =?UTF-8?q?1=E3=80=81=E7=88=AC=E8=99=AB=E4=B8=AD=E5=85=B3?= =?UTF-8?q?=E6=9D=91=E6=89=8B=E6=9C=BA=E6=89=80=E6=9C=89=E9=A1=B5=E9=9D=A2?= =?UTF-8?q?=E6=95=B0=E6=8D=AE=E7=88=AC=E5=8F=96=E5=B9=B6=E4=BF=9D=E5=AD=98?= =?UTF-8?q?=E5=88=B0=E6=95=B0=E6=8D=AE=E5=BA=93=E5=AE=9E=E7=8E=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../xjs/zol/webmagic/ZolPhonePipeline.java | 24 +++++++++--- .../xjs/zol/webmagic/ZolPhoneProcessor.java | 38 ++++++++++++++++--- 2 files changed, 51 insertions(+), 11 deletions(-) diff --git a/xjs-business/xjs-business-webmagic/src/main/java/com/xjs/zol/webmagic/ZolPhonePipeline.java b/xjs-business/xjs-business-webmagic/src/main/java/com/xjs/zol/webmagic/ZolPhonePipeline.java index 07a5da35..3f341fba 100644 --- a/xjs-business/xjs-business-webmagic/src/main/java/com/xjs/zol/webmagic/ZolPhonePipeline.java +++ b/xjs-business/xjs-business-webmagic/src/main/java/com/xjs/zol/webmagic/ZolPhonePipeline.java @@ -1,6 +1,8 @@ package com.xjs.zol.webmagic; +import cn.hutool.core.collection.CollUtil; import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper; +import com.baomidou.mybatisplus.core.conditions.update.LambdaUpdateWrapper; import com.xjs.zol.pojo.ZolPhone; import com.xjs.zol.service.ZolPhoneService; import lombok.extern.log4j.Log4j2; @@ -31,12 +33,22 @@ public class ZolPhonePipeline implements Pipeline { @Override public void process(ResultItems resultItems, Task task) { List<ZolPhone> zolPhoneList = resultItems.get("zolPhoneList"); - //循环遍历集合,当对象的名称在数据库为空才插入数据 - for (ZolPhone zolPhone : zolPhoneList) { - ZolPhone dbData = zolPhoneService.getOne(new LambdaQueryWrapper<ZolPhone>() - .eq(ZolPhone::getPhoneName, zolPhone.getPhoneName()), false); - if (Objects.isNull(dbData)) { - zolPhoneService.save(zolPhone); + if (CollUtil.isNotEmpty(zolPhoneList)) { + 
//Iterate over the list; insert a phone only when no row with the same name exists in the database + for (ZolPhone zolPhone : zolPhoneList) { + ZolPhone dbData = zolPhoneService.getOne(new LambdaQueryWrapper<ZolPhone>() + .eq(ZolPhone::getPhoneName, zolPhone.getPhoneName()), false); + if (Objects.isNull(dbData)) { + zolPhoneService.save(zolPhone); + } else { + //Update the stored row only when the scraped heat differs from the heat in the database + if (zolPhone.getHeat().compareTo(dbData.getHeat()) != 0) { + zolPhoneService.update(new LambdaUpdateWrapper<ZolPhone>() + .eq(ZolPhone::getPhoneName, zolPhone.getPhoneName()) + .set(ZolPhone::getHeat, zolPhone.getHeat())); + } + + } } } } diff --git a/xjs-business/xjs-business-webmagic/src/main/java/com/xjs/zol/webmagic/ZolPhoneProcessor.java b/xjs-business/xjs-business-webmagic/src/main/java/com/xjs/zol/webmagic/ZolPhoneProcessor.java index d3fa5c40..b33686a4 100644 --- a/xjs-business/xjs-business-webmagic/src/main/java/com/xjs/zol/webmagic/ZolPhoneProcessor.java +++ b/xjs-business/xjs-business-webmagic/src/main/java/com/xjs/zol/webmagic/ZolPhoneProcessor.java @@ -1,8 +1,10 @@ package com.xjs.zol.webmagic; +import com.ruoyi.common.core.utils.StringUtils; import com.ruoyi.common.redis.service.RedisService; import com.xjs.zol.pojo.ZolPhone; import lombok.extern.log4j.Log4j2; +import org.apache.commons.lang3.math.NumberUtils; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Component; import us.codecraft.webmagic.Page; @@ -12,6 +14,7 @@ import us.codecraft.webmagic.selector.Selectable; import java.math.BigDecimal; import java.util.ArrayList; +import java.util.Collections; import java.util.List; import java.util.concurrent.TimeUnit; @@ -27,6 +30,8 @@ import static com.xjs.consts.RedisConst.REPTILE_ZOL_PHONE_COUNT; @Component public class ZolPhoneProcessor implements PageProcessor { + public static final String URL = "https://detail.zol.com.cn/"; + @Autowired private RedisService redisService; @@ -38,6 +43,13 @@ public class ZolPhoneProcessor implements PageProcessor { if (count == null) { count = 0; } + //Collect the other result pages and put them on the crawl queue + 
//href of the next page that is still waiting to be crawled + String html_href = page.getHtml().css(".page-box > .pagebar > .next", "href").get(); + + Thread.sleep(100); + + page.addTargetRequests(Collections.singletonList(html_href)); List<ZolPhone> zolPhoneList = new ArrayList<>(); @@ -55,7 +67,7 @@ public class ZolPhoneProcessor implements PageProcessor { //获取手机的详情页面url String href = li.css("li > .pic", "href").get(); - zolPhone.setDetailPage("https://detail.zol.com.cn/" + href); + zolPhone.setDetailPage(URL + href); //获取手机的名称 String phoneName = li.css("li > h3 > a", "text").get(); @@ -68,14 +80,30 @@ public class ZolPhoneProcessor implements PageProcessor { //获取手机的参考价 String price = li.css("li > .price-row .price-type", "text").get(); //排除无用数据 - if ("概念产品".equals(price)) { + if (StringUtils.isNotBlank(price)) { + //Check whether the price text is numeric + boolean creatable = NumberUtils.isCreatable(price); + if (creatable) { + zolPhone.setPrice(new BigDecimal(price)); + } else { + continue; + } + } else { continue; } - zolPhone.setPrice(new BigDecimal(price)); //获取手机的评分 String heat = li.css("li > .comment-row > .score", "text").get(); - zolPhone.setHeat(new BigDecimal(heat)); + if (StringUtils.isNotBlank(heat)) { + boolean creatable = NumberUtils.isCreatable(heat); //validate the heat text itself; checking price here let a non-numeric heat reach new BigDecimal(heat) + if (creatable) { + zolPhone.setHeat(new BigDecimal(heat)); + } else { + continue; + } + } else { + continue; + } //获取手机图片的地址 String picture = li.css("li > .pic > img", ".src").get(); @@ -87,7 +115,7 @@ public class ZolPhoneProcessor implements PageProcessor { count++; } - page.putField("zolPhoneList",zolPhoneList); + page.putField("zolPhoneList", zolPhoneList); redisService.setCacheObject(REPTILE_ZOL_PHONE_COUNT, count); } catch (Exception e) {