parent
54a6802148
commit
1a5624533f
@ -0,0 +1,33 @@
|
||||
package com.xjs.zol.controller;
|
||||
|
||||
import com.ruoyi.common.core.domain.R;
|
||||
import com.xjs.zol.task.ZolPhoneTask;
|
||||
import io.swagger.annotations.Api;
|
||||
import io.swagger.annotations.ApiOperation;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.web.bind.annotation.GetMapping;
|
||||
import org.springframework.web.bind.annotation.RequestMapping;
|
||||
import org.springframework.web.bind.annotation.RestController;
|
||||
|
||||
/**
|
||||
* 爬虫中关村手机controller
|
||||
* @author xiejs
|
||||
* @since 2022-04-18
|
||||
*/
|
||||
@RestController
|
||||
@RequestMapping("zol-phone")
|
||||
@Api(tags = "爬虫模块-中关村手机")
|
||||
public class ZolPhoneController {
|
||||
|
||||
@Autowired
|
||||
private ZolPhoneTask zolPhoneTask;
|
||||
|
||||
|
||||
//------------------------------内部调用rpc-------------------------------------
|
||||
@GetMapping("taskForPRC")
|
||||
@ApiOperation("供定时任务服务RPC远程调用")
|
||||
public R<Long> ZolPhoneTaskForRPC() {
|
||||
Long aLong = zolPhoneTask.reptileZolPhone();
|
||||
return R.ok(aLong);
|
||||
}
|
||||
}
|
@ -0,0 +1,13 @@
|
||||
package com.xjs.zol.mapper;
|
||||
|
||||
import com.baomidou.mybatisplus.core.mapper.BaseMapper;
|
||||
import com.xjs.zol.pojo.ZolPhone;
|
||||
|
||||
/**
|
||||
* 爬虫数据中关村手机mapper
|
||||
* @author xiejs
|
||||
* @since 2022-04-18
|
||||
*/
|
||||
public interface ZolPhoneMapper extends BaseMapper<ZolPhone> {
|
||||
|
||||
}
|
@ -0,0 +1,59 @@
|
||||
package com.xjs.zol.pojo;
|
||||
|
||||
import com.baomidou.mybatisplus.annotation.FieldFill;
|
||||
import com.baomidou.mybatisplus.annotation.TableField;
|
||||
import com.baomidou.mybatisplus.annotation.TableName;
|
||||
import com.ruoyi.common.core.annotation.Excel;
|
||||
import com.xjs.entity.BaseEntity;
|
||||
import lombok.Data;
|
||||
import lombok.EqualsAndHashCode;
|
||||
import lombok.experimental.Accessors;
|
||||
|
||||
import java.math.BigDecimal;
|
||||
import java.util.Date;
|
||||
|
||||
/**
|
||||
* 爬虫数据中关村手机对象 webmagic_zol_phone
|
||||
* @author xiejs
|
||||
* @since 2022-04-17
|
||||
*/
|
||||
@Data
|
||||
@EqualsAndHashCode(callSuper = true)
|
||||
@Accessors(chain = true)
|
||||
@TableName("webmagic_zol_phone")
|
||||
public class ZolPhone extends BaseEntity {
|
||||
|
||||
private static final long serialVersionUID = 1L;
|
||||
|
||||
/** 主键id */
|
||||
private Long id;
|
||||
|
||||
/** 手机名称 */
|
||||
@Excel(name = "手机名称")
|
||||
private String phoneName;
|
||||
|
||||
/** 图片地址 */
|
||||
@Excel(name = "图片地址")
|
||||
private String pictureUrl;
|
||||
|
||||
/** 手机描述 */
|
||||
@Excel(name = "手机描述")
|
||||
private String description;
|
||||
|
||||
/** 手机详情页面 */
|
||||
@Excel(name = "手机详情页面")
|
||||
private String detailPage;
|
||||
|
||||
/** 热度 */
|
||||
@Excel(name = "热度")
|
||||
private BigDecimal heat;
|
||||
|
||||
/** 价格 */
|
||||
@Excel(name = "价格")
|
||||
private BigDecimal price;
|
||||
|
||||
@Excel(name = "创建时间", dateFormat = "yyyy-MM-dd HH:mm:ss")
|
||||
@TableField(fill = FieldFill.INSERT)
|
||||
private Date createTime;
|
||||
|
||||
}
|
@ -0,0 +1,12 @@
|
||||
package com.xjs.zol.service;
|
||||
|
||||
import com.baomidou.mybatisplus.extension.service.IService;
|
||||
import com.xjs.zol.pojo.ZolPhone;
|
||||
|
||||
/**
|
||||
* 爬虫数据中关村手机service接口
|
||||
* @author xiejs
|
||||
* @since 2022-04-18
|
||||
*/
|
||||
public interface ZolPhoneService extends IService<ZolPhone> {
|
||||
}
|
@ -0,0 +1,16 @@
|
||||
package com.xjs.zol.service.impl;
|
||||
|
||||
import com.baomidou.mybatisplus.extension.service.impl.ServiceImpl;
|
||||
import com.xjs.zol.mapper.ZolPhoneMapper;
|
||||
import com.xjs.zol.pojo.ZolPhone;
|
||||
import com.xjs.zol.service.ZolPhoneService;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
/**
|
||||
* 爬虫数据中关村手机service实现
|
||||
* @author xiejs
|
||||
* @since 2022-04-18
|
||||
*/
|
||||
@Service
|
||||
public class ZolPhoneServiceImpl extends ServiceImpl<ZolPhoneMapper, ZolPhone> implements ZolPhoneService {
|
||||
}
|
@ -0,0 +1,54 @@
|
||||
package com.xjs.zol.task;
|
||||
|
||||
import com.ruoyi.common.redis.service.RedisService;
|
||||
import com.xjs.annotation.ReptileLog;
|
||||
import com.xjs.zol.webmagic.ZolPhonePipeline;
|
||||
import com.xjs.zol.webmagic.ZolPhoneProcessor;
|
||||
import lombok.extern.log4j.Log4j2;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.stereotype.Component;
|
||||
import us.codecraft.webmagic.Spider;
|
||||
import us.codecraft.webmagic.scheduler.BloomFilterDuplicateRemover;
|
||||
import us.codecraft.webmagic.scheduler.QueueScheduler;
|
||||
|
||||
import static com.xjs.consts.RedisConst.REPTILE_ZOL_PHONE_COUNT;
|
||||
import static com.xjs.consts.ReptileConst.ZOL_PHONE_URL;
|
||||
|
||||
/**
|
||||
* 中关村产品报价爬取手机任务
|
||||
*
|
||||
* @author xiejs
|
||||
* @since 2022-04-17
|
||||
*/
|
||||
@Component
|
||||
@Log4j2
|
||||
public class ZolPhoneTask {
|
||||
|
||||
@Autowired
|
||||
private ZolPhoneProcessor zolPhoneProcessor;
|
||||
@Autowired
|
||||
private RedisService redisService;
|
||||
@Autowired
|
||||
private ZolPhonePipeline zolPhonePipeline;
|
||||
|
||||
|
||||
@ReptileLog(name = "中关村手机", url = ZOL_PHONE_URL)
|
||||
public Long reptileZolPhone() {
|
||||
//执行爬虫
|
||||
Spider.create(zolPhoneProcessor)
|
||||
.addUrl(ZOL_PHONE_URL)//设置爬取地址
|
||||
.thread(30)//设置爬取线程数
|
||||
.setScheduler(new QueueScheduler()
|
||||
.setDuplicateRemover(new BloomFilterDuplicateRemover(110000)))//设置url去重过滤器
|
||||
.addPipeline(zolPhonePipeline)//设置爬取之后的数据操作
|
||||
//.setDownloader(downloader)//设置下载器
|
||||
.run();//同步执行
|
||||
|
||||
Integer cache = redisService.getCacheObject(REPTILE_ZOL_PHONE_COUNT);
|
||||
redisService.deleteObject(REPTILE_ZOL_PHONE_COUNT);
|
||||
if (cache != null) {
|
||||
return Long.valueOf(cache);
|
||||
}
|
||||
return 0L;
|
||||
}
|
||||
}
|
@ -0,0 +1,43 @@
|
||||
package com.xjs.zol.webmagic;
|
||||
|
||||
import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper;
|
||||
import com.xjs.zol.pojo.ZolPhone;
|
||||
import com.xjs.zol.service.ZolPhoneService;
|
||||
import lombok.extern.log4j.Log4j2;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.stereotype.Component;
|
||||
import org.springframework.transaction.annotation.Transactional;
|
||||
import us.codecraft.webmagic.ResultItems;
|
||||
import us.codecraft.webmagic.Task;
|
||||
import us.codecraft.webmagic.pipeline.Pipeline;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
|
||||
/**
|
||||
* 中关村手机页面爬虫数据处理
|
||||
*
|
||||
* @author xiejs
|
||||
* @since 2022-04-17
|
||||
*/
|
||||
@Component
|
||||
@Log4j2
|
||||
@Transactional
|
||||
public class ZolPhonePipeline implements Pipeline {
|
||||
|
||||
@Autowired
|
||||
private ZolPhoneService zolPhoneService;
|
||||
|
||||
@Override
|
||||
public void process(ResultItems resultItems, Task task) {
|
||||
List<ZolPhone> zolPhoneList = resultItems.get("zolPhoneList");
|
||||
//循环遍历集合,当对象的名称在数据库为空才插入数据
|
||||
for (ZolPhone zolPhone : zolPhoneList) {
|
||||
ZolPhone dbData = zolPhoneService.getOne(new LambdaQueryWrapper<ZolPhone>()
|
||||
.eq(ZolPhone::getPhoneName, zolPhone.getPhoneName()), false);
|
||||
if (Objects.isNull(dbData)) {
|
||||
zolPhoneService.save(zolPhone);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
@ -0,0 +1,113 @@
|
||||
package com.xjs.zol.webmagic;
|
||||
|
||||
import com.ruoyi.common.redis.service.RedisService;
|
||||
import com.xjs.zol.pojo.ZolPhone;
|
||||
import lombok.extern.log4j.Log4j2;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.stereotype.Component;
|
||||
import us.codecraft.webmagic.Page;
|
||||
import us.codecraft.webmagic.Site;
|
||||
import us.codecraft.webmagic.processor.PageProcessor;
|
||||
import us.codecraft.webmagic.selector.Selectable;
|
||||
|
||||
import java.math.BigDecimal;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
import static com.xjs.consts.RedisConst.REPTILE_ZOL_PHONE_COUNT;
|
||||
|
||||
/**
|
||||
* 中关村产品报价爬取手机处理
|
||||
*
|
||||
* @author xiejs
|
||||
* @since 2022-04-17
|
||||
*/
|
||||
@Log4j2
|
||||
@Component
|
||||
public class ZolPhoneProcessor implements PageProcessor {
|
||||
|
||||
@Autowired
|
||||
private RedisService redisService;
|
||||
|
||||
@Override
|
||||
public void process(Page page) {
|
||||
|
||||
try {
|
||||
Integer count = redisService.getCacheObject(REPTILE_ZOL_PHONE_COUNT);
|
||||
if (count == null) {
|
||||
count = 0;
|
||||
}
|
||||
|
||||
List<ZolPhone> zolPhoneList = new ArrayList<>();
|
||||
|
||||
//拿到每个手机的 li 标签
|
||||
List<Selectable> lis = page.getHtml().css("#J_PicMode > li").nodes();
|
||||
|
||||
for (Selectable li : lis) {
|
||||
ZolPhone zolPhone = new ZolPhone();
|
||||
|
||||
//排除无用数据
|
||||
if ("display:none;".equals(li.css("li", "style").get())) {
|
||||
continue;
|
||||
}
|
||||
|
||||
//获取手机的详情页面url
|
||||
String href = li.css("li > .pic", "href").get();
|
||||
|
||||
zolPhone.setDetailPage("https://detail.zol.com.cn/" + href);
|
||||
|
||||
//获取手机的名称
|
||||
String phoneName = li.css("li > h3 > a", "text").get();
|
||||
zolPhone.setPhoneName(phoneName);
|
||||
|
||||
//获取手机的描述
|
||||
String desc = li.css("li > h3 > a > span", "text").get();
|
||||
zolPhone.setDescription(desc);
|
||||
|
||||
//获取手机的参考价
|
||||
String price = li.css("li > .price-row .price-type", "text").get();
|
||||
//排除无用数据
|
||||
if ("概念产品".equals(price)) {
|
||||
continue;
|
||||
}
|
||||
zolPhone.setPrice(new BigDecimal(price));
|
||||
|
||||
//获取手机的评分
|
||||
String heat = li.css("li > .comment-row > .score", "text").get();
|
||||
zolPhone.setHeat(new BigDecimal(heat));
|
||||
|
||||
//获取手机图片的地址
|
||||
String picture = li.css("li > .pic > img", ".src").get();
|
||||
zolPhone.setPictureUrl(picture);
|
||||
|
||||
zolPhoneList.add(zolPhone);
|
||||
|
||||
//计数
|
||||
count++;
|
||||
}
|
||||
|
||||
page.putField("zolPhoneList",zolPhoneList);
|
||||
|
||||
redisService.setCacheObject(REPTILE_ZOL_PHONE_COUNT, count);
|
||||
} catch (Exception e) {
|
||||
log.error(e.getMessage());
|
||||
e.printStackTrace();
|
||||
} finally {
|
||||
redisService.expire(REPTILE_ZOL_PHONE_COUNT, 3, TimeUnit.HOURS);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public Site getSite() {
|
||||
return Site.me()
|
||||
//.addHeader(headerKey, headerValue)
|
||||
.setCharset("GBK")//设置字符编码
|
||||
.setTimeOut(2000)//设置超时时间
|
||||
.setRetrySleepTime(100)//设置重试间隔时间
|
||||
.setCycleRetryTimes(10)//设置重试次数
|
||||
.setSleepTime(1)//设置两个页面之间的间隔时间
|
||||
;
|
||||
}
|
||||
}
|
@ -0,0 +1,25 @@
|
||||
package com.xjs.zol.task;
|
||||
|
||||
import com.xjs.XjsWebmagicApp;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.boot.test.context.SpringBootTest;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.*;
|
||||
|
||||
/**
|
||||
* @author xiejs
|
||||
* @since 2022-04-17
|
||||
*/
|
||||
@SpringBootTest(classes = XjsWebmagicApp.class)
|
||||
class ZolPhoneTaskTest {
|
||||
|
||||
@Autowired
|
||||
private ZolPhoneTask zolPhoneTask;
|
||||
|
||||
@Test
|
||||
void reptileZolPhone() {
|
||||
Long aLong = zolPhoneTask.reptileZolPhone();
|
||||
System.out.println(aLong);
|
||||
}
|
||||
}
|
Loading…
Reference in new issue