1、爬虫中关村手机第一页数据爬取并保存到数据库实现

pull/254/head
xjs 3 years ago
parent 54a6802148
commit 1a5624533f

@ -27,24 +27,18 @@
<el-row :gutter="10" class="mb8">
<el-col :span="1.5">
<el-button
type="primary"
icon="el-icon-plus"
size="mini"
@click="OnlineDrawingProcess"
v-hasPermi="['activiti:modeler']"
>在线绘制流程
</el-button>
<at-button type="info" hollow
size="smaller"
icon="icon-settings"
v-hasPermi="['activiti:modeler']"
@click="OnlineDrawingProcess" >在线绘制流程</at-button>
</el-col>
<el-col :span="1.5">
<el-button
type="primary"
icon="el-icon-plus"
size="mini"
@click="handleImport"
v-hasPermi="['activiti:modeler']"
>部署流程
</el-button>
<at-button type="info" hollow
size="smaller"
icon="icon-arrow-up"
v-hasPermi="['activiti:modeler']"
@click="handleImport" >部署流程</at-button>
</el-col>
<right-toolbar :showSearch.sync="showSearch" @queryTable="getList"></right-toolbar>

@ -60,6 +60,11 @@ public class RedisConst {
*/
public static final String REPTILE_WEIXIN_LINK_COUNT = "bussiness:reptile:weixin.link.count";
/**
 * Redis key holding the running count of phones extracted by the ZOL phone crawler.
 * The page processor writes the count under this key and the crawl task reads and then
 * deletes it once the spider has finished.
 */
public static final String REPTILE_ZOL_PHONE_COUNT = "bussiness:reptile:zol.phone.count";
/**
* key
*/

@ -40,6 +40,11 @@ public class ReptileConst {
*/
public static final String WEIXIN_SOUGOU_URL= "https://weixin.sogou.com/";
/**
 * Start URL of the ZOL (detail.zol.com.cn) mobile-phone crawl; it is the URL handed to the
 * spider as its seed request.
 */
public static final String ZOL_PHONE_URL= "https://detail.zol.com.cn/cell_phone_index/subcate57_list_1.html";

@ -8,10 +8,12 @@ import com.ruoyi.common.security.annotation.RequiresPermissions;
import com.xjs.domain.mall.MailBean;
import com.xjs.domain.mall.MailVo;
import com.xjs.service.MailService;
import com.xjs.validation.group.AddGroup;
import io.swagger.annotations.Api;
import io.swagger.annotations.ApiOperation;
import org.springframework.beans.BeanUtils;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.validation.annotation.Validated;
import org.springframework.web.bind.annotation.*;
/**
@ -33,7 +35,7 @@ public class MailController {
@ApiOperation("发送邮件")
@RequiresPermissions("sendmail-send")
@Log(title = "发送邮件", businessType = BusinessType.INSERT)
public AjaxResult sendMail(MailVo mailVo) {
public AjaxResult sendMail(@Validated(AddGroup.class) MailVo mailVo) {
MailBean mailBean = new MailBean();
BeanUtils.copyProperties(mailVo, mailBean);
mailService.sendMail(mailBean);

@ -1,8 +1,11 @@
package com.xjs.domain.mall;
import com.xjs.validation.group.AddGroup;
import lombok.Data;
import org.springframework.web.multipart.MultipartFile;
import javax.validation.constraints.NotBlank;
import javax.validation.constraints.Size;
import java.io.Serializable;
/**
@ -17,14 +20,20 @@ public class MailVo implements Serializable {
/**
 * Mail recipient. Under the {@code AddGroup} validation group it must be non-blank and
 * between 1 and 40 characters long.
 */
@Size(min = 1, max = 40, message = "邮件接收人长度不能超过40个字符",groups = AddGroup.class)
@NotBlank(message = "邮件接收人不能为空",groups = AddGroup.class)
private String recipient;
/**
 * Mail subject. Under the {@code AddGroup} validation group it must be non-blank and
 * between 1 and 100 characters long.
 */
@Size(min = 1, max = 100, message = "邮件主题长度不能超过100个字符",groups = AddGroup.class)
@NotBlank(message = "邮件主题不能为空",groups = AddGroup.class)
private String subject;
/**
 * Mail body. Under the {@code AddGroup} validation group it must be non-blank and
 * between 1 and 50000 characters long.
 */
@NotBlank(message = "邮件内容不能为空",groups = AddGroup.class)
@Size(min = 1, max = 50000, message = "邮件内容长度不能超过50000个字符",groups = AddGroup.class)
private String content;
/**

@ -0,0 +1,33 @@
package com.xjs.zol.controller;
import com.ruoyi.common.core.domain.R;
import com.xjs.zol.task.ZolPhoneTask;
import io.swagger.annotations.Api;
import io.swagger.annotations.ApiOperation;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;
/**
 * REST entry point for the ZOL mobile-phone crawler.
 *
 * <p>Exposes a single endpoint that runs the crawl synchronously through
 * {@link ZolPhoneTask} and reports the count the task returns.
 *
 * @author xiejs
 * @since 2022-04-18
 */
@RestController
@RequestMapping("zol-phone")
@Api(tags = "爬虫模块-中关村手机")
public class ZolPhoneController {

    @Autowired
    private ZolPhoneTask zolPhoneTask;

    // ------------------------------ internal RPC endpoints ------------------------------

    /**
     * Runs the ZOL phone crawl and wraps its result in a success response.
     *
     * <p>NOTE(review): the path is spelled {@code taskForPRC}; it is left untouched because
     * remote callers presumably address the endpoint by this exact path.
     *
     * @return a success response carrying the value returned by
     *         {@link ZolPhoneTask#reptileZolPhone()}
     */
    @GetMapping("taskForPRC")
    @ApiOperation("供定时任务服务RPC远程调用")
    public R<Long> ZolPhoneTaskForRPC() {
        return R.ok(zolPhoneTask.reptileZolPhone());
    }
}

@ -0,0 +1,13 @@
package com.xjs.zol.mapper;
import com.baomidou.mybatisplus.core.mapper.BaseMapper;
import com.xjs.zol.pojo.ZolPhone;
/**
 * MyBatis-Plus mapper for {@link ZolPhone} entities.
 *
 * <p>Declares no methods of its own; all operations come from {@link BaseMapper}.
 *
 * @author xiejs
 * @since 2022-04-18
 */
public interface ZolPhoneMapper extends BaseMapper<ZolPhone> {
}

@ -0,0 +1,59 @@
package com.xjs.zol.pojo;
import com.baomidou.mybatisplus.annotation.FieldFill;
import com.baomidou.mybatisplus.annotation.TableField;
import com.baomidou.mybatisplus.annotation.TableName;
import com.ruoyi.common.core.annotation.Excel;
import com.xjs.entity.BaseEntity;
import lombok.Data;
import lombok.EqualsAndHashCode;
import lombok.experimental.Accessors;
import java.math.BigDecimal;
import java.util.Date;
/**
 * Entity for one mobile phone scraped from the ZOL list page, mapped to the table
 * {@code webmagic_zol_phone}.
 *
 * @author xiejs
 * @since 2022-04-17
 */
@Data
@EqualsAndHashCode(callSuper = true)
@Accessors(chain = true)
@TableName("webmagic_zol_phone")
public class ZolPhone extends BaseEntity {
private static final long serialVersionUID = 1L;
/** Primary key id. */
private Long id;
/** Phone name. */
@Excel(name = "手机名称")
private String phoneName;
/** URL of the phone's picture. */
@Excel(name = "图片地址")
private String pictureUrl;
/** Phone description. */
@Excel(name = "手机描述")
private String description;
/** URL of the phone's detail page. */
@Excel(name = "手机详情页面")
private String detailPage;
/** Heat (popularity) value; the crawler fills it from the tile's score element. */
@Excel(name = "热度")
private BigDecimal heat;
/** Price. */
@Excel(name = "价格")
private BigDecimal price;
/** Creation time; declared with the MyBatis-Plus INSERT fill strategy. */
@Excel(name = "创建时间", dateFormat = "yyyy-MM-dd HH:mm:ss")
@TableField(fill = FieldFill.INSERT)
private Date createTime;
}

@ -0,0 +1,12 @@
package com.xjs.zol.service;
import com.baomidou.mybatisplus.extension.service.IService;
import com.xjs.zol.pojo.ZolPhone;
/**
 * Service contract for {@link ZolPhone} persistence.
 *
 * <p>Adds nothing beyond the operations inherited from MyBatis-Plus {@link IService}.
 *
 * @author xiejs
 * @since 2022-04-18
 */
public interface ZolPhoneService extends IService<ZolPhone> {
}

@ -0,0 +1,16 @@
package com.xjs.zol.service.impl;
import com.baomidou.mybatisplus.extension.service.impl.ServiceImpl;
import com.xjs.zol.mapper.ZolPhoneMapper;
import com.xjs.zol.pojo.ZolPhone;
import com.xjs.zol.service.ZolPhoneService;
import org.springframework.stereotype.Service;
/**
 * Default {@link ZolPhoneService} implementation, registered as a Spring service bean.
 *
 * <p>Has no logic of its own; behaviour is inherited from MyBatis-Plus {@link ServiceImpl}
 * bound to {@link ZolPhoneMapper} and {@link ZolPhone}.
 *
 * @author xiejs
 * @since 2022-04-18
 */
@Service
public class ZolPhoneServiceImpl extends ServiceImpl<ZolPhoneMapper, ZolPhone> implements ZolPhoneService {
}

@ -0,0 +1,54 @@
package com.xjs.zol.task;
import com.ruoyi.common.redis.service.RedisService;
import com.xjs.annotation.ReptileLog;
import com.xjs.zol.webmagic.ZolPhonePipeline;
import com.xjs.zol.webmagic.ZolPhoneProcessor;
import lombok.extern.log4j.Log4j2;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;
import us.codecraft.webmagic.Spider;
import us.codecraft.webmagic.scheduler.BloomFilterDuplicateRemover;
import us.codecraft.webmagic.scheduler.QueueScheduler;
import static com.xjs.consts.RedisConst.REPTILE_ZOL_PHONE_COUNT;
import static com.xjs.consts.ReptileConst.ZOL_PHONE_URL;
/**
 * Runs the ZOL mobile-phone crawl and reports how many phones were extracted.
 *
 * <p>The count is exchanged with the page processor through the Redis key
 * {@code REPTILE_ZOL_PHONE_COUNT}: the processor adds to it while crawling and this task
 * reads and clears it afterwards.
 *
 * @author xiejs
 * @since 2022-04-17
 */
@Component
@Log4j2
public class ZolPhoneTask {

    @Autowired
    private ZolPhoneProcessor zolPhoneProcessor;

    @Autowired
    private RedisService redisService;

    @Autowired
    private ZolPhonePipeline zolPhonePipeline;

    /**
     * Crawls {@code ZOL_PHONE_URL} synchronously and returns the number of phones counted
     * during this run.
     *
     * @return the counter value written during the crawl, or {@code 0} when no counter
     *         exists afterwards; never {@code null}
     */
    @ReptileLog(name = "中关村手机", url = ZOL_PHONE_URL)
    public Long reptileZolPhone() {
        // The processor adds to whatever value is already stored under the key, so a counter
        // left behind by an interrupted earlier run would inflate this run's result.
        redisService.deleteObject(REPTILE_ZOL_PHONE_COUNT);

        Spider.create(zolPhoneProcessor)
                .addUrl(ZOL_PHONE_URL) // seed URL
                .thread(30) // number of crawler threads
                .setScheduler(new QueueScheduler()
                        .setDuplicateRemover(new BloomFilterDuplicateRemover(110000))) // URL de-duplication
                .addPipeline(zolPhonePipeline) // persists the extracted phones
                .run(); // blocks until the crawl has finished

        Integer crawled = redisService.getCacheObject(REPTILE_ZOL_PHONE_COUNT);
        redisService.deleteObject(REPTILE_ZOL_PHONE_COUNT);
        return crawled == null ? 0L : crawled.longValue();
    }
}

@ -0,0 +1,43 @@
package com.xjs.zol.webmagic;
import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper;
import com.xjs.zol.pojo.ZolPhone;
import com.xjs.zol.service.ZolPhoneService;
import lombok.extern.log4j.Log4j2;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;
import org.springframework.transaction.annotation.Transactional;
import us.codecraft.webmagic.ResultItems;
import us.codecraft.webmagic.Task;
import us.codecraft.webmagic.pipeline.Pipeline;
import java.util.List;
import java.util.Objects;
/**
 * WebMagic {@link Pipeline} that persists the phones extracted from a page.
 *
 * <p>Reads the list published under the result field {@code "zolPhoneList"} and saves every
 * phone whose name is not yet present in the database.
 *
 * @author xiejs
 * @since 2022-04-17
 */
@Component
@Log4j2
@Transactional
public class ZolPhonePipeline implements Pipeline {

    @Autowired
    private ZolPhoneService zolPhoneService;

    /**
     * Saves the phones carried by {@code resultItems}, skipping names that already exist.
     *
     * @param resultItems extraction result of one page; the {@code "zolPhoneList"} field may
     *                    be absent, in which case nothing is saved
     * @param task        the running crawl task (unused)
     */
    @Override
    public void process(ResultItems resultItems, Task task) {
        List<ZolPhone> zolPhoneList = resultItems.get("zolPhoneList");
        // The field is missing when extraction did not publish a list for this page;
        // iterating a null list would throw a NullPointerException.
        if (zolPhoneList == null || zolPhoneList.isEmpty()) {
            log.debug("No ZOL phones to persist for this page");
            return;
        }

        // Insert a phone only when no row with the same name exists yet.
        for (ZolPhone zolPhone : zolPhoneList) {
            ZolPhone dbData = zolPhoneService.getOne(new LambdaQueryWrapper<ZolPhone>()
                    .eq(ZolPhone::getPhoneName, zolPhone.getPhoneName()), false);
            if (Objects.isNull(dbData)) {
                zolPhoneService.save(zolPhone);
            }
        }
    }
}

@ -0,0 +1,113 @@
package com.xjs.zol.webmagic;
import com.ruoyi.common.redis.service.RedisService;
import com.xjs.zol.pojo.ZolPhone;
import lombok.extern.log4j.Log4j2;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;
import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.processor.PageProcessor;
import us.codecraft.webmagic.selector.Selectable;
import java.math.BigDecimal;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.TimeUnit;
import static com.xjs.consts.RedisConst.REPTILE_ZOL_PHONE_COUNT;
/**
*
*
* @author xiejs
* @since 2022-04-17
*/
@Log4j2
@Component
public class ZolPhoneProcessor implements PageProcessor {
@Autowired
private RedisService redisService;
@Override
public void process(Page page) {
try {
Integer count = redisService.getCacheObject(REPTILE_ZOL_PHONE_COUNT);
if (count == null) {
count = 0;
}
List<ZolPhone> zolPhoneList = new ArrayList<>();
//拿到每个手机的 li 标签
List<Selectable> lis = page.getHtml().css("#J_PicMode > li").nodes();
for (Selectable li : lis) {
ZolPhone zolPhone = new ZolPhone();
//排除无用数据
if ("display:none;".equals(li.css("li", "style").get())) {
continue;
}
//获取手机的详情页面url
String href = li.css("li > .pic", "href").get();
zolPhone.setDetailPage("https://detail.zol.com.cn/" + href);
//获取手机的名称
String phoneName = li.css("li > h3 > a", "text").get();
zolPhone.setPhoneName(phoneName);
//获取手机的描述
String desc = li.css("li > h3 > a > span", "text").get();
zolPhone.setDescription(desc);
//获取手机的参考价
String price = li.css("li > .price-row .price-type", "text").get();
//排除无用数据
if ("概念产品".equals(price)) {
continue;
}
zolPhone.setPrice(new BigDecimal(price));
//获取手机的评分
String heat = li.css("li > .comment-row > .score", "text").get();
zolPhone.setHeat(new BigDecimal(heat));
//获取手机图片的地址
String picture = li.css("li > .pic > img", ".src").get();
zolPhone.setPictureUrl(picture);
zolPhoneList.add(zolPhone);
//计数
count++;
}
page.putField("zolPhoneList",zolPhoneList);
redisService.setCacheObject(REPTILE_ZOL_PHONE_COUNT, count);
} catch (Exception e) {
log.error(e.getMessage());
e.printStackTrace();
} finally {
redisService.expire(REPTILE_ZOL_PHONE_COUNT, 3, TimeUnit.HOURS);
}
}
@Override
public Site getSite() {
return Site.me()
//.addHeader(headerKey, headerValue)
.setCharset("GBK")//设置字符编码
.setTimeOut(2000)//设置超时时间
.setRetrySleepTime(100)//设置重试间隔时间
.setCycleRetryTimes(10)//设置重试次数
.setSleepTime(1)//设置两个页面之间的间隔时间
;
}
}

@ -0,0 +1,25 @@
package com.xjs.zol.task;
import com.xjs.XjsWebmagicApp;
import org.junit.jupiter.api.Test;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.test.context.SpringBootTest;
import static org.junit.jupiter.api.Assertions.*;
/**
 * Spring Boot test for {@link ZolPhoneTask}.
 *
 * <p>Loads the application context of {@link XjsWebmagicApp} and invokes the crawl task
 * through the autowired bean.
 *
 * @author xiejs
 * @since 2022-04-17
 */
@SpringBootTest(classes = XjsWebmagicApp.class)
class ZolPhoneTaskTest {

    @Autowired
    private ZolPhoneTask zolPhoneTask;

    /**
     * The task must always report a count: never {@code null} and never negative.
     */
    @Test
    void reptileZolPhone() {
        Long crawled = zolPhoneTask.reptileZolPhone();

        assertNotNull(crawled, "crawl task must return a count, not null");
        assertTrue(crawled >= 0, "crawled count must not be negative but was " + crawled);
    }
}
Loading…
Cancel
Save