1、爬虫日志功能实现(AOP)

2、爬虫日志持久化到数据库
pull/254/head
xjs 4 years ago
parent 3cbfd09956
commit ede181d001

@ -3,6 +3,7 @@ package com.xjs.business.log;
import com.ruoyi.common.core.constant.ServiceNameConstants; import com.ruoyi.common.core.constant.ServiceNameConstants;
import com.ruoyi.common.core.domain.R; import com.ruoyi.common.core.domain.R;
import com.xjs.business.log.domain.ApiLog; import com.xjs.business.log.domain.ApiLog;
import com.xjs.business.log.domain.WebmagicLog;
import com.xjs.business.log.factory.RemoteLogFactory; import com.xjs.business.log.factory.RemoteLogFactory;
import org.springframework.cloud.openfeign.FeignClient; import org.springframework.cloud.openfeign.FeignClient;
import org.springframework.web.bind.annotation.PostMapping; import org.springframework.web.bind.annotation.PostMapping;
@ -20,4 +21,8 @@ public interface RemoteLogFeign {
@PostMapping("/apilog/forPRC") @PostMapping("/apilog/forPRC")
R<Object> saveApiLog(@RequestBody ApiLog apiLog); R<Object> saveApiLog(@RequestBody ApiLog apiLog);
@PostMapping("reptileLog/saveForPRC")
public R<Object> saveReptileLog(@RequestBody WebmagicLog webmagicLog);
} }

@ -0,0 +1,39 @@
package com.xjs.business.log.domain;
import lombok.Data;
import java.io.Serializable;
import java.util.Date;
/**
* Crawler log record as exchanged with the log service through
* {@code RemoteLogFeign.saveReptileLog}.
* <p>
* Instances are built by the crawler logging aspect, which fills in
* {@code name}, {@code url}, {@code requestTime}, {@code complexRate} and
* {@code status}.
*
* @author xiejs
* @since 2022-02-17
*/
@Data
public class WebmagicLog implements Serializable {
private static final long serialVersionUID = 1L;
/** Primary key. */
private Long id;
/** Crawler name. */
private String name;
/** Crawler address (URL). */
private String url;
/**
* Value returned by the crawl method when that return value is a {@code Long};
* the logging aspect treats a value of 0 as a failed run.
* NOTE(review): the crawl tasks return a loop counter here — confirm that
* "complexity" is the intended meaning.
*/
private Long complexRate;
/**
* Execution status, set by the logging aspect to {@code ReqConst.SUCCESS}
* or {@code ReqConst.ERROR}.
*/
private Integer status;
/** Time spent on the request, in milliseconds. */
private Long requestTime;
/**
* Creation time. Not set by the logging aspect; presumably filled in by the
* persistence side on insert — TODO confirm.
*/
private Date createTime;
}

@ -1,8 +1,9 @@
package com.xjs.business.log.factory; package com.xjs.business.log.factory;
import com.ruoyi.common.core.domain.R; import com.ruoyi.common.core.domain.R;
import com.xjs.business.api.factory.RemoteTranDictFactory;
import com.xjs.business.log.RemoteLogFeign; import com.xjs.business.log.RemoteLogFeign;
import com.xjs.business.log.domain.ApiLog;
import com.xjs.business.log.domain.WebmagicLog;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import org.springframework.cloud.openfeign.FallbackFactory; import org.springframework.cloud.openfeign.FallbackFactory;
@ -19,7 +20,18 @@ public class RemoteLogFactory implements FallbackFactory<RemoteLogFeign> {
@Override @Override
public RemoteLogFeign create(Throwable cause) { public RemoteLogFeign create(Throwable cause) {
log.error("日志模块服务添加调用失败:{}", cause.getMessage()); return new RemoteLogFeign() {
return apiLog -> R.fail("日志模块服务添加调用失败" + cause.getMessage()); @Override
public R<Object> saveApiLog(ApiLog apiLog) {
log.error("日志模块api日志服务添加调用失败");
return R.fail("日志模块api日志服务添加调用失败" + cause.getMessage());
}
@Override
public R<Object> saveReptileLog(WebmagicLog webmagicLog) {
log.error("日志模块爬虫日志服务添加调用失败");
return R.fail("日志模块爬虫日志服务添加调用失败" + cause.getMessage());
}
};
} }
} }

@ -82,6 +82,8 @@
<el-table-column label="文案标签" align="center" prop="type" :show-overflow-tooltip="true" width="120px"/> <el-table-column label="文案标签" align="center" prop="type" :show-overflow-tooltip="true" width="120px"/>
<el-table-column label="文案主题" align="center" prop="theme" :show-overflow-tooltip="true" width="200px"/> <el-table-column label="文案主题" align="center" prop="theme" :show-overflow-tooltip="true" width="200px"/>
<el-table-column label="文案内容" align="center" prop="content" :show-overflow-tooltip="true"/> <el-table-column label="文案内容" align="center" prop="content" :show-overflow-tooltip="true"/>
<el-table-column label="创建时间" align="center" prop="createTime" width="250px" :show-overflow-tooltip="true">
</el-table-column>
<el-table-column label="操作" align="center" class-name="small-padding fixed-width" width="150px"> <el-table-column label="操作" align="center" class-name="small-padding fixed-width" width="150px">
<template slot-scope="scope"> <template slot-scope="scope">
<el-button <el-button

@ -0,0 +1,25 @@
package com.xjs.annotation;
import java.lang.annotation.*;
/**
* Marks a crawler method whose invocation should be recorded as a crawler log
* entry. The logging aspect's pointcut is
* {@code @annotation(com.xjs.annotation.ReptileLog)}.
* <p>
* NOTE(review): {@code ElementType.PARAMETER} is allowed by {@code @Target},
* but the aspect only reads this annotation from methods — confirm whether
* parameter usage is intended.
*
* @author xiejs
* @since 2022-02-17
*/
@Target({ ElementType.PARAMETER, ElementType.METHOD })
@Retention(RetentionPolicy.RUNTIME)
@Documented
public @interface ReptileLog {
/**
* Crawler name; copied by the logging aspect into the log record's
* {@code name} field. Defaults to the empty string.
*/
String name() default "";
/**
* Crawler URL; copied by the logging aspect into the log record's
* {@code url} field. Defaults to the empty string.
*/
String url() default "";
}

@ -0,0 +1,38 @@
package com.xjs.reptileLog.controller;
import com.ruoyi.common.core.domain.R;
import com.xjs.reptileLog.domain.WebmagicLog;
import com.xjs.reptileLog.service.WebmagicLogService;
import io.swagger.annotations.Api;
import io.swagger.annotations.ApiOperation;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestBody;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;
/**
 * REST controller for crawler log records, mounted under {@code reptileLog}.
 *
 * @author xiejs
 * @since 2022-02-17
 */
@RestController
@RequestMapping("reptileLog")
@Api(tags = "业务模块-爬虫日志")
public class WebmagicLogController {

    @Autowired
    private WebmagicLogService webmagicLogService;

    // ----------------------- internal RPC endpoints ------------------------

    /**
     * Persists one crawler log record; called remotely by the crawler logging aspect.
     *
     * @param webmagicLog the record to store, taken from the request body
     * @return {@code R.ok()} when the service reports the row was saved, otherwise {@code R.fail()}
     */
    @PostMapping("saveForPRC")
    @ApiOperation("供AOP切面RPC远程调用")
    public R<Object> saveReptileLog(@RequestBody WebmagicLog webmagicLog) {
        if (webmagicLogService.save(webmagicLog)) {
            return R.ok();
        }
        return R.fail();
    }
}

@ -0,0 +1,50 @@
package com.xjs.reptileLog.domain;
import com.baomidou.mybatisplus.annotation.FieldFill;
import com.baomidou.mybatisplus.annotation.TableField;
import com.ruoyi.common.core.annotation.Excel;
import lombok.Data;
import java.io.Serializable;
import java.util.Date;
/**
* Persistent crawler log entity handled by {@code WebmagicLogMapper} and
* {@code WebmagicLogService}; the {@code @Excel} annotations describe its
* export columns.
*
* @author xiejs
* @since 2022-02-17
*/
@Data
public class WebmagicLog implements Serializable {
private static final long serialVersionUID = 1L;
/** Primary key. */
private Long id;
/** Crawler name. */
@Excel(name = "爬虫名称")
private String name;
/** Crawler address (URL). */
@Excel(name = "爬虫地址")
private String url;
/**
* Complexity value reported for the crawl run (export column "复杂度").
* NOTE(review): how this value is computed is decided by the caller that
* submits the record — confirm its exact meaning there.
*/
@Excel(name = "复杂度")
private Long complexRate;
/** Execution result; the export converter maps 1 to success and 2 to failure. */
@Excel(name = "执行结果",readConverterExp = "1=成功,2=失败")
private Integer status;
/** Time spent on the request, in milliseconds. */
@Excel(name = "请求耗费时间")
private Long requestTime;
/**
* Creation time, marked for insert-time fill via {@code FieldFill.INSERT}.
* The handler that supplies the value is not part of this class — TODO confirm
* it is registered.
*/
@Excel(name = "创建时间" ,dateFormat = "yyyy-MM-dd HH:mm:ss")
@TableField(fill = FieldFill.INSERT)
private Date createTime;
}

@ -0,0 +1,13 @@
package com.xjs.reptileLog.mapper;
import com.baomidou.mybatisplus.core.mapper.BaseMapper;
import com.xjs.reptileLog.domain.WebmagicLog;
/**
* MyBatis-Plus mapper for {@link WebmagicLog}. Declares no methods of its own;
* everything available comes from {@link BaseMapper}.
*
* @author xiejs
* @since 2022-02-17
*/
public interface WebmagicLogMapper extends BaseMapper<WebmagicLog> {
}

@ -0,0 +1,13 @@
package com.xjs.reptileLog.service;
import com.baomidou.mybatisplus.extension.service.IService;
import com.xjs.reptileLog.domain.WebmagicLog;
/**
* Service contract for {@link WebmagicLog} records. Declares no methods of its
* own; everything available comes from {@link IService}.
*
* @author xiejs
* @since 2022-02-17
*/
public interface WebmagicLogService extends IService<WebmagicLog> {
}

@ -0,0 +1,16 @@
package com.xjs.reptileLog.service.impl;
import com.baomidou.mybatisplus.extension.service.impl.ServiceImpl;
import com.xjs.reptileLog.domain.WebmagicLog;
import com.xjs.reptileLog.mapper.WebmagicLogMapper;
import com.xjs.reptileLog.service.WebmagicLogService;
import org.springframework.stereotype.Service;
/**
* Spring-managed implementation of {@link WebmagicLogService}. Adds no logic of
* its own; behaviour is inherited from {@link ServiceImpl} bound to
* {@link WebmagicLogMapper}.
*
* @author xiejs
* @since 2022-02-17
*/
@Service
public class WebmagicLogServiceImpl extends ServiceImpl<WebmagicLogMapper, WebmagicLog> implements WebmagicLogService {
}

@ -0,0 +1,123 @@
package com.xjs.common.aop;
import cn.hutool.core.date.DateUtil;
import com.ruoyi.common.core.domain.R;
import com.xjs.annotation.ReptileLog;
import com.xjs.business.log.RemoteLogFeign;
import com.xjs.business.log.domain.WebmagicLog;
import lombok.extern.log4j.Log4j2;
import org.aspectj.lang.ProceedingJoinPoint;
import org.aspectj.lang.Signature;
import org.aspectj.lang.annotation.Around;
import org.aspectj.lang.annotation.Aspect;
import org.aspectj.lang.annotation.Pointcut;
import org.springframework.stereotype.Component;
import javax.annotation.Resource;
import java.lang.annotation.Annotation;
import java.lang.reflect.Method;
import java.time.LocalDateTime;
import java.time.temporal.ChronoUnit;
import static com.xjs.consts.ReqConst.ERROR;
import static com.xjs.consts.ReqConst.SUCCESS;
/**
*
*
* @author xiejs
* @since 2022-02-17
*/
@Component
@Aspect
@Log4j2
public class reptileLogAspect {
@Resource
private RemoteLogFeign remoteLogFeign;
/**
* AOP
*/
@Pointcut("@annotation(com.xjs.annotation.ReptileLog)")
public void pointcut() {
}
/**
*
*/
@Around("pointcut()")
public Object doAround(ProceedingJoinPoint joinPoint) throws Throwable {
Object obj = null;
try {
//切入前-----
LocalDateTime localDateTime1 = DateUtil.date().toLocalDateTime();
obj = joinPoint.proceed();
//切入后-----
LocalDateTime localDateTime2 = DateUtil.date().toLocalDateTime();
long between = ChronoUnit.MILLIS.between(localDateTime1, localDateTime2);
log.info("调用爬虫接口耗费时间:{}ms", between);
this.handle(joinPoint, between, obj);
} catch (Throwable e) {
e.printStackTrace();
}
return obj;
}
/**
*
*
* @param joinPoint
* @param between
* @param obj
*/
private void handle(ProceedingJoinPoint joinPoint, Long between, Object obj) {
//获取目标类名及方法名
Signature signature = joinPoint.getSignature();
String method = signature.getName();
Class aClass = signature.getDeclaringType();
Method[] methods = aClass.getMethods();
//根据目标的方法名判断当前方法
for (Method thisMethod : methods) {
if (method.equals(thisMethod.getName())) {
Annotation[] declaredAnnotations = thisMethod.getDeclaredAnnotations();
for (Annotation annotation : declaredAnnotations) {
if (annotation instanceof ReptileLog) {
String name = ((ReptileLog) annotation).name();
String url = ((ReptileLog) annotation).url();
WebmagicLog webmagicLog = new WebmagicLog();
webmagicLog.setName(name);
webmagicLog.setUrl(url);
webmagicLog.setRequestTime(between);
if (obj instanceof Long) {
webmagicLog.setComplexRate((Long) obj);
}
this.saveData(webmagicLog);
}
}
}
}
}
/**
*
*/
private void saveData(WebmagicLog webmagicLog) {
if (webmagicLog.getComplexRate() != null && webmagicLog.getComplexRate() == 0L) {
webmagicLog.setStatus(ERROR);
} else {
webmagicLog.setStatus(SUCCESS);
}
R<Object> r = remoteLogFeign.saveReptileLog(webmagicLog);
log.info(r.getMsg());
}
}

@ -49,8 +49,8 @@ public class CopyWritingNetworkController extends MyBaseController {
@GetMapping("taskForPRC") @GetMapping("taskForPRC")
@ApiOperation("供定时任务服务RPC远程调用") @ApiOperation("供定时任务服务RPC远程调用")
public R copyWritingNetworkTaskForPRC() { public R copyWritingNetworkTaskForPRC() {
copyWritingNetworkTask.reptileCopyWriting(); Long count = copyWritingNetworkTask.reptileCopyWriting();
return R.ok(); return R.ok(count);
} }

@ -1,5 +1,6 @@
package com.xjs.copywritingNetwork.task; package com.xjs.copywritingNetwork.task;
import com.xjs.annotation.ReptileLog;
import com.xjs.common.util.HttpUtils; import com.xjs.common.util.HttpUtils;
import com.xjs.copywritingNetwork.pojo.CopyWritingNetwork; import com.xjs.copywritingNetwork.pojo.CopyWritingNetwork;
import com.xjs.copywritingNetwork.service.CopyWritingNetworkService; import com.xjs.copywritingNetwork.service.CopyWritingNetworkService;
@ -39,25 +40,33 @@ public class CopyWritingNetworkTask {
private static final Pattern pattern = Pattern.compile(NUMBER_REGEX); private static final Pattern pattern = Pattern.compile(NUMBER_REGEX);
public void reptileCopyWriting() { @ReptileLog(name = "文案网", url = URL)
public Long reptileCopyWriting() {
//定义循环次数计时器
Long count = 0L;
try { try {
String html = httpUtils.doGetHtml(URL); String html = httpUtils.doGetHtml(URL);
Document document = Jsoup.parse(html); Document document = Jsoup.parse(html);
this.parseHtmlGetUrl(document); count = this.parseHtmlGetUrl(document, count);
} catch (Exception e) { } catch (Exception e) {
log.error(e.getMessage()); log.error(e.getMessage());
} }
return count;
} }
/** /**
* htmlurl * htmlurl
* *
* @param document * @param document dom
* @param count
* @return
*/ */
private void parseHtmlGetUrl(Document document) { private Long parseHtmlGetUrl(Document document, Long count) {
Elements zyzt = document.getElementsByClass("zyzt"); Elements zyzt = document.getElementsByClass("zyzt");
Map<String, String> map = new HashMap<>(); Map<String, String> map = new HashMap<>();
@ -67,19 +76,24 @@ public class CopyWritingNetworkTask {
String text = elementA.text(); String text = elementA.text();
String href = elementA.attr("href"); String href = elementA.attr("href");
map.put(text, href); map.put(text, href);
//计数
count++;
} }
} }
this.parseHtmlGetCopyWriting(map); return this.parseHtmlGetCopyWriting(map, count);
} }
/** /**
* html * html
* *
* @param map url * @param map url
* @param count
* @return
*/ */
private void parseHtmlGetCopyWriting(Map<String, String> map) { private Long parseHtmlGetCopyWriting(Map<String, String> map, Long count) {
ArrayList<CopyWritingNetwork> copyWritingNetworks = new ArrayList<>(); ArrayList<CopyWritingNetwork> copyWritingNetworks = new ArrayList<>();
for (Map.Entry<String, String> entry : map.entrySet()) { for (Map.Entry<String, String> entry : map.entrySet()) {
@ -110,6 +124,9 @@ public class CopyWritingNetworkTask {
if (StringUtils.isNotEmpty(content) && !matches) { if (StringUtils.isNotEmpty(content) && !matches) {
copyWritingNetworks.add(copyWritingNetwork); copyWritingNetworks.add(copyWritingNetwork);
} }
//计数
count++;
} }
} }
} }
@ -119,6 +136,7 @@ public class CopyWritingNetworkTask {
int i = copyWritingNetworkService.deleteRepeatData(); int i = copyWritingNetworkService.deleteRepeatData();
log.info("删除文案网数据重复数:" + i); log.info("删除文案网数据重复数:" + i);
return count;
} }

@ -45,8 +45,8 @@ public class SinaNewsController extends MyBaseController {
@GetMapping("taskForPRC") @GetMapping("taskForPRC")
@ApiOperation("供定时任务服务RPC远程调用") @ApiOperation("供定时任务服务RPC远程调用")
public R sinaTaskForPRC() { public R sinaTaskForPRC() {
sinaNewsTask.reptileSinaNews(); Long count = sinaNewsTask.reptileSinaNews();
return R.ok(); return R.ok(count);
} }

@ -2,6 +2,7 @@ package com.xjs.sina.task;
import cn.hutool.core.collection.CollUtil; import cn.hutool.core.collection.CollUtil;
import com.ruoyi.common.core.utils.StringUtils; import com.ruoyi.common.core.utils.StringUtils;
import com.xjs.annotation.ReptileLog;
import com.xjs.common.util.HttpUtils; import com.xjs.common.util.HttpUtils;
import com.xjs.sina.pojo.SinaNews; import com.xjs.sina.pojo.SinaNews;
import com.xjs.sina.service.SinaNewsService; import com.xjs.sina.service.SinaNewsService;
@ -32,25 +33,31 @@ public class SinaNewsTask {
public static final String URL = "https://news.sina.com.cn/"; public static final String URL = "https://news.sina.com.cn/";
public void reptileSinaNews() { @ReptileLog(name = "新浪新闻", url = URL)
public Long reptileSinaNews() {
//定义循环次数计时器
Long count = 0L;
try { try {
String html = httpUtils.doGetHtml(URL); String html = httpUtils.doGetHtml(URL);
Document document = Jsoup.parse(html); Document document = Jsoup.parse(html);
this.parse(document); count = this.parse(document,count);
} catch (Exception e) { } catch (Exception e) {
log.error(e.getMessage()); log.error(e.getMessage());
} }
return count;
} }
/** /**
* dom * dom
* *
* @param document dom * @param document dom
* @param count
*/ */
private void parse(Document document) { private Long parse(Document document,Long count) {
try { try {
//获取子链接 //获取子链接
Elements nav_mod_1 = document.getElementsByClass("nav-mod-1"); Elements nav_mod_1 = document.getElementsByClass("nav-mod-1");
@ -69,13 +76,17 @@ public class SinaNewsTask {
for (Map.Entry<String, String> entry : entrySet) { for (Map.Entry<String, String> entry : entrySet) {
String html = httpUtils.doGetHtml(entry.getValue()); String html = httpUtils.doGetHtml(entry.getValue());
Document docChild = Jsoup.parse(html); Document docChild = Jsoup.parse(html);
this.parseChile(docChild, entry.getKey());
//计数
count++;
count =this.parseChile(docChild, entry.getKey(),count);
} }
} }
} catch (Exception e) { } catch (Exception e) {
e.printStackTrace(); log.error(e.getMessage());
} }
return count;
} }
/** /**
@ -84,7 +95,7 @@ public class SinaNewsTask {
* @param docChild * @param docChild
* @param key key * @param key key
*/ */
private void parseChile(Document docChild, String key) { private Long parseChile(Document docChild, String key,Long count) {
try { try {
Elements a = docChild.getElementsByTag("a"); Elements a = docChild.getElementsByTag("a");
ArrayList<String> link = new ArrayList<>(); ArrayList<String> link = new ArrayList<>();
@ -163,15 +174,21 @@ public class SinaNewsTask {
sinaNewsList.add(sinaNews); sinaNewsList.add(sinaNews);
} }
} }
//计数
count++;
sinaNewsService.saveBatch(sinaNewsList, 30); sinaNewsService.saveBatch(sinaNewsList, 30);
//删除重复 //删除重复
int count = sinaNewsService.deleteRepeatData(); int num = sinaNewsService.deleteRepeatData();
log.info("重复数据为:{}", count); log.info("重复数据为:{}", num);
} catch (Exception e) { } catch (Exception e) {
e.printStackTrace(); log.error(e.getMessage());
} }
return count;
} }
} }

Loading…
Cancel
Save