1、菜单管理页面新增菜单类型显示

2、微信文章链接爬虫前端实现
3、爬虫aop获取url地址逻辑调整
4、爬虫爬取微信文章图片大小过滤
5、当天不爬取重复数据
pull/254/head
xjs 4 years ago
parent 59400b2e3e
commit 8b697a5d0e

@ -0,0 +1,37 @@
import request from '@/utils/request'
/**
 * Fetch the current crawler parameter settings.
 *
 * Sends GET /webmagic/weixin_link/getSettings through the shared `request` helper
 * and hands back whatever that helper returns (callers chain `.then` on it).
 */
export function getSettings() {
  const options = {
    url: '/webmagic/weixin_link/getSettings',
    method: 'get'
  }
  return request(options)
}
/**
 * Update the crawler parameter settings.
 *
 * Sends PUT /webmagic/weixin_link/updateSettings. The argument is forwarded
 * unchanged as the request's `params`, so despite its name it is expected to be
 * an object such as `{ path: '...' }` rather than a bare string.
 *
 * @param path query-parameter object passed straight to the request helper
 */
export function updateSettings(path) {
  const options = {
    url: '/webmagic/weixin_link/updateSettings',
    method: 'put',
    params: path
  }
  return request(options)
}
/**
 * Reset the crawler parameter settings.
 *
 * Sends PUT /webmagic/weixin_link/resetSettings with no parameters.
 */
export function resetSettings() {
  const options = {
    url: '/webmagic/weixin_link/resetSettings',
    method: 'put'
  }
  return request(options)
}
/**
 * Fetch the pictures of an article (this triggers the crawler).
 *
 * Sends GET /webmagic/weixin_link/getPicture. The argument is forwarded
 * unchanged as the request's `params`, so it is expected to be an object such
 * as `{ link: 'https://...' }`.
 *
 * @param link query-parameter object passed straight to the request helper
 */
export function getPicture(link) {
  const options = {
    url: '/webmagic/weixin_link/getPicture',
    method: 'get',
    params: link
  }
  return request(options)
}

@ -0,0 +1,172 @@
<template>
  <div class="">
    <!-- Section header: parameter settings -->
    <el-row>
      <el-col :span="24">
        <div class="top_col">
          <span style="cursor: pointer;font-size: 14px">参数配置</span>
        </div>
      </el-col>
    </el-row>
    <!-- Settings form: save path plus submit / reset-form / restore-default actions -->
    <el-row :gutter="15">
      <el-form ref="elForm" :model="formData" :rules="rules" size="medium" label-width="100px">
        <el-col :span="7">
          <el-form-item label="保存路径" prop="path">
            <el-input v-model="formData.path" placeholder="请输入磁盘地址" :maxlength="200" clearable
                      prefix-icon='el-icon-s-tools' :style="{width: '100%'}"></el-input>
          </el-form-item>
        </el-col>
        <el-col :span="9">
          <el-form-item>
            <el-button
              size="mini"
              type="primary"
              @click="submitForm"
              v-hasPermi="['webmagic:weixinlink:update']"
            >提交
            </el-button>
            <!-- This button previously had no label or icon and rendered as an empty box -->
            <el-button size="mini" @click="resetForm">重置</el-button>
            <el-button
              size="mini"
              type="info"
              icon="el-icon-refresh"
              @click="resetSettings"
              v-hasPermi="['webmagic:weixinlink:reset']"
            >恢复默认
            </el-button>
          </el-form-item>
        </el-col>
      </el-form>
    </el-row>
    <!-- Section header: crawler operation, with a hint about which link to paste -->
    <el-row>
      <el-col :span="24">
        <div class="top_col">
          <span style="cursor: pointer;font-size: 14px">爬虫操作</span>
          <el-tooltip content="请复制微信文章的链接地址" placement="top">
            <i class="el-icon-question"></i>
          </el-tooltip>
        </div>
      </el-col>
    </el-row>
    <!-- Crawler form: article link plus the run button (shows a spinner while `loading` is true) -->
    <el-row :gutter="15">
      <el-form size="medium" label-width="100px" :rules="linkRules" ref="linkRules" :model="requestData">
        <el-col :span="7">
          <el-form-item prop="link" label="链接">
            <el-input v-model="requestData.link" placeholder="请输入下载图片链接" :maxlength="200" clearable
                      prefix-icon='el-icon-s-tools' :style="{width: '100%'}"></el-input>
          </el-form-item>
        </el-col>
        <el-col :span="7">
          <el-form-item>
            <el-button @click="getPicture" size="mini" type="primary" :loading="loading">执行</el-button>
          </el-form-item>
        </el-col>
      </el-form>
    </el-row>
  </div>
</template>
<script>
import {getSettings, updateSettings, resetSettings, getPicture} from "@/api/business/webmagic/weixinlink/weixinlink";

/**
 * WeChat article crawler page.
 *
 * Lets the user view/update/reset the picture save path and start a crawl
 * for a pasted article link.
 */
export default {
  name: "WeiXinCrawler",
  data() {
    return {
      // Settings form model; `path` is the save path shown in the first form
      formData: {
        path: undefined,
      },
      // Validation rules for the settings form
      rules: {
        path: [
          {required: true, message: '请输入下载图片路径', trigger: 'blur'}
        ],
      },
      // Validation rules for the crawler form
      linkRules: {
        link: [
          {required: true, message: '请输入下载链接', trigger: 'blur'}
        ],
      },
      // True while a crawl request is in flight (drives the run button spinner)
      loading: false,
      // Crawler form model; sent as-is as the query parameters of getPicture
      requestData: {
        link: "",
      },
    }
  },
  created() {
    this.getSettings()
  },
  methods: {
    /** Validate the link form, then run the crawler for the entered link. */
    getPicture() {
      this.$refs['linkRules'].validate(valid => {
        if (!valid) return
        this.loading = true
        getPicture(this.requestData).then(() => {
          this.loading = false
          this.$modal.notifySuccess("执行成功")
        }).catch(() => {
          this.loading = false
        })
      })
    },
    /** Reset the settings form fields to their initial values. */
    resetForm() {
      this.$refs['elForm'].resetFields()
    },
    /** Load the current save path from the server into the settings form. */
    getSettings() {
      this.$modal.loading("请稍后...")
      getSettings().then(res => {
        this.$modal.closeLoading()
        this.formData.path = res.data
      }).catch(() => {
        // Without this the full-screen loading mask stays open forever when the request fails
        this.$modal.closeLoading()
      })
    },
    /** Ask the server to reset the settings, then reload the displayed value. */
    resetSettings() {
      resetSettings().then(() => {
        this.$modal.notifySuccess("重置成功");
        this.getSettings()
      })
    },
    /** Validate the settings form and submit the new save path. */
    submitForm() {
      this.$refs['elForm'].validate(valid => {
        if (!valid) return
        updateSettings(this.formData).then(() => {
          this.$modal.notifySuccess("修改成功");
        })
      })
    },
  }
}
</script>
<style scoped>
/* Rounded, amber section-header bar used above each form */
.top_col {
  margin: 10px;
  padding-left: 20px;
  height: 40px;
  line-height: 40px;
  font-size: 13px;
  border-radius: 17px;
  background-color: #ffba00;
}
</style>

@ -35,7 +35,8 @@
size="mini" size="mini"
@click="handleAdd" @click="handleAdd"
v-hasPermi="['system:menu:add']" v-hasPermi="['system:menu:add']"
>新增</el-button> >新增
</el-button>
</el-col> </el-col>
<el-col :span="1.5"> <el-col :span="1.5">
<el-button <el-button
@ -44,7 +45,8 @@
icon="el-icon-sort" icon="el-icon-sort"
size="mini" size="mini"
@click="toggleExpandAll" @click="toggleExpandAll"
>展开/折叠</el-button> >展开/折叠
</el-button>
</el-col> </el-col>
<right-toolbar :showSearch.sync="showSearch" @queryTable="getList"></right-toolbar> <right-toolbar :showSearch.sync="showSearch" @queryTable="getList"></right-toolbar>
</el-row> </el-row>
@ -60,10 +62,33 @@
<el-table-column prop="menuName" label="菜单名称" :show-overflow-tooltip="true" width="200"></el-table-column> <el-table-column prop="menuName" label="菜单名称" :show-overflow-tooltip="true" width="200"></el-table-column>
<el-table-column prop="icon" label="图标" align="center" width="100"> <el-table-column prop="icon" label="图标" align="center" width="100">
<template slot-scope="scope"> <template slot-scope="scope">
<svg-icon :icon-class="scope.row.icon" /> <svg-icon :icon-class="scope.row.icon"/>
</template> </template>
</el-table-column> </el-table-column>
<el-table-column prop="orderNum" label="排序" width="60"></el-table-column> <el-table-column prop="orderNum" label="排序" width="60"></el-table-column>
<!-- Menu type column: M = directory, C = menu, F = button; other values render nothing -->
<el-table-column prop="menuType" label="菜单类型" width="100">
  <template slot-scope="scope">
    <span v-if="scope.row.menuType==='M'">
      <el-tag type="" effect="plain">
        目录
      </el-tag>
    </span>
    <span v-else-if="scope.row.menuType==='C'">
      <el-tag type="warning" effect="plain">
        菜单
      </el-tag>
    </span>
    <span v-else-if="scope.row.menuType==='F'">
      <el-tag type="danger" effect="plain">
        按钮
      </el-tag>
    </span>
  </template>
</el-table-column>
<el-table-column prop="perms" label="权限标识" :show-overflow-tooltip="true"></el-table-column> <el-table-column prop="perms" label="权限标识" :show-overflow-tooltip="true"></el-table-column>
<el-table-column prop="component" label="组件路径" :show-overflow-tooltip="true"></el-table-column> <el-table-column prop="component" label="组件路径" :show-overflow-tooltip="true"></el-table-column>
<el-table-column prop="status" label="状态" width="80"> <el-table-column prop="status" label="状态" width="80">
@ -84,21 +109,24 @@
icon="el-icon-edit" icon="el-icon-edit"
@click="handleUpdate(scope.row)" @click="handleUpdate(scope.row)"
v-hasPermi="['system:menu:edit']" v-hasPermi="['system:menu:edit']"
>修改</el-button> >修改
</el-button>
<el-button <el-button
size="mini" size="mini"
type="text" type="text"
icon="el-icon-plus" icon="el-icon-plus"
@click="handleAdd(scope.row)" @click="handleAdd(scope.row)"
v-hasPermi="['system:menu:add']" v-hasPermi="['system:menu:add']"
>新增</el-button> >新增
</el-button>
<el-button <el-button
size="mini" size="mini"
type="text" type="text"
icon="el-icon-delete" icon="el-icon-delete"
@click="handleDelete(scope.row)" @click="handleDelete(scope.row)"
v-hasPermi="['system:menu:remove']" v-hasPermi="['system:menu:remove']"
>删除</el-button> >删除
</el-button>
</template> </template>
</el-table-column> </el-table-column>
</el-table> </el-table>
@ -135,7 +163,7 @@
trigger="click" trigger="click"
@show="$refs['iconSelect'].reset()" @show="$refs['iconSelect'].reset()"
> >
<IconSelect ref="iconSelect" @selected="selected" /> <IconSelect ref="iconSelect" @selected="selected"/>
<el-input slot="reference" v-model="form.icon" placeholder="点击选择图标" readonly> <el-input slot="reference" v-model="form.icon" placeholder="点击选择图标" readonly>
<svg-icon <svg-icon
v-if="form.icon" v-if="form.icon"
@ -144,19 +172,19 @@
class="el-input__icon" class="el-input__icon"
style="height: 32px;width: 16px;" style="height: 32px;width: 16px;"
/> />
<i v-else slot="prefix" class="el-icon-search el-input__icon" /> <i v-else slot="prefix" class="el-icon-search el-input__icon"/>
</el-input> </el-input>
</el-popover> </el-popover>
</el-form-item> </el-form-item>
</el-col> </el-col>
<el-col :span="12"> <el-col :span="12">
<el-form-item label="菜单名称" prop="menuName"> <el-form-item label="菜单名称" prop="menuName">
<el-input v-model="form.menuName" placeholder="请输入菜单名称" /> <el-input v-model="form.menuName" placeholder="请输入菜单名称"/>
</el-form-item> </el-form-item>
</el-col> </el-col>
<el-col :span="12"> <el-col :span="12">
<el-form-item label="显示排序" prop="orderNum"> <el-form-item label="显示排序" prop="orderNum">
<el-input-number v-model="form.orderNum" controls-position="right" :min="0" /> <el-input-number v-model="form.orderNum" controls-position="right" :min="0"/>
</el-form-item> </el-form-item>
</el-col> </el-col>
<el-col :span="12" v-if="form.menuType != 'F'"> <el-col :span="12" v-if="form.menuType != 'F'">
@ -181,7 +209,7 @@
</el-tooltip> </el-tooltip>
路由地址 路由地址
</span> </span>
<el-input v-model="form.path" placeholder="请输入路由地址" /> <el-input v-model="form.path" placeholder="请输入路由地址"/>
</el-form-item> </el-form-item>
</el-col> </el-col>
<el-col :span="12" v-if="form.menuType == 'C'"> <el-col :span="12" v-if="form.menuType == 'C'">
@ -192,12 +220,12 @@
</el-tooltip> </el-tooltip>
组件路径 组件路径
</span> </span>
<el-input v-model="form.component" placeholder="请输入组件路径" /> <el-input v-model="form.component" placeholder="请输入组件路径"/>
</el-form-item> </el-form-item>
</el-col> </el-col>
<el-col :span="12" v-if="form.menuType != 'M'"> <el-col :span="12" v-if="form.menuType != 'M'">
<el-form-item> <el-form-item>
<el-input v-model="form.perms" placeholder="请输入权限标识" maxlength="100" /> <el-input v-model="form.perms" placeholder="请输入权限标识" maxlength="100"/>
<span slot="label"> <span slot="label">
<el-tooltip content="控制器中定义的权限字符,如:@PreAuthorize(`@ss.hasPermi('system:user:list')`)" placement="top"> <el-tooltip content="控制器中定义的权限字符,如:@PreAuthorize(`@ss.hasPermi('system:user:list')`)" placement="top">
<i class="el-icon-question"></i> <i class="el-icon-question"></i>
@ -208,7 +236,7 @@
</el-col> </el-col>
<el-col :span="12" v-if="form.menuType == 'C'"> <el-col :span="12" v-if="form.menuType == 'C'">
<el-form-item> <el-form-item>
<el-input v-model="form.query" placeholder="请输入路由参数" maxlength="255" /> <el-input v-model="form.query" placeholder="请输入路由参数" maxlength="255"/>
<span slot="label"> <span slot="label">
<el-tooltip content='访问路由的默认传递参数,如:`{"id": 1, "name": "ry"}`' placement="top"> <el-tooltip content='访问路由的默认传递参数,如:`{"id": 1, "name": "ry"}`' placement="top">
<i class="el-icon-question"></i> <i class="el-icon-question"></i>
@ -244,7 +272,8 @@
v-for="dict in dict.type.sys_show_hide" v-for="dict in dict.type.sys_show_hide"
:key="dict.value" :key="dict.value"
:label="dict.value" :label="dict.value"
>{{dict.label}}</el-radio> >{{ dict.label }}
</el-radio>
</el-radio-group> </el-radio-group>
</el-form-item> </el-form-item>
</el-col> </el-col>
@ -261,7 +290,8 @@
v-for="dict in dict.type.sys_normal_disable" v-for="dict in dict.type.sys_normal_disable"
:key="dict.value" :key="dict.value"
:label="dict.value" :label="dict.value"
>{{dict.label}}</el-radio> >{{ dict.label }}
</el-radio>
</el-radio-group> </el-radio-group>
</el-form-item> </el-form-item>
</el-col> </el-col>
@ -276,7 +306,7 @@
</template> </template>
<script> <script>
import { listMenu, getMenu, delMenu, addMenu, updateMenu } from "@/api/system/menu"; import {listMenu, getMenu, delMenu, addMenu, updateMenu} from "@/api/system/menu";
import Treeselect from "@riophae/vue-treeselect"; import Treeselect from "@riophae/vue-treeselect";
import "@riophae/vue-treeselect/dist/vue-treeselect.css"; import "@riophae/vue-treeselect/dist/vue-treeselect.css";
import IconSelect from "@/components/IconSelect"; import IconSelect from "@/components/IconSelect";
@ -284,7 +314,7 @@ import IconSelect from "@/components/IconSelect";
export default { export default {
name: "Menu", name: "Menu",
dicts: ['sys_show_hide', 'sys_normal_disable'], dicts: ['sys_show_hide', 'sys_normal_disable'],
components: { Treeselect, IconSelect }, components: {Treeselect, IconSelect},
data() { data() {
return { return {
// //
@ -313,13 +343,13 @@ export default {
// //
rules: { rules: {
menuName: [ menuName: [
{ required: true, message: "菜单名称不能为空", trigger: "blur" } {required: true, message: "菜单名称不能为空", trigger: "blur"}
], ],
orderNum: [ orderNum: [
{ required: true, message: "菜单顺序不能为空", trigger: "blur" } {required: true, message: "菜单顺序不能为空", trigger: "blur"}
], ],
path: [ path: [
{ required: true, message: "路由地址不能为空", trigger: "blur" } {required: true, message: "路由地址不能为空", trigger: "blur"}
] ]
} }
}; };
@ -355,7 +385,7 @@ export default {
getTreeselect() { getTreeselect() {
listMenu().then(response => { listMenu().then(response => {
this.menuOptions = []; this.menuOptions = [];
const menu = { menuId: 0, menuName: '主类目', children: [] }; const menu = {menuId: 0, menuName: '主类目', children: []};
menu.children = this.handleTree(response.data, "menuId"); menu.children = this.handleTree(response.data, "menuId");
this.menuOptions.push(menu); this.menuOptions.push(menu);
}); });
@ -421,7 +451,7 @@ export default {
}); });
}, },
/** 提交按钮 */ /** 提交按钮 */
submitForm: function() { submitForm: function () {
this.$refs["form"].validate(valid => { this.$refs["form"].validate(valid => {
if (valid) { if (valid) {
if (this.form.menuId != undefined) { if (this.form.menuId != undefined) {
@ -442,12 +472,13 @@ export default {
}, },
/** 删除按钮操作 */ /** 删除按钮操作 */
handleDelete(row) { handleDelete(row) {
this.$modal.confirm('是否确认删除名称为"' + row.menuName + '"的数据项?').then(function() { this.$modal.confirm('是否确认删除名称为"' + row.menuName + '"的数据项?').then(function () {
return delMenu(row.menuId); return delMenu(row.menuId);
}).then(() => { }).then(() => {
this.getList(); this.getList();
this.$modal.msgSuccess("删除成功"); this.$modal.msgSuccess("删除成功");
}).catch(() => {}); }).catch(() => {
});
} }
} }
}; };

@ -18,7 +18,7 @@ public @interface ReptileLog {
String name() default ""; String name() default "";
/** /**
* url * url(""Stringurl)
*/ */
String url() default ""; String url() default "";

@ -37,4 +37,9 @@ public class RegexConst {
* *
*/ */
public static final String FILE_PATH_REGEX= "^[a-zA-Z]:(((\\\\(?! )[^/:*?<>\\\"\"|\\\\]+)+\\\\?)|(\\\\)?)\\s*$"; public static final String FILE_PATH_REGEX= "^[a-zA-Z]:(((\\\\(?! )[^/:*?<>\\\"\"|\\\\]+)+\\\\?)|(\\\\)?)\\s*$";
/**
 * Regular expression for a URL whose scheme is http, https, ftp or file,
 * followed by "://" and a run of URL characters.
 */
public static final String URL_REGEX= "(https?|ftp|file)://[-A-Za-z0-9+&@#/%?=~_|!:,.;]+[-A-Za-z0-9+&@#/%=~_|]";
} }

@ -1,6 +1,7 @@
package com.xjs.common.aop; package com.xjs.common.aop;
import com.ruoyi.common.core.domain.R; import com.ruoyi.common.core.domain.R;
import com.ruoyi.common.core.utils.StringUtils;
import com.xjs.annotation.ReptileLog; import com.xjs.annotation.ReptileLog;
import com.xjs.business.log.RemoteLogFeign; import com.xjs.business.log.RemoteLogFeign;
import com.xjs.business.log.domain.WebmagicLog; import com.xjs.business.log.domain.WebmagicLog;
@ -90,6 +91,16 @@ public class reptileLogAspect {
String name = ((ReptileLog) annotation).name(); String name = ((ReptileLog) annotation).name();
String url = ((ReptileLog) annotation).url(); String url = ((ReptileLog) annotation).url();
// The annotation carries no url: fall back to a String argument of the intercepted call
if (StringUtils.isEmpty(url)) {
// Read the actual argument values passed to the intercepted method
Object[] args = joinPoint.getArgs();
for (Object arg : args) {
if (arg instanceof String) {
// NOTE(review): there is no break, so with several String arguments the LAST one is used — confirm this is intended
url = (String) arg;
}
}
}
WebmagicLog webmagicLog = new WebmagicLog(); WebmagicLog webmagicLog = new WebmagicLog();
webmagicLog.setName(name); webmagicLog.setName(name);
webmagicLog.setUrl(url); webmagicLog.setUrl(url);

@ -28,7 +28,14 @@ public class WeiXinConst {
public static final String PNG = "png"; public static final String PNG = "png";
public static final String GIF = "gif";
public static final String DOT = "."; public static final String DOT = ".";
/**
 * Image size threshold in kilobytes. The image pipeline skips a download
 * when its reported content length, in whole KB, is below this value.
 */
public static final Long SIZE_KB = 30L;
} }

@ -7,10 +7,7 @@ import com.xjs.weixin.service.WeiXinLinkService;
import io.swagger.annotations.Api; import io.swagger.annotations.Api;
import io.swagger.annotations.ApiOperation; import io.swagger.annotations.ApiOperation;
import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.web.bind.annotation.GetMapping; import org.springframework.web.bind.annotation.*;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.bind.annotation.RestController;
/** /**
* *
@ -26,7 +23,7 @@ public class WeiXinLinkController extends BaseController {
@Autowired @Autowired
private WeiXinLinkService weiXInLinkService; private WeiXinLinkService weiXInLinkService;
@RequiresPermissions("webmagic:weixinlink:get") @RequiresPermissions("webmagic:weixinlink:list")
@GetMapping("/getPicture") @GetMapping("/getPicture")
@ApiOperation("获取文章图片") @ApiOperation("获取文章图片")
public AjaxResult getPicture(@RequestParam("link") String link) { public AjaxResult getPicture(@RequestParam("link") String link) {
@ -34,4 +31,32 @@ public class WeiXinLinkController extends BaseController {
return toAjax(flag); return toAjax(flag);
} }
/**
 * Updates the crawler's path setting.
 *
 * @param path new value, taken from the {@code path} request parameter
 * @return the result {@code toAjax} builds from the service's success flag
 */
@RequiresPermissions("webmagic:weixinlink:update")
@PutMapping("/updateSettings")
@ApiOperation("修改参数配置")
public AjaxResult updateSettings(@RequestParam("path") String path) {
    return toAjax(weiXInLinkService.updateSettings(path));
}
/**
 * Returns the current crawler setting value.
 *
 * @return a success result whose data is the string supplied by the service
 */
@RequiresPermissions("webmagic:weixinlink:list")
@GetMapping("/getSettings")
@ApiOperation("获取参数配置")
public AjaxResult getSettings() {
    return AjaxResult.success("操作成功", weiXInLinkService.getSettings());
}
/**
 * Resets the crawler setting.
 *
 * @return the result {@code toAjax} builds from the service's success flag
 */
@RequiresPermissions("webmagic:weixinlink:reset")
@PutMapping("/resetSettings")
@ApiOperation("重置参数配置")
public AjaxResult resetSettings() {
    return toAjax(weiXInLinkService.restSettings());
}
} }

@ -13,4 +13,23 @@ public interface WeiXinLinkService {
* @param link * @param link
*/ */
Boolean getPicture(String link); Boolean getPicture(String link);
/**
 * Returns the current crawler setting value.
 *
 * @return the setting string; the implementation added alongside this
 *         declaration returns {@code null} when the lookup does not succeed
 */
String getSettings();
/**
 * Updates the crawler setting with a new file path.
 *
 * @param path new path value
 * @return {@code true} when the update succeeded
 */
boolean updateSettings(String path);
/**
 * Resets the crawler setting.
 * NOTE(review): the name looks like a typo of "resetSettings"; renaming it would require changing its callers.
 *
 * @return {@code true} when the reset succeeded
 */
boolean restSettings();
} }

@ -1,10 +1,25 @@
package com.xjs.weixin.service.impl; package com.xjs.weixin.service.impl;
import com.ruoyi.common.core.constant.HttpStatus;
import com.ruoyi.common.core.domain.R;
import com.ruoyi.common.core.utils.StringUtils;
import com.ruoyi.common.redis.service.RedisService;
import com.ruoyi.system.api.RemoteConfigService;
import com.xjs.exception.BusinessException;
import com.xjs.weixin.service.WeiXinLinkService; import com.xjs.weixin.service.WeiXinLinkService;
import com.xjs.weixin.task.WeiXinLinkTask; import com.xjs.weixin.task.WeiXinLinkTask;
import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service; import org.springframework.stereotype.Service;
import javax.annotation.Resource;
import java.util.regex.Pattern;
import static com.xjs.consts.RedisConst.REPTILE_WEIXIN_LINK_COUNT;
import static com.xjs.consts.RegexConst.FILE_PATH_REGEX;
import static com.xjs.consts.RegexConst.URL_REGEX;
import static com.xjs.weixin.consts.WeiXinConst.*;
/** /**
* service * service
* *
@ -16,10 +31,70 @@ public class WeiXinLinkServiceImpl implements WeiXinLinkService {
@Autowired @Autowired
private WeiXinLinkTask weiXinLinkTask; private WeiXinLinkTask weiXinLinkTask;
@Resource
private RemoteConfigService remoteConfigService;
@Autowired
private RedisService redisService;
@Override @Override
public Boolean getPicture(String link) { public Boolean getPicture(String link) {
//校验link有效性
if (StringUtils.isBlank(link)) {
throw new BusinessException("链接地址不能为空");
}
boolean matches = Pattern.matches(URL_REGEX, link);
if (!matches) {
throw new BusinessException("链接格式不正确");
}
Long count = weiXinLinkTask.reptileWeiXinLink(link); Long count = weiXinLinkTask.reptileWeiXinLink(link);
return count != 0L; return count != 0L;
} }
/**
 * Reads the crawler setting: the Redis entry under {@code REDIS_KEY} is used
 * when present, otherwise the value is requested from the remote config service.
 *
 * @return the setting value, or {@code null} when the remote call does not report success
 */
@Override
public String getSettings() {
    // Prefer the cached entry when Redis has one
    if (redisService.hasKey(REDIS_KEY)) {
        return redisService.getCacheObject(REDIS_KEY);
    }
    R<String> response = remoteConfigService.getConfigKeyForRPC(CONFIG_KEY);
    if (response.getCode() != HttpStatus.SUCCESS) {
        return null;
    }
    return response.getData();
}
/**
 * Stores a new path under {@code CONFIG_KEY} through the remote config service.
 *
 * <p>The update is refused while the crawler is running, and the path must match
 * {@code FILE_PATH_REGEX}.
 *
 * @param path new path value; {@code null} is rejected like any other malformed path
 * @return {@code true} when the remote call reports success
 * @throws BusinessException when the crawler is running or the path is malformed
 */
@Override
public boolean updateSettings(String path) {
    this.checkRunning();
    // Guard null explicitly: Pattern.matches would otherwise fail with a bare NullPointerException
    if (path == null || !Pattern.matches(FILE_PATH_REGEX, path)) {
        throw new BusinessException("文件路径格式不正确");
    }
    // NOTE(review): getSettings() prefers the Redis entry under REDIS_KEY, which is not touched here —
    // confirm that entry is refreshed elsewhere, otherwise a stale value may be returned after an update
    R<?> result = remoteConfigService.editForRPC(CONFIG_KEY, path);
    return result.getCode() == HttpStatus.SUCCESS;
}
/**
 * Writes the {@code PATH} constant back under {@code CONFIG_KEY} through the
 * remote config service. Refused while the crawler is running.
 *
 * @return {@code true} when the remote call reports success
 */
@Override
public boolean restSettings() {
    this.checkRunning();
    return remoteConfigService.editForRPC(CONFIG_KEY, PATH).getCode() == HttpStatus.SUCCESS;
}
/**
 * Rejects settings changes while the crawler is running. The crawler counts as
 * running while Redis holds the {@code REPTILE_WEIXIN_LINK_COUNT} key.
 *
 * @throws BusinessException when that key exists
 */
private void checkRunning() {
    boolean crawlerBusy = redisService.hasKey(REPTILE_WEIXIN_LINK_COUNT);
    if (!crawlerBusy) {
        return;
    }
    throw new BusinessException("爬虫正在执行中!暂时无法修改,请稍后再试");
}
} }

@ -29,7 +29,7 @@ public class WeiXinLinkTask {
@Autowired @Autowired
private RedisService redisService; private RedisService redisService;
@ReptileLog(name = "微信链接", url = "###") @ReptileLog(name = "微信链接")
public Long reptileWeiXinLink(String link) { public Long reptileWeiXinLink(String link) {
//执行爬虫 //执行爬虫
Spider.create(weiXinLinkProcessor) Spider.create(weiXinLinkProcessor)
@ -39,7 +39,7 @@ public class WeiXinLinkTask {
.setDuplicateRemover(new BloomFilterDuplicateRemover(110000)))//设置url去重过滤器 .setDuplicateRemover(new BloomFilterDuplicateRemover(110000)))//设置url去重过滤器
.addPipeline(weiXinLinkPipeline)//设置爬取之后的数据操作 .addPipeline(weiXinLinkPipeline)//设置爬取之后的数据操作
//.setDownloader(downloader)//设置下载器 //.setDownloader(downloader)//设置下载器
.run();//执行 .run();//同步执行
Integer cache = redisService.getCacheObject(REPTILE_WEIXIN_LINK_COUNT); Integer cache = redisService.getCacheObject(REPTILE_WEIXIN_LINK_COUNT);
redisService.deleteObject(REPTILE_WEIXIN_LINK_COUNT); redisService.deleteObject(REPTILE_WEIXIN_LINK_COUNT);

@ -43,23 +43,43 @@ public class WeiXinLinkPipeline implements Pipeline {
private RemoteConfigService remoteConfigService; private RemoteConfigService remoteConfigService;
@Override @Override
public void process(ResultItems resultItems, Task task) { public void process(ResultItems resultItems, Task task) {
List<String> linkList = resultItems.get("linkList"); List<String> linkList = resultItems.get("linkList");
String title = resultItems.get("title");
//如果磁盘存在该路径则不进行后续操作(已经有数据) ----只能判断当天是否存在同一个文件夹,隔天失效
String appendPath = this.getAppendPath(title);
File file = new File(appendPath);
if (file.exists()) {
return;
}
for (String link : linkList) { for (String link : linkList) {
InputStream inputStream = null;
// 创建GET请求 // 创建GET请求
CloseableHttpClient httpClient = HttpClients.createDefault(); CloseableHttpClient httpClient = HttpClients.createDefault();
HttpGet httpGet = null; HttpGet httpGet = null;
InputStream inputStream = null;
try { try {
httpGet = new HttpGet(link); httpGet = new HttpGet(link);
HttpResponse response = httpClient.execute(httpGet); HttpResponse response = httpClient.execute(httpGet);
if (response.getStatusLine().getStatusCode() == HttpStatus.SUCCESS) { if (response.getStatusLine().getStatusCode() == HttpStatus.SUCCESS) {
inputStream = response.getEntity().getContent(); inputStream = response.getEntity().getContent();
//文件小于30kb则不写入
long contentLength = response.getEntity().getContentLength();
long kb = contentLength / 1024;
if (SIZE_KB > kb) {
continue;
}
//拼接文件后缀
String suffix; String suffix;
if (link.contains(JPEG)) { if (link.contains(JPEG)) {
suffix = JPEG; suffix = JPEG;
@ -67,18 +87,18 @@ public class WeiXinLinkPipeline implements Pipeline {
suffix = JPG; suffix = JPG;
} else if (link.contains(PNG)) { } else if (link.contains(PNG)) {
suffix = PNG; suffix = PNG;
} else if (link.contains(GIF)) {
suffix = GIF;
} else { } else {
suffix = JPG; suffix = JPG;
} }
String fileName = UUID.randomUUID() + DOT + suffix; String fileName = UUID.randomUUID() + DOT + suffix;
this.downloadPicture(inputStream, getPath(), fileName); this.downloadPicture(inputStream, this.getPath(), fileName, title);
} }
} catch (Exception e) { } catch (Exception e) {
e.printStackTrace(); e.printStackTrace();
} } finally {
finally {
try { try {
if (httpGet != null) { if (httpGet != null) {
httpGet.clone(); httpGet.clone();
@ -96,7 +116,7 @@ public class WeiXinLinkPipeline implements Pipeline {
inputStream.close(); inputStream.close();
} }
} catch (IOException e) { } catch (IOException e) {
e.printStackTrace(); log.error(e.getMessage());
} }
} }
} }
@ -104,32 +124,41 @@ public class WeiXinLinkPipeline implements Pipeline {
} }
//链接url下载图片 /**
private void downloadPicture(InputStream inputStream, String path, String fileName) { * url
*
* @param inputStream
* @param path
* @param fileName
* @param title
*/
private void downloadPicture(InputStream inputStream, String path, String fileName, String title) {
try { try {
DataInputStream dataInputStream = new DataInputStream(inputStream); DataInputStream dataInputStream = new DataInputStream(inputStream);
//拼接文件路径 //拼接文件路径
String newPath=path+ File.separator+DateUtil.format(new Date(), DatePattern.NORM_MONTH_PATTERN)+File.separator String appendPath = this.getAppendPath(title);
+DateUtil.format(new Date(), "dd")+"日";
//如果文件夹不存在则创建 //如果文件夹不存在则创建
File file = new File(newPath); File file = new File(appendPath);
if (!file.exists()) { if (!file.exists()) {
file.mkdirs(); boolean mkdirs = file.mkdirs();
} }
String absolutePath = file.getAbsolutePath(); String absolutePath = file.getAbsolutePath();
String absolute = absolutePath + File.separator + fileName; String absolute = absolutePath + File.separator + fileName;
FileOutputStream f = new FileOutputStream(absolute); FileOutputStream f = new FileOutputStream(absolute);
ByteArrayOutputStream out = new ByteArrayOutputStream(); ByteArrayOutputStream out = new ByteArrayOutputStream();
byte[] buffer = new byte[1024]; byte[] bf = new byte[1024];
int length; int length;
while ((length = dataInputStream.read(buffer)) > 0) { while ((length = dataInputStream.read(bf)) > 0) {
out.write(buffer, 0, length); out.write(bf, 0, length);
} }
f.write(out.toByteArray()); f.write(out.toByteArray());
@ -141,6 +170,50 @@ public class WeiXinLinkPipeline implements Pipeline {
} }
/**
 * Builds the directory path for one article's pictures:
 * {@code getPath()}, then the month formatted with {@code DatePattern.NORM_MONTH_PATTERN},
 * then the two-digit day followed by "日", then the sanitized title, joined with
 * {@link File#separator}.
 *
 * @param title article title; spaces are removed and each of
 *              {@code / \ : * ? " < > |} is replaced with "-" so it can be used as a folder name
 * @return the assembled path string
 */
private String getAppendPath(String title) {
    // NOTE(review): title is dereferenced without a null check — confirm callers never pass null
    // Strip spaces, then replace every character that is illegal in a folder name with "-"
    String folderName = title.replace(" ", "").replaceAll("[/\\\\:*?\"<>|]", "-");
    String basePath = this.getPath();
    // Take a single timestamp so the month and day segments cannot disagree across midnight
    Date now = new Date();
    return basePath + File.separator
            + DateUtil.format(now, DatePattern.NORM_MONTH_PATTERN) + File.separator
            + DateUtil.format(now, "dd") + "日" + File.separator + folderName;
}
/** /**
* -> -> * -> ->
* *

@ -1,5 +1,6 @@
package com.xjs.weixin.webmagic; package com.xjs.weixin.webmagic;
import com.ruoyi.common.core.utils.StringUtils;
import com.ruoyi.common.redis.service.RedisService; import com.ruoyi.common.redis.service.RedisService;
import lombok.extern.log4j.Log4j2; import lombok.extern.log4j.Log4j2;
import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Autowired;
@ -34,8 +35,15 @@ public class WeiXinLinkProcessor implements PageProcessor {
count = 0; count = 0;
} }
List<String> linkList = page.getHtml().css("section > section > img", "data-src").all(); //获取图片url
List<String> linkList = page.getHtml().css("img", "data-src").all();
//去空
linkList.removeIf(StringUtils::isBlank);
//获取标题
String title = page.getHtml().css("#activity-name","text").get();
page.putField("title",title);
page.putField("linkList",linkList); page.putField("linkList",linkList);
log.info("linkList----{}",linkList); log.info("linkList----{}",linkList);

Loading…
Cancel
Save