实现微信公众号定时爬虫任务

pull/254/head
xjs 3 years ago
parent fcb352b230
commit 51a6773135

@ -18,4 +18,7 @@ public interface RemoteWebmagicWeiXinSouGouFeign {
@GetMapping("/weixin_sougou/taskForPRC") @GetMapping("/weixin_sougou/taskForPRC")
R WeiXinSouGouTaskForPRC() ; R WeiXinSouGouTaskForPRC() ;
/**
 * Remote call to the endpoint mapped at {@code /weixin_official_accounts/taskForPRC},
 * used to start the WeChat official-account crawler task.
 *
 * @return the response wrapper returned by the remote endpoint (or by the fallback)
 */
@GetMapping("/weixin_official_accounts/taskForPRC")
R WeiXinOfficialAccountsTaskForPRC() ;
} }

@ -22,6 +22,12 @@ public class RemoteWebmagicWeiXinSouGouFactory implements FallbackFactory<Remote
log.error("微信搜狗 爬虫定时任务 降级------服务可能正在运行"); log.error("微信搜狗 爬虫定时任务 降级------服务可能正在运行");
return R.fail("降级处理------服务可能正在运行"); return R.fail("降级处理------服务可能正在运行");
} }
/**
 * Fallback for the official-account crawler call: logs the degradation and
 * answers with a failure response instead of propagating the error.
 *
 * @return a failed {@code R} carrying the degradation message
 */
@Override
public R WeiXinOfficialAccountsTaskForPRC() {
    final String degradedMessage = "降级处理------服务可能正在运行";
    log.error("微信公众号 爬虫定时任务 降级------服务可能正在运行");
    return R.fail(degradedMessage);
}
}; };
} }
} }

@ -0,0 +1,35 @@
package com.xjs.job.task.webmagic;
import com.ruoyi.common.core.domain.R;
import com.xjs.business.webmagic.RemoteWebmagicWeiXinSouGouFeign;
import com.xjs.job.aop.TaskLog;
import lombok.extern.log4j.Log4j2;
import org.springframework.stereotype.Component;
import javax.annotation.Resource;
/**
 * Scheduled job bean that starts the WeChat official-account crawler by
 * calling the webmagic service through its Feign client.
 *
 * @author xiejs
 * @since 2022-06-13
 */
@Component("OfficialAccountsTask")
@Log4j2
public class OfficialAccountsTask {

    @Resource
    private RemoteWebmagicWeiXinSouGouFeign remoteWebmagicWeiXinSouGouFeign;

    /**
     * Invokes the remote crawler endpoint once and logs the code, message and
     * data of the response between a start and an end marker line.
     */
    @TaskLog(name = "微信公众号爬虫任务")
    public void execute() {
        log.info("---------------爬虫-公众号定时任务Start-------------------");

        final R response = this.remoteWebmagicWeiXinSouGouFeign.WeiXinOfficialAccountsTaskForPRC();
        log.info("爬虫-公众号定时任务结果:code={},msg={},data={}",
                response.getCode(), response.getMsg(), response.getData());

        log.info("---------------爬虫-公众号定时任务end---------------------");
    }
}

@ -1,346 +1,346 @@
<template> <template>
<div class="app-container"> <div class="app-container">
<el-form :model="queryParams" ref="queryForm" :inline="true" v-show="showSearch" label-width="68px"> <el-form :model="queryParams" ref="queryForm" :inline="true" v-show="showSearch" label-width="68px">
<el-form-item label="参数名称" prop="configName"> <el-form-item label="参数名称" prop="configName">
<el-input <el-input
v-model="queryParams.configName" v-model="queryParams.configName"
placeholder="请输入参数名称" placeholder="请输入参数名称"
clearable clearable
size="small" size="small"
style="width: 240px" style="width: 240px"
@keyup.enter.native="handleQuery" @keyup.enter.native="handleQuery"
/> />
</el-form-item> </el-form-item>
<el-form-item label="参数键名" prop="configKey"> <el-form-item label="参数键名" prop="configKey">
<el-input <el-input
v-model="queryParams.configKey" v-model="queryParams.configKey"
placeholder="请输入参数键名" placeholder="请输入参数键名"
clearable clearable
size="small" size="small"
style="width: 240px" style="width: 240px"
@keyup.enter.native="handleQuery" @keyup.enter.native="handleQuery"
/> />
</el-form-item> </el-form-item>
<el-form-item label="系统内置" prop="configType"> <el-form-item label="系统内置" prop="configType">
<el-select v-model="queryParams.configType" placeholder="系统内置" clearable size="small"> <el-select v-model="queryParams.configType" placeholder="系统内置" clearable size="small">
<el-option <el-option
v-for="dict in dict.type.sys_yes_no" v-for="dict in dict.type.sys_yes_no"
:key="dict.value" :key="dict.value"
:label="dict.label" :label="dict.label"
:value="dict.value" :value="dict.value"
/> />
</el-select> </el-select>
</el-form-item> </el-form-item>
<el-form-item label="创建时间"> <el-form-item label="创建时间">
<el-date-picker <el-date-picker
v-model="dateRange" v-model="dateRange"
size="small" size="small"
style="width: 240px" style="width: 240px"
value-format="yyyy-MM-dd" value-format="yyyy-MM-dd"
type="daterange" type="daterange"
range-separator="-" range-separator="-"
start-placeholder="开始日期" start-placeholder="开始日期"
end-placeholder="结束日期" end-placeholder="结束日期"
></el-date-picker> ></el-date-picker>
</el-form-item> </el-form-item>
<el-form-item> <el-form-item>
<el-button type="primary" icon="el-icon-search" size="mini" @click="handleQuery"></el-button> <el-button type="primary" icon="el-icon-search" size="mini" @click="handleQuery"></el-button>
<el-button icon="el-icon-refresh" size="mini" @click="resetQuery"></el-button> <el-button icon="el-icon-refresh" size="mini" @click="resetQuery"></el-button>
</el-form-item> </el-form-item>
</el-form> </el-form>
<el-row :gutter="10" class="mb8"> <el-row :gutter="10" class="mb8">
<el-col :span="1.5"> <el-col :span="1.5">
<el-button <el-button
type="primary" type="primary"
plain plain
icon="el-icon-plus" icon="el-icon-plus"
size="mini" size="mini"
@click="handleAdd" @click="handleAdd"
v-hasPermi="['system:config:add']" v-hasPermi="['system:config:add']"
>新增</el-button> >新增</el-button>
</el-col> </el-col>
<el-col :span="1.5"> <el-col :span="1.5">
<el-button <el-button
type="success" type="success"
plain plain
icon="el-icon-edit" icon="el-icon-edit"
size="mini" size="mini"
:disabled="single" :disabled="single"
@click="handleUpdate" @click="handleUpdate"
v-hasPermi="['system:config:edit']" v-hasPermi="['system:config:edit']"
>修改</el-button> >修改</el-button>
</el-col> </el-col>
<el-col :span="1.5"> <el-col :span="1.5">
<el-button <el-button
type="danger" type="danger"
plain plain
icon="el-icon-delete" icon="el-icon-delete"
size="mini" size="mini"
:disabled="multiple" :disabled="multiple"
@click="handleDelete" @click="handleDelete"
v-hasPermi="['system:config:remove']" v-hasPermi="['system:config:remove']"
>删除</el-button> >删除</el-button>
</el-col> </el-col>
<el-col :span="1.5"> <el-col :span="1.5">
<el-button <el-button
type="warning" type="warning"
plain plain
icon="el-icon-download" icon="el-icon-download"
size="mini" size="mini"
@click="handleExport" @click="handleExport"
v-hasPermi="['system:config:export']" v-hasPermi="['system:config:export']"
>导出</el-button> >导出</el-button>
</el-col> </el-col>
<el-col :span="1.5"> <el-col :span="1.5">
<el-button <el-button
type="danger" type="danger"
plain plain
icon="el-icon-refresh" icon="el-icon-refresh"
size="mini" size="mini"
@click="handleRefreshCache" @click="handleRefreshCache"
v-hasPermi="['system:config:remove']" v-hasPermi="['system:config:remove']"
>刷新缓存</el-button> >刷新缓存</el-button>
</el-col> </el-col>
<right-toolbar :showSearch.sync="showSearch" @queryTable="getList"></right-toolbar> <right-toolbar :showSearch.sync="showSearch" @queryTable="getList"></right-toolbar>
</el-row> </el-row>
<el-table v-loading="loading" :data="configList" @selection-change="handleSelectionChange"> <el-table v-loading="loading" :data="configList" @selection-change="handleSelectionChange">
<el-table-column type="selection" width="55" align="center" /> <el-table-column type="selection" width="55" align="center" />
<el-table-column label="参数主键" align="center" prop="configId" /> <el-table-column label="参数主键" align="center" prop="configId" />
<el-table-column label="参数名称" align="center" prop="configName" :show-overflow-tooltip="true" /> <el-table-column label="参数名称" align="center" prop="configName" :show-overflow-tooltip="true" />
<el-table-column label="参数键名" align="center" prop="configKey" :show-overflow-tooltip="true" /> <el-table-column label="参数键名" align="center" prop="configKey" :show-overflow-tooltip="true" />
<el-table-column label="参数键值" align="center" prop="configValue" /> <el-table-column label="参数键值" align="center" prop="configValue" />
<el-table-column label="系统内置" align="center" prop="configType"> <el-table-column label="系统内置" align="center" prop="configType">
<template slot-scope="scope"> <template slot-scope="scope">
<dict-tag :options="dict.type.sys_yes_no" :value="scope.row.configType"/> <dict-tag :options="dict.type.sys_yes_no" :value="scope.row.configType"/>
</template> </template>
</el-table-column> </el-table-column>
<el-table-column label="备注" align="center" prop="remark" :show-overflow-tooltip="true" /> <el-table-column label="备注" align="center" prop="remark" :show-overflow-tooltip="true" />
<el-table-column label="创建时间" align="center" prop="createTime" width="180"> <el-table-column label="创建时间" align="center" prop="createTime" width="180">
<template slot-scope="scope"> <template slot-scope="scope">
<span>{{ parseTime(scope.row.createTime) }}</span> <span>{{ parseTime(scope.row.createTime) }}</span>
</template> </template>
</el-table-column> </el-table-column>
<el-table-column label="操作" align="center" class-name="small-padding fixed-width"> <el-table-column label="操作" align="center" class-name="small-padding fixed-width">
<template slot-scope="scope"> <template slot-scope="scope">
<el-button <el-button
size="mini" size="mini"
type="text" type="text"
icon="el-icon-edit" icon="el-icon-edit"
@click="handleUpdate(scope.row)" @click="handleUpdate(scope.row)"
v-hasPermi="['system:config:edit']" v-hasPermi="['system:config:edit']"
>修改</el-button> >修改</el-button>
<el-button <el-button
size="mini" size="mini"
type="text" type="text"
icon="el-icon-delete" icon="el-icon-delete"
@click="handleDelete(scope.row)" @click="handleDelete(scope.row)"
v-hasPermi="['system:config:remove']" v-hasPermi="['system:config:remove']"
>删除</el-button> >删除</el-button>
</template> </template>
</el-table-column> </el-table-column>
</el-table> </el-table>
<pagination <pagination
v-show="total>0" v-show="total>0"
:total="total" :total="total"
:page.sync="queryParams.pageNum" :page.sync="queryParams.pageNum"
:limit.sync="queryParams.pageSize" :limit.sync="queryParams.pageSize"
@pagination="getList" @pagination="getList"
/> />
<!-- 添加或修改参数配置对话框 --> <!-- 添加或修改参数配置对话框 -->
<el-dialog :title="title" :visible.sync="open" width="500px" append-to-body> <el-dialog :title="title" :visible.sync="open" width="500px" append-to-body>
<el-form ref="form" :model="form" :rules="rules" label-width="80px"> <el-form ref="form" :model="form" :rules="rules" label-width="80px">
<el-form-item label="参数名称" prop="configName"> <el-form-item label="参数名称" prop="configName">
<el-input v-model="form.configName" placeholder="请输入参数名称" /> <el-input v-model="form.configName" placeholder="请输入参数名称" />
</el-form-item> </el-form-item>
<el-form-item label="参数键名" prop="configKey"> <el-form-item label="参数键名" prop="configKey">
<el-input v-model="form.configKey" placeholder="请输入参数键名" /> <el-input v-model="form.configKey" placeholder="请输入参数键名" />
</el-form-item> </el-form-item>
<el-form-item label="参数键值" prop="configValue"> <el-form-item label="参数键值" prop="configValue">
<el-input v-model="form.configValue" placeholder="请输入参数键值" /> <el-input type="textarea" :rows="4" v-model="form.configValue" placeholder="请输入参数键值" />
</el-form-item> </el-form-item>
<el-form-item label="系统内置" prop="configType"> <el-form-item label="系统内置" prop="configType">
<el-radio-group v-model="form.configType"> <el-radio-group v-model="form.configType">
<el-radio <el-radio
v-for="dict in dict.type.sys_yes_no" v-for="dict in dict.type.sys_yes_no"
:key="dict.value" :key="dict.value"
:label="dict.value" :label="dict.value"
>{{dict.label}}</el-radio> >{{dict.label}}</el-radio>
</el-radio-group> </el-radio-group>
</el-form-item> </el-form-item>
<el-form-item label="备注" prop="remark"> <el-form-item label="备注" prop="remark">
<el-input v-model="form.remark" type="textarea" placeholder="请输入内容" /> <el-input v-model="form.remark" type="textarea" placeholder="请输入内容" />
</el-form-item> </el-form-item>
</el-form> </el-form>
<div slot="footer" class="dialog-footer"> <div slot="footer" class="dialog-footer">
<el-button type="primary" @click="submitForm"> </el-button> <el-button type="primary" @click="submitForm"> </el-button>
<el-button @click="cancel"> </el-button> <el-button @click="cancel"> </el-button>
</div> </div>
</el-dialog> </el-dialog>
</div> </div>
</template> </template>
<script> <script>
import { listConfig, getConfig, delConfig, addConfig, updateConfig, refreshCache } from "@/api/system/config"; import { listConfig, getConfig, delConfig, addConfig, updateConfig, refreshCache } from "@/api/system/config";
export default { export default {
name: "Config", name: "Config",
dicts: ['sys_yes_no'], dicts: ['sys_yes_no'],
data() { data() {
return { return {
// //
loading: true, loading: true,
// //
ids: [], ids: [],
// //
single: true, single: true,
// //
multiple: true, multiple: true,
// //
showSearch: true, showSearch: true,
// //
total: 0, total: 0,
// //
configList: [], configList: [],
// //
title: "", title: "",
// //
open: false, open: false,
// //
dateRange: [], dateRange: [],
// //
queryParams: { queryParams: {
pageNum: 1, pageNum: 1,
pageSize: 10, pageSize: 10,
configName: undefined, configName: undefined,
configKey: undefined, configKey: undefined,
configType: undefined configType: undefined
}, },
// //
form: {}, form: {},
// //
rules: { rules: {
configName: [ configName: [
{ required: true, message: "参数名称不能为空", trigger: "blur" } { required: true, message: "参数名称不能为空", trigger: "blur" }
], ],
configKey: [ configKey: [
{ required: true, message: "参数键名不能为空", trigger: "blur" } { required: true, message: "参数键名不能为空", trigger: "blur" }
], ],
configValue: [ configValue: [
{ required: true, message: "参数键值不能为空", trigger: "blur" } { required: true, message: "参数键值不能为空", trigger: "blur" }
] ]
} }
}; };
}, },
created() { created() {
this.getList(); this.getList();
}, },
methods: { methods: {
/** 查询参数列表 */ /** 查询参数列表 */
getList() { getList() {
this.loading = true; this.loading = true;
listConfig(this.addDateRange(this.queryParams, this.dateRange)).then(response => { listConfig(this.addDateRange(this.queryParams, this.dateRange)).then(response => {
this.configList = response.rows; this.configList = response.rows;
this.total = response.total; this.total = response.total;
this.loading = false; this.loading = false;
} }
); );
}, },
// //
cancel() { cancel() {
this.open = false; this.open = false;
this.reset(); this.reset();
}, },
// //
reset() { reset() {
this.form = { this.form = {
configId: undefined, configId: undefined,
configName: undefined, configName: undefined,
configKey: undefined, configKey: undefined,
configValue: undefined, configValue: undefined,
configType: "Y", configType: "Y",
remark: undefined remark: undefined
}; };
this.resetForm("form"); this.resetForm("form");
}, },
/** 搜索按钮操作 */ /** 搜索按钮操作 */
handleQuery() { handleQuery() {
this.queryParams.pageNum = 1; this.queryParams.pageNum = 1;
this.getList(); this.getList();
}, },
/** 重置按钮操作 */ /** 重置按钮操作 */
resetQuery() { resetQuery() {
this.dateRange = []; this.dateRange = [];
this.resetForm("queryForm"); this.resetForm("queryForm");
this.handleQuery(); this.handleQuery();
}, },
/** 新增按钮操作 */ /** 新增按钮操作 */
handleAdd() { handleAdd() {
this.reset(); this.reset();
this.open = true; this.open = true;
this.title = "添加参数"; this.title = "添加参数";
}, },
// //
handleSelectionChange(selection) { handleSelectionChange(selection) {
this.ids = selection.map(item => item.configId) this.ids = selection.map(item => item.configId)
this.single = selection.length!=1 this.single = selection.length!=1
this.multiple = !selection.length this.multiple = !selection.length
}, },
/** 修改按钮操作 */ /** 修改按钮操作 */
handleUpdate(row) { handleUpdate(row) {
this.reset(); this.reset();
const configId = row.configId || this.ids const configId = row.configId || this.ids
getConfig(configId).then(response => { getConfig(configId).then(response => {
this.form = response.data; this.form = response.data;
this.open = true; this.open = true;
this.title = "修改参数"; this.title = "修改参数";
}); });
}, },
/** 提交按钮 */ /** 提交按钮 */
submitForm: function() { submitForm: function() {
this.$refs["form"].validate(valid => { this.$refs["form"].validate(valid => {
if (valid) { if (valid) {
if (this.form.configId != undefined) { if (this.form.configId != undefined) {
updateConfig(this.form).then(response => { updateConfig(this.form).then(response => {
this.$modal.msgSuccess("修改成功"); this.$modal.msgSuccess("修改成功");
this.open = false; this.open = false;
this.getList(); this.getList();
}); });
} else { } else {
addConfig(this.form).then(response => { addConfig(this.form).then(response => {
this.$modal.msgSuccess("新增成功"); this.$modal.msgSuccess("新增成功");
this.open = false; this.open = false;
this.getList(); this.getList();
}); });
} }
} }
}); });
}, },
/** 删除按钮操作 */ /** 删除按钮操作 */
handleDelete(row) { handleDelete(row) {
const configIds = row.configId || this.ids; const configIds = row.configId || this.ids;
this.$modal.confirm('是否确认删除参数编号为"' + configIds + '"的数据项?').then(function() { this.$modal.confirm('是否确认删除参数编号为"' + configIds + '"的数据项?').then(function() {
return delConfig(configIds); return delConfig(configIds);
}).then(() => { }).then(() => {
this.getList(); this.getList();
this.$modal.msgSuccess("删除成功"); this.$modal.msgSuccess("删除成功");
}).catch(() => {}); }).catch(() => {});
}, },
/** 导出按钮操作 */ /** 导出按钮操作 */
handleExport() { handleExport() {
this.download('system/config/export', { this.download('system/config/export', {
...this.queryParams ...this.queryParams
}, `config_${new Date().getTime()}.xlsx`) }, `config_${new Date().getTime()}.xlsx`)
}, },
/** 刷新缓存按钮操作 */ /** 刷新缓存按钮操作 */
handleRefreshCache() { handleRefreshCache() {
refreshCache().then(() => { refreshCache().then(() => {
this.$modal.msgSuccess("刷新成功"); this.$modal.msgSuccess("刷新成功");
}); });
} }
} }
}; };
</script> </script>

@ -60,6 +60,16 @@ public class RedisConst {
*/ */
public static final String REPTILE_WEIXIN_LINK_COUNT = "bussiness:reptile:weixin.link.count"; public static final String REPTILE_WEIXIN_LINK_COUNT = "bussiness:reptile:weixin.link.count";
/**
* Redis key under which the WeChat official-account crawler keeps the number of
* image links it found; the crawler task reads and then deletes it after a run.
*/
public static final String REPTILE_WEIXIN_OFFICIAL_COUNT = "bussiness:reptile:weixin.official.count";
/**
* Redis key holding the name of the official account that is currently being
* crawled; written by the crawler task before each run and read by the page processor.
*/
public static final String REPTILE_WEIXIN_OFFICIAL_NAME = "temp:official_accounts:name";
/** /**
*zol.phone *zol.phone
*/ */

@ -40,6 +40,9 @@ public class ReptileConst {
*/ */
public static final String WEIXIN_SOUGOU_URL= "https://weixin.sogou.com/"; public static final String WEIXIN_SOUGOU_URL= "https://weixin.sogou.com/";
/**
 * Sogou WeChat search URL prefix for official accounts; the account name is
 * appended as the value of the trailing {@code query=} parameter.
 * The identifier is misspelled ("OFFCIAL") but is kept because other classes import it.
 */
public static final String WEIXIN_OFFCIAL_URL= "https://weixin.sogou.com/weixin?type=1&s_from=input&query=";
/** /**
* url * url
*/ */

@ -0,0 +1,25 @@
package com.xjs.utils;
import cn.hutool.core.util.RandomUtil;

import java.util.concurrent.ThreadLocalRandom;
/**
*
*
* @author xiejs
* @since 2022-06-13
*/
public class RandomUtils {
private static String[] zm = {"A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z"};
/**
*
* @return
*/
public static String randomZm() {
int i = RandomUtil.randomInt(0, 25);
return zm[i];
}
}

@ -0,0 +1,89 @@
package com.xjs.common.util;
import java.io.*;
/**
 * File-system helpers shared by the WeChat crawlers: title sanitising and
 * saving a downloaded picture to disk.
 *
 * @author xiejs
 * @since 2022-06-13
 */
public class WeiXinUtils {

    /** Characters that are replaced by {@code '-'} when a title is used as a directory name. */
    private static final String REPLACED_CHARS = "/\\:*?\"<>|";

    /**
     * Makes a title usable as a directory name: spaces are removed and each of
     * {@code / \ : * ? " < > |} is replaced by {@code '-'}.
     *
     * @param title the raw title; must not be {@code null}
     * @return the sanitised title
     */
    public static String filterTitle(String title) {
        StringBuilder cleaned = new StringBuilder(title.length());
        for (int i = 0; i < title.length(); i++) {
            char c = title.charAt(i);
            if (c == ' ') {
                continue;
            }
            cleaned.append(REPLACED_CHARS.indexOf(c) >= 0 ? '-' : c);
        }
        return cleaned.toString();
    }

    /**
     * Writes the content of {@code inputStream} to the file {@code fileName}
     * inside the directory {@code appendPath}, creating the directory first
     * when it does not exist.
     * <p>
     * The data is streamed to disk in chunks and both streams are closed even
     * when copying fails. I/O failures are reported on standard error and not
     * propagated, so the call is best-effort; a failed copy may leave an
     * incomplete file behind.
     *
     * @param inputStream source of the picture bytes; closed by this method
     * @param path        not used by this method; kept for caller compatibility
     * @param fileName    name of the file to create inside {@code appendPath}
     * @param title       not used by this method; kept for caller compatibility
     * @param appendPath  directory that receives the file
     */
    public static void downloadPicture(InputStream inputStream, String path, String fileName, String title, String appendPath) {
        File directory = new File(appendPath);
        if (!directory.exists()) {
            // A failed mkdirs surfaces below as a FileNotFoundException when the file is opened.
            directory.mkdirs();
        }
        File target = new File(directory.getAbsolutePath() + File.separator + fileName);

        try (InputStream in = inputStream;
             OutputStream out = new FileOutputStream(target)) {
            byte[] buffer = new byte[8192];
            int read;
            // read() returns -1 at end of stream; a 0-length read is not EOF.
            while ((read = in.read(buffer)) != -1) {
                out.write(buffer, 0, read);
            }
        } catch (IOException e) {
            // No logger is available in this class; keep the original reporting channel.
            e.printStackTrace();
        }
    }
}

@ -17,11 +17,18 @@ public class WeiXinConst {
*/ */
public static final String REDIS_KEY = "sys_config:xjs.webmagic.wechatPicture"; public static final String REDIS_KEY = "sys_config:xjs.webmagic.wechatPicture";
/**
 * Redis key from which the crawler task reads the cached official-account setting.
 * NOTE(review): this key uses dots ("xjs.webmagic.official_accounts") while
 * CONFIG_KEY_OFFICIAL uses colons ("xjs:webmagic:official_accounts"); the wechatPicture
 * pair uses the same dotted name in both places - confirm which spelling is stored.
 */
public static final String REDIS_KEY_OFFICIAL = "sys_config:xjs.webmagic.official_accounts";
/** /**
* key * key
*/ */
public static final String CONFIG_KEY = "xjs.webmagic.wechatPicture"; public static final String CONFIG_KEY = "xjs.webmagic.wechatPicture";
/**
 * Configuration key passed to the remote config service when the official-account
 * setting is not found in Redis.
 * NOTE(review): separated by colons, whereas REDIS_KEY_OFFICIAL ends in the dotted
 * "xjs.webmagic.official_accounts" - confirm the key against the stored configuration.
 */
public static final String CONFIG_KEY_OFFICIAL = "xjs:webmagic:official_accounts";
public static final String JPEG = "jpeg"; public static final String JPEG = "jpeg";
public static final String JPG = "jpg"; public static final String JPG = "jpg";

@ -0,0 +1,33 @@
package com.xjs.weixin.controller;
import com.ruoyi.common.core.domain.R;
import com.xjs.weixin.task.OfficialAccountsTask;
import io.swagger.annotations.Api;
import io.swagger.annotations.ApiOperation;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;
/**
 * REST controller for the WeChat official-account crawler, mapped under
 * {@code weixin_official_accounts}.
 *
 * @author xiejs
 * @since 2022-06-13
 */
@RestController
@RequestMapping("weixin_official_accounts")
@Api(tags = "爬虫模块-微信公众号")
public class OfficialAccountsController {

    @Autowired
    private OfficialAccountsTask officialAccountsTask;

    // ---------------------- endpoints for remote RPC callers ----------------------

    /**
     * Runs the official-account crawler task and, once it has returned,
     * answers with a success response.
     *
     * @return a successful {@code R} without payload
     */
    @GetMapping("taskForPRC")
    @ApiOperation("供定时任务服务RPC远程调用")
    public R WeiXinOfficialAccountsTaskForPRC() {
        this.officialAccountsTask.execute();
        final R success = R.ok();
        return success;
    }
}

@ -0,0 +1,30 @@
package com.xjs.weixin.controller;
import com.xjs.weixin.task.OfficialAccountsTask;
import io.swagger.annotations.Api;
import io.swagger.annotations.ApiOperation;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;
/**
 * Test controller mapped under {@code test} that starts the official-account
 * crawler task on demand.
 *
 * @author xiejs
 * @since 2022-06-13
 */
@RequestMapping("test")
@RestController
@Api(tags = "测试")
public class TestController {

    @Autowired
    private OfficialAccountsTask officialAccountsTask;

    /**
     * Executes the crawler task and reports completion.
     *
     * @return the literal {@code "success"} after the task has returned
     */
    @GetMapping
    @ApiOperation("微信公众号")
    public String test() {
        this.officialAccountsTask.execute();
        final String outcome = "success";
        return outcome;
    }
}

@ -0,0 +1,118 @@
package com.xjs.weixin.task;
import com.ruoyi.common.core.constant.HttpStatus;
import com.ruoyi.common.core.domain.R;
import com.ruoyi.common.core.utils.StringUtils;
import com.ruoyi.common.redis.service.RedisService;
import com.ruoyi.system.api.RemoteConfigService;
import com.xjs.annotation.ReptileLog;
import com.xjs.weixin.webmagic.OfficialAccountsPipeline;
import com.xjs.weixin.webmagic.OfficialAccountsProcessor;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;
import us.codecraft.webmagic.Spider;
import us.codecraft.webmagic.scheduler.BloomFilterDuplicateRemover;
import us.codecraft.webmagic.scheduler.QueueScheduler;
import javax.annotation.Resource;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import static com.xjs.consts.RedisConst.REPTILE_WEIXIN_OFFICIAL_COUNT;
import static com.xjs.consts.RedisConst.REPTILE_WEIXIN_OFFICIAL_NAME;
import static com.xjs.consts.ReptileConst.WEIXIN_OFFCIAL_URL;
import static com.xjs.weixin.consts.WeiXinConst.CONFIG_KEY_OFFICIAL;
import static com.xjs.weixin.consts.WeiXinConst.REDIS_KEY_OFFICIAL;
/**
 * Crawls the configured WeChat official accounts: for every configured account
 * name a spider is started on the Sogou WeChat search URL for that name.
 *
 * @author xiejs
 * @since 2022-06-13
 */
@Component
@SuppressWarnings("all")
public class OfficialAccountsTask {
    @Autowired
    private OfficialAccountsProcessor officialAccountsProcessor;
    @Autowired
    private RedisService redisService;
    @Autowired
    private OfficialAccountsPipeline officialAccountsPipeline;
    @Resource
    private RemoteConfigService remoteConfigService;

    // Self-reference so that reptileWeiXinOfficialAccount is invoked through the
    // injected bean; a plain this-call would bypass the AOP proxy.
    @Autowired
    private OfficialAccountsTask officialAccountsTask;

    /**
     * Runs one crawl per configured account name. Before each crawl the name is
     * stored in Redis so the page processor can match the search results against it.
     * Does nothing when no account names are configured.
     */
    public void execute() {
        for (String name : this.convert()) {
            String url = WEIXIN_OFFCIAL_URL + name;
            redisService.setCacheObject(REPTILE_WEIXIN_OFFICIAL_NAME, name);
            officialAccountsTask.reptileWeiXinOfficialAccount(url);
        }
    }

    /**
     * Runs the spider synchronously on {@code url} and returns the count the
     * processor left in Redis; the Redis entry is removed afterwards.
     *
     * @param url the search URL to start crawling from
     * @return the cached count, or {@code 0} when none was stored
     */
    @ReptileLog(name = "微信公众号")
    public Long reptileWeiXinOfficialAccount(String url) {
        Spider.create(officialAccountsProcessor)
                .addUrl(url)// start URL
                .thread(30)// number of crawler threads
                .setScheduler(new QueueScheduler()
                        .setDuplicateRemover(new BloomFilterDuplicateRemover(110000)))// URL de-duplication
                //.setDownloader(downloader)
                .addPipeline(officialAccountsPipeline)// handles the extracted data
                .run();// blocks until the crawl has finished

        Integer cache = redisService.getCacheObject(REPTILE_WEIXIN_OFFICIAL_COUNT);
        redisService.deleteObject(REPTILE_WEIXIN_OFFICIAL_COUNT);
        if (cache != null) {
            return Long.valueOf(cache);
        }
        return 0L;
    }

    /**
     * Splits the comma-separated configuration value into account names.
     * Surrounding whitespace is trimmed and blank entries are dropped.
     *
     * @return the configured names; empty when the setting is missing or blank
     */
    private List<String> convert() {
        List<String> names = new ArrayList<>();
        String setting = this.getConfigSetting();
        // getConfigSetting() returns null when neither Redis nor the config service has a value.
        if (setting == null) {
            return names;
        }
        for (String part : setting.split(",")) {
            String name = part.trim();
            if (!name.isEmpty()) {
                names.add(name);
            }
        }
        return names;
    }

    /**
     * Reads the official-account setting, preferring the Redis entry and
     * falling back to the remote config service.
     *
     * @return the raw setting, or {@code null} when it cannot be obtained
     */
    private String getConfigSetting() {
        if (redisService.hasKey(REDIS_KEY_OFFICIAL)) {
            return redisService.getCacheObject(REDIS_KEY_OFFICIAL);
        }
        R<String> r = remoteConfigService.getConfigKeyForRPC(CONFIG_KEY_OFFICIAL);
        if (r != null && r.getCode() == HttpStatus.SUCCESS) {
            return r.getData();
        }
        return null;
    }
}

@ -0,0 +1,185 @@
package com.xjs.weixin.webmagic;
import cn.hutool.core.collection.CollUtil;
import cn.hutool.core.date.DatePattern;
import cn.hutool.core.date.DateUtil;
import cn.hutool.core.util.RandomUtil;
import com.ruoyi.common.core.constant.HttpStatus;
import com.ruoyi.common.core.utils.StringUtils;
import com.ruoyi.common.redis.service.RedisService;
import com.ruoyi.system.api.RemoteConfigService;
import com.xjs.common.util.WeiXinUtils;
import com.xjs.utils.RandomUtils;
import com.xjs.weixin.consts.WeiXinConst;
import lombok.extern.log4j.Log4j2;
import org.apache.http.HttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;
import us.codecraft.webmagic.ResultItems;
import us.codecraft.webmagic.Task;
import us.codecraft.webmagic.pipeline.Pipeline;
import javax.annotation.Resource;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.util.Date;
import java.util.List;
import static com.xjs.weixin.consts.WeiXinConst.*;
/**
*
*
* @author xiejs
* @since 2022-06-13
*/
@Component
@Log4j2
@SuppressWarnings("all")
public class OfficialAccountsPipeline implements Pipeline {
@Autowired
private RedisService redisService;
@Resource
private RemoteConfigService remoteConfigService;
@Override
public void process(ResultItems resultItems, Task task) {
List<String> linkList = resultItems.get("linkList");
String title = resultItems.get("title");
if (CollUtil.isNotEmpty(linkList) && StringUtils.isNotEmpty(title)) {
String appendPath = this.getAppendPath(title);
File file = new File(appendPath);
if (file.exists()) {
return;
}
for (String link : linkList) {
InputStream inputStream = null;
// 创建GET请求
CloseableHttpClient httpClient = HttpClients.createDefault();
HttpGet httpGet = null;
try {
httpGet = new HttpGet(link);
HttpResponse response = httpClient.execute(httpGet);
if (response.getStatusLine().getStatusCode() == HttpStatus.SUCCESS) {
inputStream = response.getEntity().getContent();
//文件小于30kb则不写入
long contentLength = response.getEntity().getContentLength();
long kb = contentLength / 1024;
if (SIZE_KB > kb) {
continue;
}
//拼接文件后缀
String suffix;
if (link.contains(JPEG)) {
suffix = JPEG;
} else if (link.contains(JPG)) {
suffix = JPG;
} else if (link.contains(PNG)) {
suffix = PNG;
} else if (link.contains(GIF)) {
continue;
} else {
suffix = JPG;
}
String chars = "ABCDEFGHIZKLMNOPQRSTUVWXYZ";
char c = chars.charAt((int) (Math.random() * 1));
String fileName = RandomUtils.randomZm() + RandomUtil.randomLong(100000, 1000000) + DOT + suffix;
this.downloadPicture(inputStream, this.getPath(), fileName, title);
}
} catch (Exception e) {
e.printStackTrace();
} finally {
try {
if (httpGet != null) {
httpGet.clone();
}
} catch (CloneNotSupportedException e) {
e.printStackTrace();
}
try {
httpClient.close();
} catch (IOException e) {
e.printStackTrace();
}
try {
if (inputStream != null) {
inputStream.close();
}
} catch (IOException e) {
log.error(e.getMessage());
}
}
}
}
}
/**
* url
*
* @param inputStream
* @param path
* @param fileName
* @param title
*/
private void downloadPicture(InputStream inputStream, String path, String fileName, String title) {
WeiXinUtils.downloadPicture(inputStream, path, fileName, title, this.getAppendPath(title));
}
/**
*
*
* @param title
* @return str
*/
private String getAppendPath(String title) {
title = WeiXinUtils.filterTitle(title);
return this.getPath() + File.separator + DateUtil.format(new Date(),
DatePattern.NORM_MONTH_PATTERN) + File.separator
+ DateUtil.format(new Date(), "dd") + "日" + File.separator + title;
}
/**
* -> ->
*
* @return
*/
private String getPath() {
//磁盘路径
String path;
//判断redis中是否存在
Boolean hasKey = redisService.hasKey(REDIS_KEY);
if (hasKey) {
path = redisService.getCacheObject(REDIS_KEY);
} else {
String data = remoteConfigService.getConfigKeyForRPC(CONFIG_KEY).getData();
if (StringUtils.isNotEmpty(data)) {
path = data;
} else {
path = WeiXinConst.PATH;
}
}
return path;
}
}

@ -0,0 +1,155 @@
package com.xjs.weixin.webmagic;
import cn.hutool.core.collection.CollUtil;
import cn.hutool.core.date.DateTime;
import cn.hutool.core.date.DateUtil;
import com.ruoyi.common.core.utils.StringUtils;
import com.ruoyi.common.redis.service.RedisService;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;
import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.processor.PageProcessor;
import us.codecraft.webmagic.selector.Selectable;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Date;
import java.util.List;
import java.util.concurrent.TimeUnit;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import static com.xjs.consts.RedisConst.REPTILE_WEIXIN_OFFICIAL_COUNT;
import static com.xjs.consts.RedisConst.REPTILE_WEIXIN_OFFICIAL_NAME;
import static com.xjs.consts.ReptileConst.WEIXIN_SOUGOU_URL;
/**
 * Page processor for the WeChat official-account crawler. On a search result
 * page it queues today's articles of the requested account; on any page it
 * follows a script-built redirect and extracts picture links and the title.
 *
 * @author xiejs
 * @since 2022-06-13
 */
@Component
public class OfficialAccountsProcessor implements PageProcessor {
    @Autowired
    private RedisService redisService;

    /**
     * Request header name.
     */
    private static final String headerKey = "User-Agent";

    /**
     * Request header value.
     */
    private static final String headerValue = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.45 Safari/537.36";

    /**
     * Matches the text between two single quotes; compiled once and reused.
     */
    private static final Pattern QUOTED = Pattern.compile("'(.*?)'");

    /**
     * Processes one downloaded page. Any exception is reported and swallowed so
     * a single bad page does not stop the spider; the expiry of the two Redis
     * keys is refreshed in every case.
     *
     * @param page the downloaded page
     */
    @Override
    public void process(Page page) {
        try {
            page.addTargetRequests(this.collectTodayArticleUrls(page));
            this.followScriptRedirect(page);

            // Picture URLs are taken from the data-src attribute of img elements.
            List<String> linkList = page.getHtml().css("img", "data-src").all();
            linkList.removeIf(StringUtils::isBlank);

            String title = page.getHtml().css("#activity-name", "text").get();
            if (StringUtils.isNotEmpty(title)) {
                page.putField("title", title);
            }
            if (CollUtil.isNotEmpty(linkList)) {
                page.putField("linkList", linkList);
            }

            // Add this page's links to the running total instead of overwriting it.
            // The read-modify-write is not atomic across spider threads, so the
            // total is approximate when pages are processed concurrently.
            Integer count = redisService.getCacheObject(REPTILE_WEIXIN_OFFICIAL_COUNT);
            int total = (count == null ? 0 : count) + linkList.size();
            redisService.setCacheObject(REPTILE_WEIXIN_OFFICIAL_COUNT, total);
        } catch (Exception e) {
            // No logger is declared in this class; keep the original reporting channel.
            e.printStackTrace();
        } finally {
            redisService.expire(REPTILE_WEIXIN_OFFICIAL_COUNT, 3, TimeUnit.HOURS);
            redisService.expire(REPTILE_WEIXIN_OFFICIAL_NAME, 3, TimeUnit.HOURS);
        }
    }

    /**
     * Scans the search result entries for the account whose highlighted name
     * equals the name stored in Redis and returns the links of entries dated
     * today. The stored name is read once and removed after the page has been
     * scanned, so every entry of the page is compared against it.
     *
     * @param page the downloaded page
     * @return URLs to queue; empty when the page has no result entries
     */
    private List<String> collectTodayArticleUrls(Page page) {
        List<String> urls = new ArrayList<>();
        List<Selectable> lis = page.getHtml().css(".news-box > .news-list2 > li").nodes();
        if (lis.isEmpty()) {
            return urls;
        }

        String accountName = redisService.getCacheObject(REPTILE_WEIXIN_OFFICIAL_NAME);
        redisService.deleteObject(REPTILE_WEIXIN_OFFICIAL_NAME);
        if (!StringUtils.isNotEmpty(accountName)) {
            return urls;
        }

        String today = DateUtil.formatDate(new Date());
        for (Selectable li : lis) {
            String href = li.css("dl > dd > a", "href").get();
            String highlighted = li.css(".txt-box > .tit > a > em", "text").get();
            String plain = li.css(".txt-box > .tit > a", "text").get();
            // Text directly inside the anchor means the name is only a partial match.
            if (StringUtils.isNotEmpty(plain) || !accountName.equals(highlighted) || href == null) {
                continue;
            }
            String script = li.css("dl > dd > span > script").get();
            if (script == null) {
                continue;
            }
            Matcher matcher = QUOTED.matcher(script);
            while (matcher.find()) {
                if (isSameDay(matcher.group(1), today)) {
                    urls.add(WEIXIN_SOUGOU_URL + href);
                    break;
                }
            }
        }
        return urls;
    }

    /**
     * Tells whether a timestamp in epoch seconds falls on the given day.
     *
     * @param epochSeconds the timestamp as text
     * @param day          the day formatted by {@code DateUtil.formatDate}
     * @return {@code true} on a match; {@code false} otherwise or when the text is not a number
     */
    private static boolean isSameDay(String epochSeconds, String day) {
        try {
            return DateUtil.date(Long.parseLong(epochSeconds) * 1000).toDateStr().equals(day);
        } catch (NumberFormatException e) {
            return false;
        }
    }

    /**
     * When the page contains {@code window.location.replace(url)}, rebuilds the
     * target by concatenating the single-quoted fragments found between the
     * first "{" and the first "}" of the page and queues it.
     *
     * @param page the downloaded page
     */
    private void followScriptRedirect(Page page) {
        String js = page.getHtml().get();
        if (js == null || !js.contains("window.location.replace(url)")) {
            return;
        }
        int start = js.indexOf("{") + 1;
        int end = js.indexOf("}");
        if (end < start) {
            return;
        }
        Matcher matcher = QUOTED.matcher(js.substring(start, end));
        StringBuilder target = new StringBuilder();
        while (matcher.find()) {
            target.append(matcher.group(1));
        }
        if (target.length() > 0) {
            page.addTargetRequests(Collections.singletonList(target.toString()));
        }
    }

    /**
     * Site settings used by the spider for this processor.
     *
     * @return a new site configuration
     */
    @Override
    public Site getSite() {
        return Site.me()
                .addHeader(headerKey, headerValue)
                .setCharset("utf8")// character encoding
                .setTimeOut(2000)// timeout
                .setRetrySleepTime(100)// pause between retries
                .setCycleRetryTimes(10)// number of retries
                .setSleepTime(1)// pause between two pages
                ;
    }
}

@ -6,6 +6,7 @@ import com.ruoyi.common.core.constant.HttpStatus;
import com.ruoyi.common.core.utils.StringUtils; import com.ruoyi.common.core.utils.StringUtils;
import com.ruoyi.common.redis.service.RedisService; import com.ruoyi.common.redis.service.RedisService;
import com.ruoyi.system.api.RemoteConfigService; import com.ruoyi.system.api.RemoteConfigService;
import com.xjs.common.util.WeiXinUtils;
import com.xjs.weixin.consts.WeiXinConst; import com.xjs.weixin.consts.WeiXinConst;
import lombok.extern.log4j.Log4j2; import lombok.extern.log4j.Log4j2;
import org.apache.http.HttpResponse; import org.apache.http.HttpResponse;
@ -43,9 +44,6 @@ public class WeiXinLinkPipeline implements Pipeline {
private RemoteConfigService remoteConfigService; private RemoteConfigService remoteConfigService;
@Override @Override
public void process(ResultItems resultItems, Task task) { public void process(ResultItems resultItems, Task task) {
@ -134,79 +132,18 @@ public class WeiXinLinkPipeline implements Pipeline {
*/ */
private void downloadPicture(InputStream inputStream, String path, String fileName, String title) { private void downloadPicture(InputStream inputStream, String path, String fileName, String title) {
try { WeiXinUtils.downloadPicture(inputStream, path, fileName, title, this.getAppendPath(title));
DataInputStream dataInputStream = new DataInputStream(inputStream);
//拼接文件路径
String appendPath = this.getAppendPath(title);
//如果文件夹不存在则创建
File file = new File(appendPath);
if (!file.exists()) {
boolean mkdirs = file.mkdirs();
}
String absolutePath = file.getAbsolutePath();
String absolute = absolutePath + File.separator + fileName;
FileOutputStream f = new FileOutputStream(absolute);
ByteArrayOutputStream out = new ByteArrayOutputStream();
byte[] bf = new byte[1024];
int length;
while ((length = dataInputStream.read(bf)) > 0) {
out.write(bf, 0, length);
}
f.write(out.toByteArray());
dataInputStream.close();
f.close();
} catch (IOException e) {
e.printStackTrace();
}
} }
/** /**
* *
*
* @param title * @param title
* @return str * @return str
*/ */
private String getAppendPath(String title) { private String getAppendPath(String title) {
//过滤title字段 title = WeiXinUtils.filterTitle(title);
title = title.replace(" ", "");
//替换\ 防止报错
if (title.contains("/")) {
title = title.replace("/", "-");
}
if (title.contains("\\")) {
title = title.replace("\\", "-");
}
if (title.contains(":")) {
title = title.replace(":", "-");
}
if (title.contains("*")) {
title = title.replace("*", "-");
}
if (title.contains("?")) {
title = title.replace("?", "-");
}
if (title.contains("\"")) {
title = title.replace("\"", "-");
}
if (title.contains("<")) {
title = title.replace("<", "-");
}
if (title.contains(">")) {
title = title.replace(">", "-");
}
if (title.contains("|")) {
title = title.replace("|", "-");
}
return this.getPath() + File.separator + DateUtil.format(new Date(), return this.getPath() + File.separator + DateUtil.format(new Date(),
DatePattern.NORM_MONTH_PATTERN) + File.separator DatePattern.NORM_MONTH_PATTERN) + File.separator

Loading…
Cancel
Save