From 80590e0a87a9b65e4c179159536a68c1f0c60207 Mon Sep 17 00:00:00 2001 From: xiaoxiamo <82970607@qq.com> Date: Sun, 18 Aug 2024 15:59:23 +0800 Subject: [PATCH 1/2] =?UTF-8?q?feat:=20=E6=95=8F=E6=84=9F=E8=AF=8D?= =?UTF-8?q?=E6=B1=87=E5=A4=84=E7=90=86=EF=BC=88=E4=B8=80=EF=BC=89=EF=BC=9A?= =?UTF-8?q?=E5=88=9D=E6=AD=A5=E6=8F=90=E4=BA=A4=EF=BC=8C=E8=AF=8D=E5=85=B8?= =?UTF-8?q?=E5=AD=98=E5=85=A5redis=EF=BC=8C=E6=94=AF=E6=8C=81=E7=83=AD?= =?UTF-8?q?=E6=9B=B4=E6=96=B0=E8=AF=8D=E5=85=B8=EF=BC=8C=E5=AD=97=E5=85=B8?= =?UTF-8?q?=E6=A0=91=E5=81=9A=E6=A3=80=E7=B4=A2=E5=8C=B9=E9=85=8D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../handler/action/SensWordsAction.java | 186 ++++++++++++++++++ .../handler/config/SensitiveWordsConfig.java | 119 +++++++++++ .../handler/config/TaskPipelineConfig.java | 10 +- .../main/resources/application-dev.properties | 5 +- .../resources/application-test.properties | 4 + .../src/main/resources/sensitive-words.txt | 7 + 6 files changed, 324 insertions(+), 7 deletions(-) create mode 100644 austin-handler/src/main/java/com/java3y/austin/handler/action/SensWordsAction.java create mode 100644 austin-handler/src/main/java/com/java3y/austin/handler/config/SensitiveWordsConfig.java create mode 100644 austin-web/src/main/resources/sensitive-words.txt diff --git a/austin-handler/src/main/java/com/java3y/austin/handler/action/SensWordsAction.java b/austin-handler/src/main/java/com/java3y/austin/handler/action/SensWordsAction.java new file mode 100644 index 0000000..bf825e6 --- /dev/null +++ b/austin-handler/src/main/java/com/java3y/austin/handler/action/SensWordsAction.java @@ -0,0 +1,186 @@ +package com.java3y.austin.handler.action; + +import com.java3y.austin.common.domain.TaskInfo; +import com.java3y.austin.common.dto.model.*; +import com.java3y.austin.common.pipeline.BusinessProcess; +import com.java3y.austin.common.pipeline.ProcessContext; +import com.java3y.austin.handler.config.SensitiveWordsConfig; 
+import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.data.redis.core.RedisTemplate; +import org.springframework.stereotype.Service; +import org.springframework.util.ObjectUtils; + +import java.util.*; + +/** + * 敏感词过滤 + * + * @author xiaoxiamao + * @date 2024/08/17 + */ +@Service +public class SensWordsAction implements BusinessProcess { + + + @Autowired + private RedisTemplate redisTemplate; + + /** + * 过滤逻辑 + * + * @param context + * + * @see com.java3y.austin.common.enums.ChannelType + */ + @Override + public void process(ProcessContext context) { + // 获取敏感词典 + Set sensDict = Optional.ofNullable(redisTemplate.opsForSet().members(SensitiveWordsConfig.SENS_WORDS_DICT)) + .orElse(Collections.emptySet()); + // 如果敏感词典为空,不过滤 + if (ObjectUtils.isEmpty(sensDict)) { + return; + } + switch (context.getProcessModel().getMsgType()) { + // IM + case 10: + // 无文本内容,暂不做过滤处理 + break; + // PUSH + case 20: + PushContentModel pushContentModel = + (PushContentModel) context.getProcessModel().getContentModel(); + pushContentModel.setContent(filter(pushContentModel.getContent(), sensDict)); + break; + // SMS + case 30: + SmsContentModel smsContentModel = + (SmsContentModel) context.getProcessModel().getContentModel(); + smsContentModel.setContent(filter(smsContentModel.getContent(), sensDict)); + break; + // EMAIL + case 40: + EmailContentModel emailContentModel = + (EmailContentModel) context.getProcessModel().getContentModel(); + emailContentModel.setContent(filter(emailContentModel.getContent(), sensDict)); + break; + // OFFICIAL_ACCOUNT + case 50: + // 无文本内容,暂不做过滤处理 + break; + // MINI_PROGRAM + case 60: + // 无文本内容,暂不做过滤处理 + break; + // ENTERPRISE_WE_CHAT + case 70: + EnterpriseWeChatContentModel enterpriseWeChatContentModel = + (EnterpriseWeChatContentModel) context.getProcessModel().getContentModel(); + enterpriseWeChatContentModel.setContent(filter(enterpriseWeChatContentModel.getContent(), sensDict)); + break; + // DING_DING_ROBOT + case 
80: + DingDingRobotContentModel dingDingRobotContentModel = + (DingDingRobotContentModel) context.getProcessModel().getContentModel(); + dingDingRobotContentModel.setContent(filter(dingDingRobotContentModel.getContent(), sensDict)); + break; + // DING_DING_WORK_NOTICE + case 90: + DingDingWorkContentModel dingDingWorkContentModel = + (DingDingWorkContentModel) context.getProcessModel().getContentModel(); + dingDingWorkContentModel.setContent(filter(dingDingWorkContentModel.getContent(), sensDict)); + break; + // ENTERPRISE_WE_CHAT_ROBOT + case 100: + EnterpriseWeChatRobotContentModel enterpriseWeChatRobotContentModel = + (EnterpriseWeChatRobotContentModel) context.getProcessModel().getContentModel(); + enterpriseWeChatRobotContentModel.setContent(filter(enterpriseWeChatRobotContentModel.getContent(), sensDict)); + break; + // FEI_SHU_ROBOT + case 110: + FeiShuRobotContentModel feiShuRobotContentModel = + (FeiShuRobotContentModel) context.getProcessModel().getContentModel(); + feiShuRobotContentModel.setContent(filter(feiShuRobotContentModel.getContent(), sensDict)); + break; + // ALIPAY_MINI_PROGRAM + case 120: + // 无文本内容,暂不做过滤处理 + break; + default: + break; + } + } + + /** + * 敏感词替换成对应长度'*' + * + * @param content + * @param sensDict + * @return + */ + private String filter(String content, Set sensDict) { + if (ObjectUtils.isEmpty(content) || ObjectUtils.isEmpty(sensDict)) { + return content; + } + // 构建字典树 + TrieNode root = buildTrie(sensDict); + StringBuilder result = new StringBuilder(); + int n = content.length(); + int i = 0; + + while (i < n) { + TrieNode node = root; + int j = i; + int lastMatchEnd = -1; + + while (j < n && node != null) { + node = node.children.get(content.charAt(j)); + if (node != null && node.isEnd) { + lastMatchEnd = j; + } + j++; + } + + if (lastMatchEnd != -1) { + // 找到敏感词,用'*'替换 + for (int k = i; k <= lastMatchEnd; k++) { + result.append('*'); + } + i = lastMatchEnd + 1; + } else { + result.append(content.charAt(i)); + i++; + } + } + 
+ return result.toString(); + } + + /** + * 构建字典树 + * + * @param sensDict + * @return + */ + private TrieNode buildTrie(Set sensDict) { + TrieNode root = new TrieNode(); + for (String word : sensDict) { + TrieNode node = root; + for (char c : word.toCharArray()) { + node = node.children.computeIfAbsent(c, k -> new TrieNode()); + } + node.isEnd = true; + } + return root; + } + + /** + * 树节点 + */ + private static class TrieNode { + Map children = new HashMap<>(); + // 是否为叶子节点 + boolean isEnd = false; + } + +} diff --git a/austin-handler/src/main/java/com/java3y/austin/handler/config/SensitiveWordsConfig.java b/austin-handler/src/main/java/com/java3y/austin/handler/config/SensitiveWordsConfig.java new file mode 100644 index 0000000..abdfd23 --- /dev/null +++ b/austin-handler/src/main/java/com/java3y/austin/handler/config/SensitiveWordsConfig.java @@ -0,0 +1,119 @@ +package com.java3y.austin.handler.config; + +import lombok.extern.slf4j.Slf4j; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.boot.context.properties.ConfigurationProperties; +import org.springframework.context.annotation.Configuration; +import org.springframework.core.task.TaskExecutor; +import org.springframework.data.redis.core.RedisTemplate; +import org.springframework.util.ObjectUtils; + +import javax.annotation.PostConstruct; +import java.io.BufferedReader; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Paths; +import java.util.Collections; +import java.util.Set; +import java.util.concurrent.TimeUnit; +import java.util.stream.Collectors; + +/** + * 敏感词配置 + * + * @author xiaoxiamao + * @date 2024/08/17 + */ +@Slf4j +@Configuration +@ConfigurationProperties(prefix = "austin.senswords") +public class SensitiveWordsConfig { + + /** + * 敏感词字典redis key + */ + public static final String SENS_WORDS_DICT = "SENS_WORDS_DICT"; + /** + * 更新时间 + */ + private static final long UPDATE_TIME = 10 * 60 * 1000; + /** + * 敏感词字典 + */ + private 
Set<String> sensitiveWords = Collections.emptySet(); // the "private" modifier of this field ends the preceding line

/**
 * Whether sensitive-word filtering is enabled.
 */
private boolean filterEnabled;
/**
 * Location of the dictionary: a file-system path, or a {@code classpath:} resource.
 */
private String dictPath;
@Autowired
private RedisTemplate redisTemplate;
@Autowired
private TaskExecutor taskExecutor;

/**
 * Loads the dictionary once at startup, publishes it to Redis and starts the periodic refresh.
 * Does nothing when filtering is disabled.
 */
@PostConstruct
public void loadSensitiveWords() {
    if (!filterEnabled) {
        log.info("SensitiveWordConfig#loadSensitiveWords filterEnabled is false, return.");
        return;
    }
    loadSensWords();
    storeSensWords();
    // Submitted exactly once, from here only; see startScheduledUpdate.
    taskExecutor.execute(this::startScheduledUpdate);
}

/**
 * Reads the dictionary into {@code sensitiveWords}: one word per line, trimmed, blank lines dropped.
 *
 * <p>When the dictionary cannot be read, the previously loaded words are kept so that a
 * transient failure does not wipe the published dictionary on the next store.
 */
private void loadSensWords() {
    if (ObjectUtils.isEmpty(dictPath)) {
        log.error("SensitiveWordConfig#loadSensWords dictPath is null or empty, skipping load.");
        return;
    }
    try (BufferedReader reader = openDictionary()) {
        sensitiveWords = reader.lines()
                .map(String::trim)
                .filter(word -> !word.isEmpty())
                .collect(Collectors.toSet());
    } catch (IOException | java.io.UncheckedIOException | java.nio.file.InvalidPathException e) {
        // The throwable is passed last so the stack trace is logged, not only the message.
        log.error("SensitiveWordConfig#loadSensitiveWords Failed to load sensitive words from {}: {}",
                dictPath, e.getMessage(), e);
    }
}

/**
 * Opens the dictionary as UTF-8 text. A {@code classpath:} prefix (the form used in the shipped
 * properties) is resolved through the class loader, because {@code Paths.get} cannot open it.
 */
private BufferedReader openDictionary() throws IOException {
    final String classpathPrefix = "classpath:";
    if (!dictPath.startsWith(classpathPrefix)) {
        return Files.newBufferedReader(Paths.get(dictPath));
    }
    String resource = dictPath.substring(classpathPrefix.length());
    if (resource.startsWith("/")) {
        // ClassLoader resource names are never written with a leading slash.
        resource = resource.substring(1);
    }
    java.io.InputStream in = SensitiveWordsConfig.class.getClassLoader().getResourceAsStream(resource);
    if (in == null) {
        throw new java.io.FileNotFoundException("classpath resource not found: " + resource);
    }
    return new BufferedReader(new java.io.InputStreamReader(in, java.nio.charset.StandardCharsets.UTF_8));
}

/**
 * Replaces the Redis set {@code SENS_WORDS_DICT} with the words currently held in memory.
 *
 * <p>The new set is written under a unique staging key and then renamed over the live key.
 * Readers therefore never observe a partially written set, and words removed from the file
 * disappear from Redis. An empty dictionary deletes the key (SADD rejects an empty member list).
 */
private void storeSensWords() {
    if (sensitiveWords.isEmpty()) {
        redisTemplate.delete(SENS_WORDS_DICT);
        return;
    }
    // Unique per refresh, so concurrent refreshers cannot interfere with each other's staging set.
    String stagingKey = SENS_WORDS_DICT + ":staging:" + java.util.UUID.randomUUID();
    redisTemplate.opsForSet().add(stagingKey, sensitiveWords.toArray(new String[0]));
    redisTemplate.rename(stagingKey, SENS_WORDS_DICT);
    log.info("SensitiveWordConfig#storeSensWords {} sensitive words stored in Redis under key '{}'.",
            sensitiveWords.size(), SENS_WORDS_DICT);
}

/**
 * Hot-reload loop: re-reads and re-publishes the dictionary every {@code UPDATE_TIME}
 * milliseconds until the thread is interrupted.
 */
private void startScheduledUpdate() {
    while (!Thread.currentThread().isInterrupted()) {
        try {
            // UPDATE_TIME is declared as 10 * 60 * 1000, i.e. milliseconds.
            TimeUnit.MILLISECONDS.sleep(UPDATE_TIME);
            log.info("SensitiveWordConfig#startScheduledUpdate start update...");
            // Call the loader, NOT loadSensitiveWords(): that entry point also submits this
            // loop to the executor, which would add one more refresher on every cycle.
            loadSensWords();
            storeSensWords();
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            log.error("SensitiveWordConfig#startScheduledUpdate interrupted: {}", e.getMessage());
            break;
        } catch (RuntimeException e) {
            // For example a Redis outage: keep the loop alive and try again next cycle.
            log.error("SensitiveWordConfig#startScheduledUpdate refresh failed, retrying next cycle", e);
        }
    }
}

}
a/austin-handler/src/main/java/com/java3y/austin/handler/config/TaskPipelineConfig.java b/austin-handler/src/main/java/com/java3y/austin/handler/config/TaskPipelineConfig.java index 4d22cf4..76e8968 100644 --- a/austin-handler/src/main/java/com/java3y/austin/handler/config/TaskPipelineConfig.java +++ b/austin-handler/src/main/java/com/java3y/austin/handler/config/TaskPipelineConfig.java @@ -3,10 +3,7 @@ package com.java3y.austin.handler.config; import com.java3y.austin.common.pipeline.ProcessController; import com.java3y.austin.common.pipeline.ProcessTemplate; -import com.java3y.austin.handler.action.DeduplicationAction; -import com.java3y.austin.handler.action.DiscardAction; -import com.java3y.austin.handler.action.SendMessageAction; -import com.java3y.austin.handler.action.ShieldAction; +import com.java3y.austin.handler.action.*; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Configuration; @@ -30,6 +27,8 @@ public class TaskPipelineConfig { @Autowired private DeduplicationAction deduplicationAction; @Autowired + private SensWordsAction sensWordsAction; + @Autowired private SendMessageAction sendMessageAction; @@ -45,7 +44,8 @@ public class TaskPipelineConfig { @Bean("taskTemplate") public ProcessTemplate taskTemplate() { ProcessTemplate processTemplate = new ProcessTemplate(); - processTemplate.setProcessList(Arrays.asList(discardAction, shieldAction, deduplicationAction, sendMessageAction)); + processTemplate.setProcessList(Arrays.asList(discardAction, shieldAction, deduplicationAction, + sensWordsAction, sendMessageAction)); return processTemplate; } diff --git a/austin-web/src/main/resources/application-dev.properties b/austin-web/src/main/resources/application-dev.properties index e0a6ac4..780303e 100644 --- a/austin-web/src/main/resources/application-dev.properties +++ b/austin-web/src/main/resources/application-dev.properties @@ -16,5 +16,6 @@ 
austin.rule.engine.enabled=false # TODO if windows os and need upload file to send message ,replace path ! austin.business.upload.crowd.path=/Users/3y/temp - - +# TODO Whether to enable [sensitive word filter] and set the [path] of sensitive words dictionary +austin.senswords.filter.enabled=true +austin.senswords.dict.path=classpath:sensitive-words.txt diff --git a/austin-web/src/main/resources/application-test.properties b/austin-web/src/main/resources/application-test.properties index 21bd0a4..1112c02 100644 --- a/austin-web/src/main/resources/application-test.properties +++ b/austin-web/src/main/resources/application-test.properties @@ -16,6 +16,10 @@ austin.rule.engine.enabled=true # TODO if windows os and need upload file to send message ,replace path ! austin.business.upload.crowd.path=/Users/3y/temp +# TODO Whether to enable [sensitive word filter] and set the [path] of sensitive words dictionary +austin.senswords.filter.enabled=true +austin.senswords.dict.path=classpath:sensitive-words.txt + # TODO if [login use officialAccount] switch [optional], if austin.login.official.account.enable=true austin.login.official.account.enable=true austin.login.official.account.appId=wx27f83ca10e06b325 diff --git a/austin-web/src/main/resources/sensitive-words.txt b/austin-web/src/main/resources/sensitive-words.txt new file mode 100644 index 0000000..6918e2d --- /dev/null +++ b/austin-web/src/main/resources/sensitive-words.txt @@ -0,0 +1,7 @@ +机密信息 +政治敏感 +违法犯罪 +黑客攻击 +网络谩骂 +admin +password \ No newline at end of file From 537f8ffdf6be3fd714282208645b42cf18c88f56 Mon Sep 17 00:00:00 2001 From: xiaoxiamo <82970607@qq.com> Date: Tue, 20 Aug 2024 16:18:36 +0800 Subject: [PATCH 2/2] =?UTF-8?q?feat:=20=E6=95=8F=E6=84=9F=E8=AF=8D?= =?UTF-8?q?=E6=B1=87=E5=A4=84=E7=90=86=EF=BC=88=E4=BA=8C=EF=BC=89=EF=BC=9A?= =?UTF-8?q?=E4=BF=AE=E6=94=B9=E6=96=87=E4=BB=B6=E5=8A=A0=E8=BD=BD=E6=96=B9?= =?UTF-8?q?=E5=BC=8F=EF=BC=8C=E6=AD=A3=E5=B8=B8=E5=8A=A0=E8=BD=BD=E6=9B=B4?= 
=?UTF-8?q?=E6=96=B0=E8=AF=8D=E5=85=B8?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../handler/config/SensitiveWordsConfig.java | 43 +++++++++++++------ 1 file changed, 30 insertions(+), 13 deletions(-) diff --git a/austin-handler/src/main/java/com/java3y/austin/handler/config/SensitiveWordsConfig.java b/austin-handler/src/main/java/com/java3y/austin/handler/config/SensitiveWordsConfig.java index abdfd23..d69a8e0 100644 --- a/austin-handler/src/main/java/com/java3y/austin/handler/config/SensitiveWordsConfig.java +++ b/austin-handler/src/main/java/com/java3y/austin/handler/config/SensitiveWordsConfig.java @@ -1,9 +1,12 @@ package com.java3y.austin.handler.config; +import com.java3y.austin.common.constant.CommonConstant; import lombok.extern.slf4j.Slf4j; import org.springframework.beans.factory.annotation.Autowired; -import org.springframework.boot.context.properties.ConfigurationProperties; +import org.springframework.beans.factory.annotation.Value; import org.springframework.context.annotation.Configuration; +import org.springframework.core.io.Resource; +import org.springframework.core.io.ResourceLoader; import org.springframework.core.task.TaskExecutor; import org.springframework.data.redis.core.RedisTemplate; import org.springframework.util.ObjectUtils; @@ -11,8 +14,8 @@ import org.springframework.util.ObjectUtils; import javax.annotation.PostConstruct; import java.io.BufferedReader; import java.io.IOException; -import java.nio.file.Files; -import java.nio.file.Paths; +import java.io.InputStreamReader; +import java.nio.charset.StandardCharsets; import java.util.Collections; import java.util.Set; import java.util.concurrent.TimeUnit; @@ -26,17 +29,21 @@ import java.util.stream.Collectors; */ @Slf4j @Configuration -@ConfigurationProperties(prefix = "austin.senswords") public class SensitiveWordsConfig { /** * 敏感词字典redis key */ public static final String SENS_WORDS_DICT = "SENS_WORDS_DICT"; + + /** + * 文件前缀 + */ + 
private static final String FILE_PREFIX = "file:"; /** * 更新时间 */ - private static final long UPDATE_TIME = 10 * 60 * 1000; + private static final long UPDATE_TIME_SECONDS = 10 * 60; /** * 敏感词字典 */ @@ -45,15 +52,20 @@ public class SensitiveWordsConfig { /** * 是否开启敏感词过滤 */ + @Value("${austin.senswords.filter.enabled}") private boolean filterEnabled; /** * 字典路径 */ + @Value("${austin.senswords.dict.path}") private String dictPath; + @Autowired private RedisTemplate redisTemplate; @Autowired private TaskExecutor taskExecutor; + @Autowired + private ResourceLoader resourceLoader; /** * 初始化敏感词字典 @@ -65,9 +77,10 @@ public class SensitiveWordsConfig { log.info("SensitiveWordConfig#loadSensitiveWords filterEnabled is false, return."); return; } + // 加载并存储 loadSensWords(); storeSensWords(); - // 开启定时任务,每10分钟更新一次 + // 定时更新 taskExecutor.execute(this::startScheduledUpdate); } @@ -79,7 +92,9 @@ public class SensitiveWordsConfig { log.error("SensitiveWordConfig#loadSensWords dictPath is null or empty, skipping load."); return; } - try (BufferedReader reader = Files.newBufferedReader(Paths.get(dictPath))) { + // 为直接路径,添加前缀 + Resource resource = resourceLoader.getResource(dictPath.startsWith(CommonConstant.SLASH) ? 
FILE_PREFIX + dictPath : dictPath); + try (BufferedReader reader = new BufferedReader(new InputStreamReader(resource.getInputStream(), StandardCharsets.UTF_8))) { sensitiveWords = reader.lines().map(String::trim).collect(Collectors.toSet()); } catch (IOException e) { log.error("SensitiveWordConfig#loadSensitiveWords Failed to load sensitive words from {}: {}", @@ -92,9 +107,13 @@ public class SensitiveWordsConfig { * 存储敏感词字典 */ private void storeSensWords() { + redisTemplate.delete(SENS_WORDS_DICT); + if (ObjectUtils.isEmpty(sensitiveWords)) { + return; + } redisTemplate.opsForSet().add(SENS_WORDS_DICT, sensitiveWords.toArray(new String[0])); - log.info("SensitiveWordConfig#storeSensWords {} sensitive words stored in Redis under key '{}'.", - sensitiveWords.size(), SENS_WORDS_DICT); + log.debug("SensitiveWordConfig#storeSensWords sensitive words stored in Redis under key [{}], count [{}].", + SENS_WORDS_DICT, sensitiveWords.size()); } /** @@ -103,13 +122,11 @@ public class SensitiveWordsConfig { private void startScheduledUpdate() { while (true) { try { - // 每10分钟更新一次 - TimeUnit.SECONDS.sleep(UPDATE_TIME); - log.info("SensitiveWordConfig#startScheduledUpdate start update..."); + TimeUnit.SECONDS.sleep(UPDATE_TIME_SECONDS); + log.debug("SensitiveWordConfig#startScheduledUpdate start update..."); loadSensitiveWords(); storeSensWords(); } catch (InterruptedException e) { - Thread.currentThread().interrupt(); log.error("SensitiveWordConfig#startScheduledUpdate interrupted: {}", e.getMessage()); break; }