diff --git a/austin-handler/src/main/java/com/java3y/austin/handler/action/SensWordsAction.java b/austin-handler/src/main/java/com/java3y/austin/handler/action/SensWordsAction.java
new file mode 100644
index 0000000..bf825e6
--- /dev/null
+++ b/austin-handler/src/main/java/com/java3y/austin/handler/action/SensWordsAction.java
@@ -0,0 +1,159 @@
+package com.java3y.austin.handler.action;
+
+import com.java3y.austin.common.domain.TaskInfo;
+import com.java3y.austin.common.dto.model.*;
+import com.java3y.austin.common.pipeline.BusinessProcess;
+import com.java3y.austin.common.pipeline.ProcessContext;
+import com.java3y.austin.handler.config.SensitiveWordsConfig;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.data.redis.core.RedisTemplate;
+import org.springframework.stereotype.Service;
+import org.springframework.util.ObjectUtils;
+
+import java.util.*;
+
+/**
+ * Pipeline step that masks sensitive words in the text content of a message.
+ * <p>
+ * The dictionary is read from the Redis set stored under
+ * {@link SensitiveWordsConfig#SENS_WORDS_DICT}; every dictionary word found in the content is
+ * replaced by the same number of '*' characters.
+ *
+ * @author xiaoxiamao
+ * @date 2024/08/17
+ */
+@Service
+public class SensWordsAction implements BusinessProcess<TaskInfo> {
+
+    // NOTE(review): declared without type arguments in the reviewed text - confirm the intended
+    // parameterization before tightening it, since it can affect which bean gets injected.
+    @Autowired
+    private RedisTemplate redisTemplate;
+
+    /**
+     * Masks sensitive words in the content model carried by the task.
+     * <p>
+     * The content model is dispatched on its runtime type rather than cast according to a numeric
+     * code, so a cast can never hit a model of another type. Models without a handled text
+     * content are left untouched.
+     *
+     * @param context pipeline context whose process model carries the content model to filter
+     * @see com.java3y.austin.common.enums.ChannelType
+     */
+    @Override
+    public void process(ProcessContext<TaskInfo> context) {
+        // Load the dictionary; a missing key is treated as an empty dictionary.
+        Set<String> sensDict = Optional.ofNullable(redisTemplate.opsForSet().members(SensitiveWordsConfig.SENS_WORDS_DICT))
+                .orElse(Collections.emptySet());
+        // Nothing to mask when the dictionary is empty.
+        if (ObjectUtils.isEmpty(sensDict)) {
+            return;
+        }
+        // Dispatch on the concrete model type so that each cast is guaranteed to match.
+        Object contentModel = context.getProcessModel().getContentModel();
+        if (contentModel instanceof PushContentModel) {
+            PushContentModel push = (PushContentModel) contentModel;
+            push.setContent(filter(push.getContent(), sensDict));
+        } else if (contentModel instanceof SmsContentModel) {
+            SmsContentModel sms = (SmsContentModel) contentModel;
+            sms.setContent(filter(sms.getContent(), sensDict));
+        } else if (contentModel instanceof EmailContentModel) {
+            EmailContentModel email = (EmailContentModel) contentModel;
+            email.setContent(filter(email.getContent(), sensDict));
+        } else if (contentModel instanceof EnterpriseWeChatContentModel) {
+            EnterpriseWeChatContentModel weChat = (EnterpriseWeChatContentModel) contentModel;
+            weChat.setContent(filter(weChat.getContent(), sensDict));
+        } else if (contentModel instanceof DingDingRobotContentModel) {
+            DingDingRobotContentModel dingRobot = (DingDingRobotContentModel) contentModel;
+            dingRobot.setContent(filter(dingRobot.getContent(), sensDict));
+        } else if (contentModel instanceof DingDingWorkContentModel) {
+            DingDingWorkContentModel dingWork = (DingDingWorkContentModel) contentModel;
+            dingWork.setContent(filter(dingWork.getContent(), sensDict));
+        } else if (contentModel instanceof EnterpriseWeChatRobotContentModel) {
+            EnterpriseWeChatRobotContentModel weChatRobot = (EnterpriseWeChatRobotContentModel) contentModel;
+            weChatRobot.setContent(filter(weChatRobot.getContent(), sensDict));
+        } else if (contentModel instanceof FeiShuRobotContentModel) {
+            FeiShuRobotContentModel feiShu = (FeiShuRobotContentModel) contentModel;
+            feiShu.setContent(filter(feiShu.getContent(), sensDict));
+        }
+        // Every other content model (IM, official account, mini program, Alipay mini program)
+        // is not filtered for now.
+    }
+
+    /**
+     * Replaces every dictionary word found in {@code content} with '*' characters of equal length.
+     * <p>
+     * At each position the longest dictionary word starting there is masked, then scanning
+     * resumes right after it.
+     *
+     * @param content  text to filter
+     * @param sensDict sensitive words
+     * @return the masked text; {@code content} itself when either argument is null or empty
+     */
+    private String filter(String content, Set<String> sensDict) {
+        if (ObjectUtils.isEmpty(content) || ObjectUtils.isEmpty(sensDict)) {
+            return content;
+        }
+        // Build the trie, then scan the content from left to right.
+        TrieNode root = buildTrie(sensDict);
+        StringBuilder result = new StringBuilder();
+        int n = content.length();
+        int i = 0;
+
+        while (i < n) {
+            TrieNode node = root;
+            int j = i;
+            int lastMatchEnd = -1;
+
+            // Walk the trie as far as the content allows, remembering the longest full word.
+            while (j < n && node != null) {
+                node = node.children.get(content.charAt(j));
+                if (node != null && node.isEnd) {
+                    lastMatchEnd = j;
+                }
+                j++;
+            }
+
+            if (lastMatchEnd != -1) {
+                // A sensitive word spans [i, lastMatchEnd]: mask it with '*'.
+                for (int k = i; k <= lastMatchEnd; k++) {
+                    result.append('*');
+                }
+                i = lastMatchEnd + 1;
+            } else {
+                result.append(content.charAt(i));
+                i++;
+            }
+        }
+
+        return result.toString();
+    }
+
+    /**
+     * Builds a trie containing every word of the dictionary.
+     *
+     * @param sensDict sensitive words
+     * @return root node of the trie
+     */
+    private TrieNode buildTrie(Set<String> sensDict) {
+        TrieNode root = new TrieNode();
+        for (String word : sensDict) {
+            TrieNode node = root;
+            for (char c : word.toCharArray()) {
+                node = node.children.computeIfAbsent(c, k -> new TrieNode());
+            }
+            node.isEnd = true;
+        }
+        return root;
+    }
+
+    /**
+     * Trie node.
+     */
+    private static class TrieNode {
+        Map<Character, TrieNode> children = new HashMap<>();
+        // True when the path from the root to this node spells a complete dictionary word.
+        boolean isEnd = false;
+    }
+
+}
diff --git a/austin-handler/src/main/java/com/java3y/austin/handler/config/SensitiveWordsConfig.java b/austin-handler/src/main/java/com/java3y/austin/handler/config/SensitiveWordsConfig.java new file mode 100644 index 0000000..d69a8e0 --- /dev/null +++ b/austin-handler/src/main/java/com/java3y/austin/handler/config/SensitiveWordsConfig.java @@ -0,0 +1,136 @@ +package com.java3y.austin.handler.config; + +import com.java3y.austin.common.constant.CommonConstant; +import lombok.extern.slf4j.Slf4j; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.context.annotation.Configuration; +import org.springframework.core.io.Resource; +import org.springframework.core.io.ResourceLoader; +import 
org.springframework.core.task.TaskExecutor;
+import org.springframework.data.redis.core.RedisTemplate;
+import org.springframework.util.ObjectUtils;
+
+import javax.annotation.PostConstruct;
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.nio.charset.StandardCharsets;
+import java.util.Collections;
+import java.util.Set;
+import java.util.concurrent.TimeUnit;
+import java.util.stream.Collectors;
+
+/**
+ * Sensitive-word dictionary configuration: loads the dictionary, stores it in Redis and refreshes it periodically.
+ *
+ * @author xiaoxiamao
+ * @date 2024/08/17
+ */
+@Slf4j
+@Configuration
+public class SensitiveWordsConfig {
+
+    /**
+     * Redis key of the sensitive-word dictionary set.
+     */
+    public static final String SENS_WORDS_DICT = "SENS_WORDS_DICT";
+
+    /**
+     * Resource prefix prepended to direct file paths.
+     */
+    private static final String FILE_PREFIX = "file:";
+    /**
+     * Interval between two dictionary reloads, in seconds.
+     */
+    private static final long UPDATE_TIME_SECONDS = 10 * 60;
+    /**
+     * Most recently loaded dictionary.
+     */
+    private Set<String> sensitiveWords = Collections.emptySet();
+
+    /**
+     * Whether sensitive-word filtering is enabled.
+     */
+    @Value("${austin.senswords.filter.enabled}")
+    private boolean filterEnabled;
+    /**
+     * Location of the dictionary resource.
+     */
+    @Value("${austin.senswords.dict.path}")
+    private String dictPath;
+
+    @Autowired
+    private RedisTemplate redisTemplate;
+    @Autowired
+    private TaskExecutor taskExecutor;
+    @Autowired
+    private ResourceLoader resourceLoader;
+
+    /**
+     * Startup hook: when filtering is enabled, loads the dictionary, stores it and submits the refresh loop.
+     */
+    @PostConstruct
+    public void loadSensitiveWords() {
+        // Filtering disabled: nothing to load and no refresh loop to start.
+        if (!filterEnabled) {
+            log.info("SensitiveWordConfig#loadSensitiveWords filterEnabled is false, return.");
+            return;
+        }
+        // Load and store.
+        loadSensWords();
+        storeSensWords();
+        // Submit the periodic refresh loop; this must happen exactly once.
+        taskExecutor.execute(this::startScheduledUpdate);
+    }
+
+    /**
+     * Reads the dictionary resource, one trimmed word per line (blank lines skipped); empty on I/O failure.
+     */
+    private void loadSensWords() {
+        if (ObjectUtils.isEmpty(dictPath)) {
+            log.error("SensitiveWordConfig#loadSensWords dictPath is null or empty, skipping load.");
+            return;
+        }
+        // A direct path (one starting with CommonConstant.SLASH) gets the file prefix.
+        Resource resource = resourceLoader.getResource(dictPath.startsWith(CommonConstant.SLASH) ? FILE_PREFIX + dictPath : dictPath);
+        try (BufferedReader reader = new BufferedReader(new InputStreamReader(resource.getInputStream(), StandardCharsets.UTF_8))) {
+            sensitiveWords = reader.lines().map(String::trim).filter(word -> !word.isEmpty()).collect(Collectors.toSet());
+        } catch (IOException e) {
+            log.error("SensitiveWordConfig#loadSensitiveWords Failed to load sensitive words from {}: {}", dictPath, e.getMessage(), e);
+            sensitiveWords = Collections.emptySet();
+        }
+    }
+
+    /**
+     * Replaces the dictionary stored in Redis with the in-memory one; an empty dictionary only deletes the key.
+     */
+    private void storeSensWords() {
+        redisTemplate.delete(SENS_WORDS_DICT);
+        if (ObjectUtils.isEmpty(sensitiveWords)) {
+            return;
+        }
+        redisTemplate.opsForSet().add(SENS_WORDS_DICT, sensitiveWords.toArray(new String[0]));
+        log.debug("SensitiveWordConfig#storeSensWords sensitive words stored in Redis under key [{}], count [{}].", SENS_WORDS_DICT, sensitiveWords.size());
+    }
+
+    /**
+     * Hot reload: reloads and stores the dictionary every UPDATE_TIME_SECONDS seconds until the thread is interrupted.
+     */
+    private void startScheduledUpdate() {
+        while (true) {
+            try {
+                TimeUnit.SECONDS.sleep(UPDATE_TIME_SECONDS);
+                log.debug("SensitiveWordConfig#startScheduledUpdate start update...");
+                // Reload only: loadSensitiveWords() would submit one more refresh loop on every cycle.
+                loadSensWords();
+                storeSensWords();
+            } catch (InterruptedException e) {
+                log.error("SensitiveWordConfig#startScheduledUpdate interrupted: {}", e.getMessage());
+                Thread.currentThread().interrupt();
+                break;
+            }
+        }
+    }
+
+}
diff --git a/austin-handler/src/main/java/com/java3y/austin/handler/config/TaskPipelineConfig.java b/austin-handler/src/main/java/com/java3y/austin/handler/config/TaskPipelineConfig.java index 4d22cf4..76e8968 100644 --- a/austin-handler/src/main/java/com/java3y/austin/handler/config/TaskPipelineConfig.java +++ b/austin-handler/src/main/java/com/java3y/austin/handler/config/TaskPipelineConfig.java @@ -3,10 +3,7 @@ package com.java3y.austin.handler.config; import com.java3y.austin.common.pipeline.ProcessController; import com.java3y.austin.common.pipeline.ProcessTemplate; -import com.java3y.austin.handler.action.DeduplicationAction; -import com.java3y.austin.handler.action.DiscardAction; -import 
com.java3y.austin.handler.action.SendMessageAction; -import com.java3y.austin.handler.action.ShieldAction; +import com.java3y.austin.handler.action.*; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Configuration; @@ -30,6 +27,8 @@ public class TaskPipelineConfig { @Autowired private DeduplicationAction deduplicationAction; @Autowired + private SensWordsAction sensWordsAction; + @Autowired private SendMessageAction sendMessageAction; @@ -45,7 +44,8 @@ public class TaskPipelineConfig { @Bean("taskTemplate") public ProcessTemplate taskTemplate() { ProcessTemplate processTemplate = new ProcessTemplate(); - processTemplate.setProcessList(Arrays.asList(discardAction, shieldAction, deduplicationAction, sendMessageAction)); + processTemplate.setProcessList(Arrays.asList(discardAction, shieldAction, deduplicationAction, + sensWordsAction, sendMessageAction)); return processTemplate; } diff --git a/austin-web/src/main/resources/application-dev.properties b/austin-web/src/main/resources/application-dev.properties index e0a6ac4..780303e 100644 --- a/austin-web/src/main/resources/application-dev.properties +++ b/austin-web/src/main/resources/application-dev.properties @@ -16,5 +16,6 @@ austin.rule.engine.enabled=false # TODO if windows os and need upload file to send message ,replace path ! 
austin.business.upload.crowd.path=/Users/3y/temp - - +# TODO Whether to enable [sensitive word filter] and set the [path] of sensitive words dictionary +austin.senswords.filter.enabled=true +austin.senswords.dict.path=classpath:sensitive-words.txt diff --git a/austin-web/src/main/resources/application-test.properties b/austin-web/src/main/resources/application-test.properties index 21bd0a4..1112c02 100644 --- a/austin-web/src/main/resources/application-test.properties +++ b/austin-web/src/main/resources/application-test.properties @@ -16,6 +16,10 @@ austin.rule.engine.enabled=true # TODO if windows os and need upload file to send message ,replace path ! austin.business.upload.crowd.path=/Users/3y/temp +# TODO Whether to enable [sensitive word filter] and set the [path] of sensitive words dictionary +austin.senswords.filter.enabled=true +austin.senswords.dict.path=classpath:sensitive-words.txt + # TODO if [login use officialAccount] switch [optional], if austin.login.official.account.enable=true austin.login.official.account.enable=true austin.login.official.account.appId=wx27f83ca10e06b325 diff --git a/austin-web/src/main/resources/sensitive-words.txt b/austin-web/src/main/resources/sensitive-words.txt new file mode 100644 index 0000000..6918e2d --- /dev/null +++ b/austin-web/src/main/resources/sensitive-words.txt @@ -0,0 +1,7 @@ +机密信息 +政治敏感 +违法犯罪 +黑客攻击 +网络谩骂 +admin +password \ No newline at end of file