Merge pull request #68 from xiaoxiamo/sensitive_word

feat:添加消息敏感词过滤功能
master
Java3y 4 months ago committed by GitHub
commit e0ebc3d781
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

@ -0,0 +1,186 @@
package com.java3y.austin.handler.action;
import com.java3y.austin.common.domain.TaskInfo;
import com.java3y.austin.common.dto.model.*;
import com.java3y.austin.common.pipeline.BusinessProcess;
import com.java3y.austin.common.pipeline.ProcessContext;
import com.java3y.austin.handler.config.SensitiveWordsConfig;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.data.redis.core.RedisTemplate;
import org.springframework.stereotype.Service;
import org.springframework.util.ObjectUtils;
import java.util.*;
/**
*
*
* @author xiaoxiamao
* @date 2024/08/17
*/
@Service
public class SensWordsAction implements BusinessProcess<TaskInfo> {
@Autowired
private RedisTemplate<String, String> redisTemplate;
/**
*
*
* @param context
*
* @see com.java3y.austin.common.enums.ChannelType
*/
@Override
public void process(ProcessContext<TaskInfo> context) {
// 获取敏感词典
Set<String> sensDict = Optional.ofNullable(redisTemplate.opsForSet().members(SensitiveWordsConfig.SENS_WORDS_DICT))
.orElse(Collections.emptySet());
// 如果敏感词典为空,不过滤
if (ObjectUtils.isEmpty(sensDict)) {
return;
}
switch (context.getProcessModel().getMsgType()) {
// IM
case 10:
// 无文本内容,暂不做过滤处理
break;
// PUSH
case 20:
PushContentModel pushContentModel =
(PushContentModel) context.getProcessModel().getContentModel();
pushContentModel.setContent(filter(pushContentModel.getContent(), sensDict));
break;
// SMS
case 30:
SmsContentModel smsContentModel =
(SmsContentModel) context.getProcessModel().getContentModel();
smsContentModel.setContent(filter(smsContentModel.getContent(), sensDict));
break;
// EMAIL
case 40:
EmailContentModel emailContentModel =
(EmailContentModel) context.getProcessModel().getContentModel();
emailContentModel.setContent(filter(emailContentModel.getContent(), sensDict));
break;
// OFFICIAL_ACCOUNT
case 50:
// 无文本内容,暂不做过滤处理
break;
// MINI_PROGRAM
case 60:
// 无文本内容,暂不做过滤处理
break;
// ENTERPRISE_WE_CHAT
case 70:
EnterpriseWeChatContentModel enterpriseWeChatContentModel =
(EnterpriseWeChatContentModel) context.getProcessModel().getContentModel();
enterpriseWeChatContentModel.setContent(filter(enterpriseWeChatContentModel.getContent(), sensDict));
break;
// DING_DING_ROBOT
case 80:
DingDingRobotContentModel dingDingRobotContentModel =
(DingDingRobotContentModel) context.getProcessModel().getContentModel();
dingDingRobotContentModel.setContent(filter(dingDingRobotContentModel.getContent(), sensDict));
break;
// DING_DING_WORK_NOTICE
case 90:
DingDingWorkContentModel dingDingWorkContentModel =
(DingDingWorkContentModel) context.getProcessModel().getContentModel();
dingDingWorkContentModel.setContent(filter(dingDingWorkContentModel.getContent(), sensDict));
break;
// ENTERPRISE_WE_CHAT_ROBOT
case 100:
EnterpriseWeChatRobotContentModel enterpriseWeChatRobotContentModel =
(EnterpriseWeChatRobotContentModel) context.getProcessModel().getContentModel();
enterpriseWeChatRobotContentModel.setContent(filter(enterpriseWeChatRobotContentModel.getContent(), sensDict));
break;
// FEI_SHU_ROBOT
case 110:
FeiShuRobotContentModel feiShuRobotContentModel =
(FeiShuRobotContentModel) context.getProcessModel().getContentModel();
feiShuRobotContentModel.setContent(filter(feiShuRobotContentModel.getContent(), sensDict));
break;
// ALIPAY_MINI_PROGRAM
case 120:
// 无文本内容,暂不做过滤处理
break;
default:
break;
}
}
/**
* '*'
*
* @param content
* @param sensDict
* @return
*/
private String filter(String content, Set<String> sensDict) {
if (ObjectUtils.isEmpty(content) || ObjectUtils.isEmpty(sensDict)) {
return content;
}
// 构建字典树
TrieNode root = buildTrie(sensDict);
StringBuilder result = new StringBuilder();
int n = content.length();
int i = 0;
while (i < n) {
TrieNode node = root;
int j = i;
int lastMatchEnd = -1;
while (j < n && node != null) {
node = node.children.get(content.charAt(j));
if (node != null && node.isEnd) {
lastMatchEnd = j;
}
j++;
}
if (lastMatchEnd != -1) {
// 找到敏感词,用'*'替换
for (int k = i; k <= lastMatchEnd; k++) {
result.append('*');
}
i = lastMatchEnd + 1;
} else {
result.append(content.charAt(i));
i++;
}
}
return result.toString();
}
/**
*
*
* @param sensDict
* @return
*/
private TrieNode buildTrie(Set<String> sensDict) {
TrieNode root = new TrieNode();
for (String word : sensDict) {
TrieNode node = root;
for (char c : word.toCharArray()) {
node = node.children.computeIfAbsent(c, k -> new TrieNode());
}
node.isEnd = true;
}
return root;
}
/**
*
*/
private static class TrieNode {
Map<Character, TrieNode> children = new HashMap<>();
// 是否为叶子节点
boolean isEnd = false;
}
}

@ -0,0 +1,136 @@
package com.java3y.austin.handler.config;
import com.java3y.austin.common.constant.CommonConstant;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.context.annotation.Configuration;
import org.springframework.core.io.Resource;
import org.springframework.core.io.ResourceLoader;
import org.springframework.core.task.TaskExecutor;
import org.springframework.data.redis.core.RedisTemplate;
import org.springframework.util.ObjectUtils;
import javax.annotation.PostConstruct;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.Collections;
import java.util.Set;
import java.util.concurrent.TimeUnit;
import java.util.stream.Collectors;
/**
*
*
* @author xiaoxiamao
* @date 2024/08/17
*/
@Slf4j
@Configuration
public class SensitiveWordsConfig {
/**
* redis key
*/
public static final String SENS_WORDS_DICT = "SENS_WORDS_DICT";
/**
*
*/
private static final String FILE_PREFIX = "file:";
/**
*
*/
private static final long UPDATE_TIME_SECONDS = 10 * 60;
/**
*
*/
private Set<String> sensitiveWords = Collections.emptySet();
/**
*
*/
@Value("${austin.senswords.filter.enabled}")
private boolean filterEnabled;
/**
*
*/
@Value("${austin.senswords.dict.path}")
private String dictPath;
@Autowired
private RedisTemplate<String, String> redisTemplate;
@Autowired
private TaskExecutor taskExecutor;
@Autowired
private ResourceLoader resourceLoader;
/**
*
*/
@PostConstruct
public void loadSensitiveWords() {
// 不开启过滤,直接返回
if (!filterEnabled) {
log.info("SensitiveWordConfig#loadSensitiveWords filterEnabled is false, return.");
return;
}
// 加载并存储
loadSensWords();
storeSensWords();
// 定时更新
taskExecutor.execute(this::startScheduledUpdate);
}
/**
*
*/
private void loadSensWords() {
if (ObjectUtils.isEmpty(dictPath)) {
log.error("SensitiveWordConfig#loadSensWords dictPath is null or empty, skipping load.");
return;
}
// 为直接路径,添加前缀
Resource resource = resourceLoader.getResource(dictPath.startsWith(CommonConstant.SLASH) ? FILE_PREFIX + dictPath : dictPath);
try (BufferedReader reader = new BufferedReader(new InputStreamReader(resource.getInputStream(), StandardCharsets.UTF_8))) {
sensitiveWords = reader.lines().map(String::trim).collect(Collectors.toSet());
} catch (IOException e) {
log.error("SensitiveWordConfig#loadSensitiveWords Failed to load sensitive words from {}: {}",
dictPath, e.getMessage());
sensitiveWords = Collections.emptySet();
}
}
/**
*
*/
private void storeSensWords() {
redisTemplate.delete(SENS_WORDS_DICT);
if (ObjectUtils.isEmpty(sensitiveWords)) {
return;
}
redisTemplate.opsForSet().add(SENS_WORDS_DICT, sensitiveWords.toArray(new String[0]));
log.debug("SensitiveWordConfig#storeSensWords sensitive words stored in Redis under key [{}], count [{}].",
SENS_WORDS_DICT, sensitiveWords.size());
}
/**
*
*/
private void startScheduledUpdate() {
while (true) {
try {
TimeUnit.SECONDS.sleep(UPDATE_TIME_SECONDS);
log.debug("SensitiveWordConfig#startScheduledUpdate start update...");
loadSensitiveWords();
storeSensWords();
} catch (InterruptedException e) {
log.error("SensitiveWordConfig#startScheduledUpdate interrupted: {}", e.getMessage());
break;
}
}
}
}

@ -3,10 +3,7 @@ package com.java3y.austin.handler.config;
import com.java3y.austin.common.pipeline.ProcessController; import com.java3y.austin.common.pipeline.ProcessController;
import com.java3y.austin.common.pipeline.ProcessTemplate; import com.java3y.austin.common.pipeline.ProcessTemplate;
import com.java3y.austin.handler.action.DeduplicationAction; import com.java3y.austin.handler.action.*;
import com.java3y.austin.handler.action.DiscardAction;
import com.java3y.austin.handler.action.SendMessageAction;
import com.java3y.austin.handler.action.ShieldAction;
import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration; import org.springframework.context.annotation.Configuration;
@ -30,6 +27,8 @@ public class TaskPipelineConfig {
@Autowired @Autowired
private DeduplicationAction deduplicationAction; private DeduplicationAction deduplicationAction;
@Autowired @Autowired
private SensWordsAction sensWordsAction;
@Autowired
private SendMessageAction sendMessageAction; private SendMessageAction sendMessageAction;
@ -45,7 +44,8 @@ public class TaskPipelineConfig {
@Bean("taskTemplate") @Bean("taskTemplate")
public ProcessTemplate taskTemplate() { public ProcessTemplate taskTemplate() {
ProcessTemplate processTemplate = new ProcessTemplate(); ProcessTemplate processTemplate = new ProcessTemplate();
processTemplate.setProcessList(Arrays.asList(discardAction, shieldAction, deduplicationAction, sendMessageAction)); processTemplate.setProcessList(Arrays.asList(discardAction, shieldAction, deduplicationAction,
sensWordsAction, sendMessageAction));
return processTemplate; return processTemplate;
} }

@ -16,5 +16,6 @@ austin.rule.engine.enabled=false
# TODO if windows os and need upload file to send message ,replace path ! # TODO if windows os and need upload file to send message ,replace path !
austin.business.upload.crowd.path=/Users/3y/temp austin.business.upload.crowd.path=/Users/3y/temp
# TODO Whether to enable [sensitive word filter] and set the [path] of sensitive words dictionary
austin.senswords.filter.enabled=true
austin.senswords.dict.path=classpath:sensitive-words.txt

@ -16,6 +16,10 @@ austin.rule.engine.enabled=true
# TODO if windows os and need upload file to send message ,replace path ! # TODO if windows os and need upload file to send message ,replace path !
austin.business.upload.crowd.path=/Users/3y/temp austin.business.upload.crowd.path=/Users/3y/temp
# TODO Whether to enable [sensitive word filter] and set the [path] of sensitive words dictionary
austin.senswords.filter.enabled=true
austin.senswords.dict.path=classpath:sensitive-words.txt
# TODO if [login use officialAccount] switch [optional], if austin.login.official.account.enable=true # TODO if [login use officialAccount] switch [optional], if austin.login.official.account.enable=true
austin.login.official.account.enable=true austin.login.official.account.enable=true
austin.login.official.account.appId=wx27f83ca10e06b325 austin.login.official.account.appId=wx27f83ca10e06b325

@ -0,0 +1,7 @@
机密信息
政治敏感
违法犯罪
黑客攻击
网络谩骂
admin
password
Loading…
Cancel
Save