diff --git a/beacon-cache/src/main/java/com/mashibing/cache/controller/CacheController.java b/beacon-cache/src/main/java/com/mashibing/cache/controller/CacheController.java
index 7ff26b5..48efd3a 100644
--- a/beacon-cache/src/main/java/com/mashibing/cache/controller/CacheController.java
+++ b/beacon-cache/src/main/java/com/mashibing/cache/controller/CacheController.java
@@ -76,6 +76,39 @@ public class CacheController {
         redisClient.sAdd(key, value);
     }
 
+    /**
+     * Intersects the posted members with the set stored at {@code sinterkey}.
+     * <p>
+     * The members are written to a temporary set at {@code key}, intersected with
+     * {@code sinterkey}, and the temporary set is always deleted afterwards.
+     *
+     * @param key       name of the temporary set; it is deleted before this method returns
+     * @param sinterkey name of the existing set to intersect with
+     * @param value     members to test; may be empty
+     * @return the members present in both sets; empty when {@code value} is empty
+     */
+    @PostMapping("/sinterstr/{key}/{sinterkey}")
+    public Set sinterStr(@PathVariable(value = "key") String key, @PathVariable(value = "sinterkey") String sinterkey, @RequestBody String... value) {
+        log.info("【缓存模块】sinterStr: key = {},sinterkey={},\nvalue = {}", key, sinterkey, value);
+        // Nothing to intersect; this also avoids issuing an add without members.
+        if (value == null || value.length == 0) {
+            return java.util.Collections.emptySet();
+        }
+        try {
+            // 1. Store the members in the temporary set.
+            redisClient.sAdd(key, value);
+            // 2. Intersect the temporary set with the target set.
+            Set result = redisClient.sIntersect(key, sinterkey);
+            if (result != null && !result.isEmpty()) {
+                log.info("【缓存模块】sinterStr: 交集={}", result);
+            }
+            return result;
+        } finally {
+            // 3. Always drop the temporary set, even when a step above fails, so it cannot leak.
+            redisClient.delete(key);
+        }
+    }
+
     @PostMapping("/smember/{key}")
     public Set smember(@PathVariable(value = "key") String key) {
         log.info("【缓存模块】smember: key = {},", key);
diff --git a/beacon-common/src/main/java/com/mashibing/common/clients/BeaconCacheClient.java b/beacon-common/src/main/java/com/mashibing/common/clients/BeaconCacheClient.java
index 81d7a9a..ac326fe 100644
--- a/beacon-common/src/main/java/com/mashibing/common/clients/BeaconCacheClient.java
+++ b/beacon-common/src/main/java/com/mashibing/common/clients/BeaconCacheClient.java
@@ -42,6 +42,12 @@ public interface BeaconCacheClient {
     @PostMapping("cache/saddstr/{key}")
     void saddStr(@PathVariable(value = "key") String key, @RequestBody String... value);
 
+    /**
+     * Intersects {@code value} with the set at {@code sinterkey}, using {@code key} as a temporary set.
+     */
+    @PostMapping("cache/sinterstr/{key}/{sinterkey}")
+    Set sinterStr(@PathVariable(value = "key") String key, @PathVariable(value = "sinterkey") String sinterkey, @RequestBody String... value);
+
     @PostMapping("cache/smember/{key}")
     Set smember(@PathVariable(value = "key") String key);
 
diff --git a/beacon-strategy/pom.xml b/beacon-strategy/pom.xml
index cdbef7e..99bcb85 100644
--- a/beacon-strategy/pom.xml
+++ b/beacon-strategy/pom.xml
@@ -48,6 +48,13 @@
             <groupId>org.projectlombok</groupId>
             <artifactId>lombok</artifactId>
         </dependency>
+        <!-- IK Analyzer: word segmentation used by the "dirtyword" strategy filter -->
+        <dependency>
+            <groupId>com.janeluo</groupId>
+            <artifactId>ikanalyzer</artifactId>
+            <version>2012_u6</version>
+        </dependency>
+
 
         <dependency>
             <groupId>com.mashibing</groupId>
diff --git a/beacon-strategy/src/main/java/com/mashibing/strategy/service/strategyfilter/impl/DFADirtyWordStrategyFilter.java b/beacon-strategy/src/main/java/com/mashibing/strategy/service/strategyfilter/impl/DFADirtyWordStrategyFilter.java
new file mode 100644
index 0000000..5a02ffe
--- /dev/null
+++ b/beacon-strategy/src/main/java/com/mashibing/strategy/service/strategyfilter/impl/DFADirtyWordStrategyFilter.java
@@ -0,0 +1,56 @@
+package com.mashibing.strategy.service.strategyfilter.impl;
+
+import com.mashibing.common.constant.CacheConstant;
+import com.mashibing.common.pojo.StandardSubmit;
+import com.mashibing.strategy.feignclient.CacheClient;
+import com.mashibing.strategy.service.strategyfilter.StrategyFilter;
+import com.mashibing.strategy.utils.DirtyWordTree;
+import lombok.extern.slf4j.Slf4j;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.stereotype.Service;
+import org.wltea.analyzer.core.IKSegmenter;
+import org.wltea.analyzer.core.Lexeme;
+
+import java.io.IOException;
+import java.io.StringReader;
+import java.util.HashSet;
+import java.util.Set;
+
+/**
+ * Sensitive-word check backed by the in-memory character tree in {@link DirtyWordTree}.
+ *
+ * @author heqijun
+ * @ClassName: DFADirtyWordStrategyFilter
+ * @Description: sensitive-word check using the DFA tree
+ * @date 2025/6/7 20:26
+ */
+
+@Slf4j
+@Service(value = "dfaDirtyword")
+public class DFADirtyWordStrategyFilter implements StrategyFilter {
+
+    @Autowired
+    CacheClient cacheClient;
+
+    /**
+     * Looks for sensitive words in the message text using {@link DirtyWordTree}.
+     * <p>
+     * A hit is only logged; the message is not rejected.
+     *
+     * @param submit message being checked
+     */
+    @Override
+    public void strategy(StandardSubmit submit) {
+        log.info("【策略模块-敏感词校验】开始====================================");
+        log.info("【策略模块-敏感词校验】通过敏感词树校验敏感词");
+        Set<String> result = DirtyWordTree.getDirtyWord(submit.getText());
+
+        if (!result.isEmpty()) {
+            log.error("【策略模块-敏感词校验】短信内容包含敏感词:{}", result);
+            // NOTE(review): rejection is disabled here, so a message containing sensitive words still
+            // passes this filter, unlike the "dirtyword" filter which throws. Confirm this is intended.
+            return;
+        }
+        log.info("【策略模块-敏感词校验】敏感词校验通过");
+    }
+}
diff --git a/beacon-strategy/src/main/java/com/mashibing/strategy/service/strategyfilter/impl/DirtyWordStrategyFilter.java b/beacon-strategy/src/main/java/com/mashibing/strategy/service/strategyfilter/impl/DirtyWordStrategyFilter.java
index d6ed21d..0e0d763 100644
--- a/beacon-strategy/src/main/java/com/mashibing/strategy/service/strategyfilter/impl/DirtyWordStrategyFilter.java
+++ b/beacon-strategy/src/main/java/com/mashibing/strategy/service/strategyfilter/impl/DirtyWordStrategyFilter.java
@@ -1,9 +1,19 @@
 package com.mashibing.strategy.service.strategyfilter.impl;
 
+import com.mashibing.common.constant.CacheConstant;
 import com.mashibing.common.pojo.StandardSubmit;
+import com.mashibing.strategy.feignclient.CacheClient;
 import com.mashibing.strategy.service.strategyfilter.StrategyFilter;
 import lombok.extern.slf4j.Slf4j;
+import org.springframework.beans.factory.annotation.Autowired;
 import org.springframework.stereotype.Service;
+import org.wltea.analyzer.core.IKSegmenter;
+import org.wltea.analyzer.core.Lexeme;
+
+import java.io.IOException;
+import java.io.StringReader;
+import java.util.HashSet;
+import java.util.Set;
 
 /**
  * @author heqijun
@@ -16,8 +26,57 @@ import org.springframework.stereotype.Service;
 @Service(value = "dirtyword")
 public class DirtyWordStrategyFilter implements StrategyFilter {
 
+    @Autowired
+    CacheClient cacheClient;
+
+    /**
+     * Tokenizes the message text with the IK segmenter and rejects the message when any
+     * token is a member of the cached sensitive-word set.
+     *
+     * @param submit message being checked
+     * @throws RuntimeException if the text contains at least one sensitive word
+     */
     @Override
-    public void strategy(StandardSubmit standardSubmit) {
-        log.info("【策略模块-敏感词校验】。。。");
+    public void strategy(StandardSubmit submit) {
+        log.info("【策略模块-敏感词校验】开始====================================");
+        String text = submit.getText();
+        if (text == null || text.isEmpty()) {
+            // Nothing to tokenize, hence nothing that could match.
+            log.info("【策略模块-敏感词校验】敏感词校验通过");
+            return;
+        }
+
+        long start = System.currentTimeMillis();
+        IKSegmenter ik = new IKSegmenter(new StringReader(text), false);
+        long end = System.currentTimeMillis();
+        log.info("【策略模块-敏感词校验】分词器读取短信耗时:{}ms", end - start);
+
+        Set<String> words = new HashSet<>();
+        start = System.currentTimeMillis();
+        try {
+            Lexeme lexeme;
+            while ((lexeme = ik.next()) != null) {
+                words.add(lexeme.getLexemeText());
+            }
+        } catch (IOException e) {
+            // Stop tokenizing on failure instead of looping again; tokens gathered so far are still checked.
+            log.error("【策略模块-敏感词校验】IK分词器处理短信内容异常", e);
+        }
+        end = System.currentTimeMillis();
+        log.info("【策略模块-敏感词校验】分词耗时:{}ms", end - start);
+
+        if (words.isEmpty()) {
+            // An empty token set cannot intersect anything; skip the remote call.
+            log.info("【策略模块-敏感词校验】敏感词校验通过");
+            return;
+        }
+
+        // Intersect the tokens with the cached sensitive-word set.
+        Set result = cacheClient.sinterStr(submit.getSequenceId().toString(), CacheConstant.DIRTY_WORD, words.toArray(new String[0]));
+        if (result != null && !result.isEmpty()) {
+            log.error("【策略模块-敏感词校验】短信内容包含敏感词:{}", result);
+            throw new RuntimeException("短信内容包含敏感词");
+        }
+        log.info("【策略模块-敏感词校验】敏感词校验通过");
     }
 }
diff --git a/beacon-strategy/src/main/java/com/mashibing/strategy/utils/DirtyWordTree.java b/beacon-strategy/src/main/java/com/mashibing/strategy/utils/DirtyWordTree.java
new file mode 100644
index 0000000..6918760
--- /dev/null
+++ b/beacon-strategy/src/main/java/com/mashibing/strategy/utils/DirtyWordTree.java
@@ -0,0 +1,108 @@
+package com.mashibing.strategy.utils;
+
+import com.mashibing.common.constant.CacheConstant;
+import com.mashibing.strategy.feignclient.CacheClient;
+
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Set;
+
+/**
+ * Sensitive-word lookup over a character tree (DFA) that is built once, when the class is
+ * initialized, from the word list held by the cache module.
+ *
+ * @author heqijun
+ * @ClassName: DirtyWordTree
+ * @Description: sensitive-word tree utility
+ * @date 2025/6/8 21:23
+ */
+
+public class DirtyWordTree {
+
+    /** Root of the tree; its children are keyed by the first character of each word. */
+    private static final Node ROOT = new Node();
+
+    static {
+        // Fetch the CacheClient bean from the Spring context through StringUtil.
+        CacheClient cacheClient = StringUtil.getBeanByClass(CacheClient.class);
+        // Ask the cache module for every sensitive word and build the tree from them.
+        buildTree(cacheClient.smember(CacheConstant.DIRTY_WORD));
+    }
+
+    /** One character position in the tree. */
+    private static final class Node {
+        /** Next characters reachable from this position. */
+        private final Map<Character, Node> children = new HashMap<>();
+        /** Whether the path from the root to this node spells a complete sensitive word. */
+        private boolean end;
+    }
+
+    /**
+     * Adds every word to the tree, one node per character.
+     *
+     * @param dirtyWordsSet sensitive words; {@code null}, {@code null} entries and empty words are ignored
+     */
+    private static void buildTree(Set<?> dirtyWordsSet) {
+        if (dirtyWordsSet == null) {
+            return;
+        }
+        for (Object entry : dirtyWordsSet) {
+            if (entry == null) {
+                continue;
+            }
+            String dirtyWord = entry.toString();
+            // Every word starts again from the root.
+            Node current = ROOT;
+            for (int i = 0; i < dirtyWord.length(); i++) {
+                Character singleWord = dirtyWord.charAt(i);
+                Node next = current.children.get(singleWord);
+                if (next == null) {
+                    next = new Node();
+                    current.children.put(singleWord, next);
+                }
+                // Descend one level for the next character.
+                current = next;
+            }
+            // The node of the last character marks the end of a word; the root is never marked.
+            if (current != ROOT) {
+                current.end = true;
+            }
+        }
+    }
+
+    /**
+     * Finds the sensitive words contained in {@code text}.
+     * <p>
+     * Matching restarts at every character and stops at the shortest word found from that
+     * position, so a longer word sharing that prefix is not reported separately.
+     *
+     * @param text text to scan; {@code null} yields an empty result
+     * @return the distinct sensitive words found, empty when there are none
+     */
+    public static Set<String> getDirtyWord(String text) {
+        Set<String> result = new HashSet<>();
+        if (text == null) {
+            return result;
+        }
+        for (int i = 0; i < text.length(); i++) {
+            Node current = ROOT;
+            for (int j = i; j < text.length(); j++) {
+                current = current.children.get(text.charAt(j));
+                if (current == null) {
+                    // No word continues with this character.
+                    break;
+                }
+                if (current.end) {
+                    // Complete word: record it and move on to the next start position.
+                    result.add(text.substring(i, j + 1));
+                    break;
+                }
+            }
+        }
+        return result;
+    }
+
+    private DirtyWordTree() {
+    }
+}
diff --git a/beacon-strategy/src/main/java/com/mashibing/strategy/utils/StringUtil.java b/beacon-strategy/src/main/java/com/mashibing/strategy/utils/StringUtil.java
new file mode 100644
index 0000000..32e3e78
--- /dev/null
+++ b/beacon-strategy/src/main/java/com/mashibing/strategy/utils/StringUtil.java
@@ -0,0 +1,59 @@
+package com.mashibing.strategy.utils;
+
+import org.springframework.beans.BeansException;
+import org.springframework.context.ApplicationContext;
+import org.springframework.context.ApplicationContextAware;
+import org.springframework.stereotype.Component;
+
+/**
+ * Gives classes that are not managed by the IoC container access to Spring beans.
+ * <p>
+ * NOTE(review): the name suggests string helpers; consider renaming to SpringUtil in a follow-up.
+ *
+ * @author heqijun
+ * @ClassName: StringUtil
+ * @Description: access Spring beans from classes outside the IoC container
+ * @date 2025/6/8 23:09
+ */
+
+@Component
+public class StringUtil implements ApplicationContextAware {
+
+    private static ApplicationContext applicationContext;
+
+    @Override
+    public void setApplicationContext(ApplicationContext applicationContext) throws BeansException {
+        StringUtil.applicationContext = applicationContext;
+    }
+
+    /**
+     * Looks a bean up by name.
+     *
+     * @param beanName name of the bean
+     * @return the bean instance
+     * @throws IllegalStateException if the application context has not been injected yet
+     */
+    public static Object getBeanByName(String beanName) {
+        return context().getBean(beanName);
+    }
+
+    /**
+     * Looks a bean up by type.
+     *
+     * @param clazz type of the bean
+     * @param <T>   bean type
+     * @return the bean instance
+     * @throws IllegalStateException if the application context has not been injected yet
+     */
+    public static <T> T getBeanByClass(Class<T> clazz) {
+        return context().getBean(clazz);
+    }
+
+    /** Returns the injected context, failing with a clear message when it is not available yet. */
+    private static ApplicationContext context() {
+        if (applicationContext == null) {
+            throw new IllegalStateException("Spring ApplicationContext has not been injected yet");
+        }
+        return applicationContext;
+    }
+}
diff --git a/beacon-test/src/test/java/com/mashibing/test/mapper/ClientBusinessMapperTest.java b/beacon-test/src/test/java/com/mashibing/test/mapper/ClientBusinessMapperTest.java
index fa30f6a..b4b548c 100644
--- a/beacon-test/src/test/java/com/mashibing/test/mapper/ClientBusinessMapperTest.java
+++ b/beacon-test/src/test/java/com/mashibing/test/mapper/ClientBusinessMapperTest.java
@@ -27,6 +27,7 @@ class ClientBusinessMapperTest {
         ClientBusiness cb = mapper.findById(1L);
         cb.setIpAddress("192.168.1.5");
         cb.setClientFilters("phase," + cb.getClientFilters());
+        cb.setClientFilters(cb.getClientFilters().replace("dirtyword", "dfaDirtyword"));
         System.out.println(cb);
         ObjectMapper mapper = new ObjectMapper();
         cacheClient.hset("client_business:" + cb.getApikey(), mapper.convertValue(cb, Map.class));