diff --git a/doc/CHANGE_LOG.md b/CHANGE_LOG.md similarity index 93% rename from doc/CHANGE_LOG.md rename to CHANGE_LOG.md index a4c1aca..c71fbf9 100644 --- a/doc/CHANGE_LOG.md +++ b/CHANGE_LOG.md @@ -93,3 +93,10 @@ | 序号 | 变更类型 | 说明 | 时间 | 备注 | |:---|:---|:---|:---|:--| | 1 | A | 添加对于网址的过滤 | 2020-1-16 20:51:58 | | + +# release_0.0.13 + +| 序号 | 变更类型 | 说明 | 时间 | 备注 | +|:---|:---|:---|:---|:--| +| 1 | A | 新增 Helper 工具类 | 2021-5-12 20:51:58 | | +| 2 | A | 新增动态词库初始化支持 | 2021-5-12 20:51:58 | | \ No newline at end of file diff --git a/README.md b/README.md index b153145..211d497 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ [sensitive-word](https://github.com/houbb/sensitive-word) 基于 DFA 算法实现的高性能敏感词工具。 [![Maven Central](https://maven-badges.herokuapp.com/maven-central/com.github.houbb/sensitive-word/badge.svg)](http://mvnrepository.com/artifact/com.github.houbb/sensitive-word) - +[![Open Source Love](https://badges.frapsoft.com/os/v2/open-source.svg?v=103)](https://github.com/houbb/sensitive-word) [![](https://img.shields.io/badge/license-Apache2-FF0080.svg)](https://github.com/houbb/sensitive-word/blob/master/LICENSE.txt) ## 创作目的 @@ -56,17 +56,16 @@ com.github.houbb sensitive-word - 0.0.12 + 0.0.13 ``` ## api 概览 -`SensitiveWordBs` 作为敏感词的引导类,核心方法如下: +`SensitiveWordHelper` 作为敏感词的工具类,核心方法如下: | 方法 | 参数 | 返回值| 说明 | |:---|:---|:---|:---| -| newInstance() | 无 | 引导类 | 初始化引导类 | | contains(String) | 待验证的字符串 | 布尔值 | 验证字符串是否包含敏感词 | | findAll(String) | 待验证的字符串 | 字符串列表 | 返回字符串中所有敏感词 | | replace(String, char) | 使用指定的 char 替换敏感词 | 字符串 | 返回脱敏后的字符串 | @@ -74,14 +73,14 @@ ## 使用实例 -所有测试案例参见 [SensitiveWordBsTest](https://github.com/houbb/sensitive-word/blob/master/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsTest.java) +所有测试案例参见 [SensitiveWordHelperTest](https://github.com/houbb/sensitive-word/blob/master/src/test/java/com/github/houbb/sensitive/word/core/SensitiveWordHelperTest.java) ### 判断是否包含敏感词 ```java final String text = "五星红旗迎风飘扬,毛主席的画像屹立在天安门前。"; 
-Assert.assertTrue(SensitiveWordBs.newInstance().contains(text)); +Assert.assertTrue(SensitiveWordHelper.contains(text)); ``` ### 返回第一个敏感词 @@ -89,7 +88,7 @@ Assert.assertTrue(SensitiveWordBs.newInstance().contains(text)); ```java final String text = "五星红旗迎风飘扬,毛主席的画像屹立在天安门前。"; -String word = SensitiveWordBs.newInstance().findFirst(text); +String word = SensitiveWordHelper.findFirst(text); Assert.assertEquals("五星红旗", word); ``` @@ -98,7 +97,7 @@ Assert.assertEquals("五星红旗", word); ```java final String text = "五星红旗迎风飘扬,毛主席的画像屹立在天安门前。"; -List wordList = SensitiveWordBs.newInstance().findAll(text); +List wordList = SensitiveWordHelper.findAll(text); Assert.assertEquals("[五星红旗, 毛主席, 天安门]", wordList.toString()); ``` @@ -106,7 +105,7 @@ Assert.assertEquals("[五星红旗, 毛主席, 天安门]", wordList.toString()) ```java final String text = "五星红旗迎风飘扬,毛主席的画像屹立在天安门前。"; -String result = SensitiveWordBs.newInstance().replace(text); +String result = SensitiveWordHelper.replace(text); Assert.assertEquals("****迎风飘扬,***的画像屹立在***前。", result); ``` @@ -114,7 +113,7 @@ Assert.assertEquals("****迎风飘扬,***的画像屹立在***前。", result) ```java final String text = "五星红旗迎风飘扬,毛主席的画像屹立在天安门前。"; -String result = SensitiveWordBs.newInstance().replace(text, '0'); +String result = SensitiveWordHelper.replace(text, '0'); Assert.assertEquals("0000迎风飘扬,000的画像屹立在000前。", result); ``` @@ -129,7 +128,7 @@ Assert.assertEquals("0000迎风飘扬,000的画像屹立在000前。", result) ```java final String text = "fuCK the bad words."; -String word = SensitiveWordBs.newInstance().findFirst(text); +String word = SensitiveWordHelper.findFirst(text); Assert.assertEquals("fuCK", word); ``` @@ -138,7 +137,7 @@ Assert.assertEquals("fuCK", word); ```java final String text = "fuck the bad words."; -String word = SensitiveWordBs.newInstance().findFirst(text); +String word = SensitiveWordHelper.findFirst(text); Assert.assertEquals("fuck", word); ``` @@ -149,7 +148,7 @@ Assert.assertEquals("fuck", word); ```java final String text = "这个是我的微信:9⓿二肆⁹₈③⑸⒋➃㈤㊄"; -List 
wordList = SensitiveWordBs.newInstance().findAll(text); +List wordList = SensitiveWordHelper.findAll(text); Assert.assertEquals("[9⓿二肆⁹₈③⑸⒋➃㈤㊄]", wordList.toString()); ``` @@ -158,7 +157,7 @@ Assert.assertEquals("[9⓿二肆⁹₈③⑸⒋➃㈤㊄]", wordList.toString()) ```java final String text = "我爱我的祖国和五星紅旗。"; -List wordList = SensitiveWordBs.newInstance().findAll(text); +List wordList = SensitiveWordHelper.findAll(text); Assert.assertEquals("[五星紅旗]", wordList.toString()); ``` @@ -167,7 +166,7 @@ Assert.assertEquals("[五星紅旗]", wordList.toString()); ```java final String text = "Ⓕⓤc⒦ the bad words"; -List wordList = SensitiveWordBs.newInstance().findAll(text); +List wordList = SensitiveWordHelper.findAll(text); Assert.assertEquals("[Ⓕⓤc⒦]", wordList.toString()); ``` @@ -176,7 +175,7 @@ Assert.assertEquals("[Ⓕⓤc⒦]", wordList.toString()); ```java final String text = "ⒻⒻⒻfⓤuⓤ⒰cⓒ⒦ the bad words"; -List wordList = SensitiveWordBs.newInstance().findAll(text); +List wordList = SensitiveWordHelper.findAll(text); Assert.assertEquals("[ⒻⒻⒻfⓤuⓤ⒰cⓒ⒦]", wordList.toString()); ``` @@ -185,7 +184,7 @@ Assert.assertEquals("[ⒻⒻⒻfⓤuⓤ⒰cⓒ⒦]", wordList.toString()); ```java final String text = "楼主好人,邮箱 sensitiveword@xx.com"; -List wordList = SensitiveWordBs.newInstance().findAll(text); +List wordList = SensitiveWordHelper.findAll(text); Assert.assertEquals("[sensitiveword@xx.com]", wordList.toString()); ``` @@ -209,10 +208,84 @@ Assert.assertEquals("[sensitiveword@xx.com]", wordList.toString()); ```java final String text = "gender 我们认为应该通过,自定义敏感词我们认为应该拒绝。"; -List wordList = SensitiveWordBs.newInstance().findAll(text); +List wordList = SensitiveWordHelper.findAll(text); Assert.assertEquals("[自定义敏感词]", wordList.toString()); ``` +# 动态加载 + +## 情景说明 + +有时候我们希望将敏感词的加载设计成动态的,比如控台修改,然后可以实时生效。 + +v0.0.13 支持了这种特性。 + +## 接口说明 + +为了实现这个特性,并且兼容以前的功能,我们定义了两个接口。 + +### IWordDeny + +接口如下,可以自定义自己的实现。 + +```java +/** + * 拒绝出现的数据-返回的内容被当做是敏感词 + * @author binbin.hou + * @since 0.0.13 + */ +public interface IWordDeny { + + /** 
+ * 获取结果 + * @return 结果 + * @since 0.0.13 + */ + List deny(); + +} +``` + +### IWordAllow + +接口如下,可以自定义自己的实现。 + +```java +/** + * 允许的内容-返回的内容不被当做敏感词 + * @author binbin.hou + * @since 0.0.13 + */ +public interface IWordAllow { + + /** + * 获取结果 + * @return 结果 + * @since 0.0.13 + */ + List allow(); + +} +``` + +## 配置使用 + +为了让使用更加优雅,我们设计了引导类 `SensitiveWordBs`。 + +可以通过 wordDeny() 指定敏感词,wordAllow() 指定非敏感词,通过 init() 初始化敏感词字典。 + +```java +SensitiveWordBs wordBs = SensitiveWordBs.newInstance() + .wordDeny(WordDenys.system()) + .wordAllow(WordAllows.system()) + .init(); + +final String text = "五星红旗迎风飘扬,毛主席的画像屹立在天安门前。"; +Assert.assertTrue(wordBs.contains(text)); +``` + +备注:init() 对于敏感词 DFA 的构建是比较耗时的,一般建议在应用初始化的时候**只初始化一次**。而不是重复初始化! + # 后期 road-map - 停顿词 diff --git a/pom.xml b/pom.xml index 80df0ca..12cd367 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ com.github.houbb sensitive-word - 0.0.13-SNAPSHOT + 0.0.13 diff --git a/release.bat b/release.bat index b0cb2a0..6bb390e 100644 --- a/release.bat +++ b/release.bat @@ -10,9 +10,9 @@ ECHO "============================= RELEASE START..." 
:: 版本号信息(需要手动指定) :::: 旧版本名称 -SET version=0.0.12 +SET version=0.0.13 :::: 新版本名称 -SET newVersion=0.0.13 +SET newVersion=0.0.14 :::: 组织名称 SET groupName=com.github.houbb :::: 项目名称 diff --git a/src/main/java/com/github/houbb/sensitive/word/api/IWordAllow.java b/src/main/java/com/github/houbb/sensitive/word/api/IWordAllow.java new file mode 100644 index 0000000..5171316 --- /dev/null +++ b/src/main/java/com/github/houbb/sensitive/word/api/IWordAllow.java @@ -0,0 +1,19 @@ +package com.github.houbb.sensitive.word.api; + +import java.util.List; + +/** + * 允许的内容-返回的内容不被当做敏感词 + * @author binbin.hou + * @since 0.0.13 + */ +public interface IWordAllow { + + /** + * 获取结果 + * @return 结果 + * @since 0.0.13 + */ + List allow(); + +} diff --git a/src/main/java/com/github/houbb/sensitive/word/api/IWordData.java b/src/main/java/com/github/houbb/sensitive/word/api/IWordData.java index 733e0ed..6413285 100644 --- a/src/main/java/com/github/houbb/sensitive/word/api/IWordData.java +++ b/src/main/java/com/github/houbb/sensitive/word/api/IWordData.java @@ -7,6 +7,7 @@ import java.util.List; * @author binbin.hou * @since 0.0.1 */ +@Deprecated public interface IWordData { /** diff --git a/src/main/java/com/github/houbb/sensitive/word/api/IWordDeny.java b/src/main/java/com/github/houbb/sensitive/word/api/IWordDeny.java new file mode 100644 index 0000000..7a9c76e --- /dev/null +++ b/src/main/java/com/github/houbb/sensitive/word/api/IWordDeny.java @@ -0,0 +1,19 @@ +package com.github.houbb.sensitive.word.api; + +import java.util.List; + +/** + * 拒绝出现的数据-返回的内容被当做是敏感词 + * @author binbin.hou + * @since 0.0.13 + */ +public interface IWordDeny { + + /** + * 获取结果 + * @return 结果 + * @since 0.0.13 + */ + List deny(); + +} diff --git a/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java b/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java index c0446db..4aadf1e 100644 --- a/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java +++ 
b/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java @@ -1,10 +1,13 @@ package com.github.houbb.sensitive.word.bs; import com.github.houbb.heaven.constant.CharConst; -import com.github.houbb.sensitive.word.api.IWordContext; -import com.github.houbb.sensitive.word.api.IWordData; -import com.github.houbb.sensitive.word.api.IWordMap; +import com.github.houbb.heaven.util.common.ArgUtil; +import com.github.houbb.heaven.util.util.CollectionUtil; +import com.github.houbb.sensitive.word.api.*; +import com.github.houbb.sensitive.word.exception.SensitiveWordException; +import com.github.houbb.sensitive.word.support.allow.WordAllows; import com.github.houbb.sensitive.word.support.data.SensitiveWordData; +import com.github.houbb.sensitive.word.support.deny.WordDenys; import com.github.houbb.sensitive.word.support.map.SensitiveWordMap; import java.util.List; @@ -30,37 +33,42 @@ public class SensitiveWordBs { * * @since 0.0.1 */ - private static volatile IWordMap sensitiveWordMap; + private IWordMap sensitiveWordMap; /** * 默认的执行上下文 * * @since 0.0.4 */ - private volatile IWordContext context; + private final IWordContext context = buildDefaultContext(); + + /** + * 禁止的单词 + * @since 0.0.13 + */ + private IWordDeny wordDeny = WordDenys.system(); + + /** + * 允许的单词 + * @since 0.0.13 + */ + private IWordAllow wordAllow = WordAllows.system(); /** * DCL 初始化 wordMap 信息 * - * @return 初始化后的结果 + * 注意:map 的构建是一个比较耗时的动作 * @since 0.0.4 */ - private static IWordMap initWordMap() { - if (sensitiveWordMap == null) { - synchronized (IWordMap.class) { - if (sensitiveWordMap == null) { - // 加载配置信息 - IWordData wordData = new SensitiveWordData(); - List lines = wordData.getWordData(); + private synchronized void initWordMap() { + // 加载配置信息 + List denyList = wordDeny.deny(); + List allowList = wordAllow.allow(); + List results = CollectionUtil.difference(denyList, allowList); - // 初始化 DFA 信息 - sensitiveWordMap = new SensitiveWordMap(); - sensitiveWordMap.initWordMap(lines); - } - } - 
} - - return sensitiveWordMap; + // 初始化 DFA 信息 + sensitiveWordMap = new SensitiveWordMap(); + sensitiveWordMap.initWordMap(results); } /** @@ -72,12 +80,44 @@ public class SensitiveWordBs { * @since 0.0.1 */ public static SensitiveWordBs newInstance() { - initWordMap(); + return new SensitiveWordBs(); + } - SensitiveWordBs bs = new SensitiveWordBs(); - bs.context = buildDefaultContext(); + /** + * 初始化 + * + * 1. 根据配置,初始化对应的 map。比较消耗性能。 + * @since 0.0.13 + * @return this + */ + public SensitiveWordBs init() { + this.initWordMap(); - return bs; + return this; + } + + /** + * 设置禁止的实现 + * @param wordDeny 禁止的实现 + * @return this + * @since 0.0.13 + */ + public SensitiveWordBs wordDeny(IWordDeny wordDeny) { + ArgUtil.notNull(wordDeny, "wordDeny"); + this.wordDeny = wordDeny; + return this; + } + + /** + * 设置允许的实现 + * @param wordAllow 允许的实现 + * @return this + * @since 0.0.13 + */ + public SensitiveWordBs wordAllow(IWordAllow wordAllow) { + ArgUtil.notNull(wordAllow, "wordAllow"); + this.wordAllow = wordAllow; + return this; } /** @@ -122,7 +162,7 @@ public class SensitiveWordBs { * @return 结果 * @since 0.0.4 */ - private static IWordContext buildDefaultContext() { + private IWordContext buildDefaultContext() { IWordContext wordContext = SensitiveWordContext.newInstance(); // 格式统一化 wordContext.ignoreCase(true); @@ -148,6 +188,8 @@ public class SensitiveWordBs { * @since 0.0.1 */ public boolean contains(final String target) { + statusCheck(); + return sensitiveWordMap.contains(target, context); } @@ -161,6 +203,8 @@ public class SensitiveWordBs { * @since 0.0.1 */ public List findAll(final String target) { + statusCheck(); + return sensitiveWordMap.findAll(target, context); } @@ -173,6 +217,8 @@ public class SensitiveWordBs { * @since 0.0.1 */ public String findFirst(final String target) { + statusCheck(); + return sensitiveWordMap.findFirst(target, context); } @@ -185,6 +231,8 @@ public class SensitiveWordBs { * @since 0.0.2 */ public String replace(final String target, 
final char replaceChar) { + statusCheck(); + return sensitiveWordMap.replace(target, replaceChar, context); } @@ -200,4 +248,15 @@ public class SensitiveWordBs { return this.replace(target, CharConst.STAR); } + + /** + * 状态校验 + * @since 0.0.13 + */ + private void statusCheck(){ + if(sensitiveWordMap == null) { + this.init(); + } + } + } diff --git a/src/main/java/com/github/houbb/sensitive/word/core/SensitiveWordHelper.java b/src/main/java/com/github/houbb/sensitive/word/core/SensitiveWordHelper.java new file mode 100644 index 0000000..701eb40 --- /dev/null +++ b/src/main/java/com/github/houbb/sensitive/word/core/SensitiveWordHelper.java @@ -0,0 +1,82 @@ +package com.github.houbb.sensitive.word.core; + +import com.github.houbb.sensitive.word.bs.SensitiveWordBs; + +import java.util.List; + +/** + * 敏感词工具类 + * @author binbin.hou + * @since 0.0.13 + */ +public final class SensitiveWordHelper { + + private SensitiveWordHelper(){} + + /** + * 默认的实现 + * @since 0.0.13 + */ + private static final SensitiveWordBs WORD_BS = SensitiveWordBs.newInstance().init(); + + /** + * 是否包含敏感词 + * + * @param target 目标字符串 + * @return 是否 + * @since 0.0.13 + */ + public static boolean contains(final String target) { + return WORD_BS.contains(target); + } + + /** + * 返回所有的敏感词 + * 1. 这里是默认去重的,且是有序的。 + * 2. 如果不存在,返回空列表 + * + * @param target 目标字符串 + * @return 敏感词列表 + * @since 0.0.1 + */ + public static List findAll(final String target) { + return WORD_BS.findAll(target); + } + + /** + * 返回第一个敏感词 + * (1)如果不存在,则返回 {@code null} + * + * @param target 目标字符串 + * @return 敏感词 + * @since 0.0.13 + */ + public static String findFirst(final String target) { + return WORD_BS.findFirst(target); + } + + /** + * 替换所有内容 + * + * @param target 目标字符串 + * @param replaceChar 替换为的 char + * @return 替换后结果 + * @since 0.0.13 + */ + public static String replace(final String target, final char replaceChar) { + return WORD_BS.replace(target, replaceChar); + } + + /** + * 替换所有内容 + * 1. 
默认使用星号(*)替换。 + * + * @param target 目标字符串 + * @return 替换后结果 + * @since 0.0.13 + */ + public static String replace(final String target) { + return WORD_BS.replace(target); + } + +} diff --git a/src/main/java/com/github/houbb/sensitive/word/support/allow/WordAllowInit.java b/src/main/java/com/github/houbb/sensitive/word/support/allow/WordAllowInit.java new file mode 100644 index 0000000..6803637 --- /dev/null +++ b/src/main/java/com/github/houbb/sensitive/word/support/allow/WordAllowInit.java @@ -0,0 +1,43 @@ +package com.github.houbb.sensitive.word.support.allow; + +import com.github.houbb.heaven.annotation.ThreadSafe; +import com.github.houbb.heaven.support.pipeline.Pipeline; +import com.github.houbb.heaven.support.pipeline.impl.DefaultPipeline; +import com.github.houbb.sensitive.word.api.IWordAllow; + +import java.util.ArrayList; +import java.util.List; + +/** + * 初始化类 + * + * @author binbin.hou + * @since 0.0.13 + */ +@ThreadSafe +public abstract class WordAllowInit implements IWordAllow { + + /** + * 初始化列表 + * + * @param pipeline 当前列表泳道 + * @since 0.0.13 + */ + protected abstract void init(final Pipeline pipeline); + + @Override + public List allow() { + Pipeline pipeline = new DefaultPipeline<>(); + this.init(pipeline); + + List results = new ArrayList<>(); + List wordAllows = pipeline.list(); + for (IWordAllow wordAllow : wordAllows) { + List allowList = wordAllow.allow(); + results.addAll(allowList); + } + + return results; + } + +} diff --git a/src/main/java/com/github/houbb/sensitive/word/support/allow/WordAllowSystem.java b/src/main/java/com/github/houbb/sensitive/word/support/allow/WordAllowSystem.java new file mode 100644 index 0000000..284da5d --- /dev/null +++ b/src/main/java/com/github/houbb/sensitive/word/support/allow/WordAllowSystem.java @@ -0,0 +1,23 @@ +package com.github.houbb.sensitive.word.support.allow; + +import com.github.houbb.heaven.annotation.ThreadSafe; +import com.github.houbb.heaven.util.io.StreamUtil; +import 
com.github.houbb.sensitive.word.api.IWordAllow; +import com.github.houbb.sensitive.word.api.IWordDeny; + +import java.util.List; + +/** + * 系统默认的信息 + * @author binbin.hou + * @since 0.0.13 + */ +@ThreadSafe +public class WordAllowSystem implements IWordAllow { + + @Override + public List allow() { + return StreamUtil.readAllLines("/sensitive_word_allow.txt"); + } + +} diff --git a/src/main/java/com/github/houbb/sensitive/word/support/allow/WordAllows.java b/src/main/java/com/github/houbb/sensitive/word/support/allow/WordAllows.java new file mode 100644 index 0000000..f5ce28f --- /dev/null +++ b/src/main/java/com/github/houbb/sensitive/word/support/allow/WordAllows.java @@ -0,0 +1,49 @@ +package com.github.houbb.sensitive.word.support.allow; + +import com.github.houbb.heaven.support.instance.impl.Instances; +import com.github.houbb.heaven.support.pipeline.Pipeline; +import com.github.houbb.heaven.util.util.ArrayUtil; +import com.github.houbb.sensitive.word.api.IWordAllow; + +/** + * 所有允许的结果 + * @author binbin.hou + * @since 0.0.13 + */ +public final class WordAllows { + + private WordAllows(){} + + /** + * 责任链 + * @param wordAllow 允许 + * @param others 其他 + * @return 结果 + * @since 0.0.13 + */ + public static IWordAllow chains(final IWordAllow wordAllow, + final IWordAllow... 
others) { + return new WordAllowInit() { + @Override + protected void init(Pipeline pipeline) { + pipeline.addLast(wordAllow); + + if(ArrayUtil.isNotEmpty(others)) { + for(IWordAllow other : others) { + pipeline.addLast(other); + } + } + } + }; + } + + /** + * 系统实现 + * @return 结果 + * @since 0.0.13 + */ + public static IWordAllow system() { + return Instances.singleton(WordAllowSystem.class); + } + +} diff --git a/src/main/java/com/github/houbb/sensitive/word/support/deny/WordDenyInit.java b/src/main/java/com/github/houbb/sensitive/word/support/deny/WordDenyInit.java new file mode 100644 index 0000000..0fa64c2 --- /dev/null +++ b/src/main/java/com/github/houbb/sensitive/word/support/deny/WordDenyInit.java @@ -0,0 +1,44 @@ +package com.github.houbb.sensitive.word.support.deny; + +import com.github.houbb.heaven.annotation.ThreadSafe; +import com.github.houbb.heaven.support.pipeline.Pipeline; +import com.github.houbb.heaven.support.pipeline.impl.DefaultPipeline; +import com.github.houbb.heaven.util.io.StreamUtil; +import com.github.houbb.sensitive.word.api.IWordDeny; + +import java.util.ArrayList; +import java.util.List; + +/** + * 初始化类 + * + * @author binbin.hou + * @since 0.0.13 + */ +@ThreadSafe +public abstract class WordDenyInit implements IWordDeny { + + /** + * 初始化列表 + * + * @param pipeline 当前列表泳道 + * @since 0.0.13 + */ + protected abstract void init(final Pipeline pipeline); + + @Override + public List deny() { + Pipeline pipeline = new DefaultPipeline<>(); + this.init(pipeline); + + List results = new ArrayList<>(); + List wordDenies = pipeline.list(); + for (IWordDeny wordDeny : wordDenies) { + List denyList = wordDeny.deny(); + results.addAll(denyList); + } + + return results; + } + +} diff --git a/src/main/java/com/github/houbb/sensitive/word/support/deny/WordDenySystem.java b/src/main/java/com/github/houbb/sensitive/word/support/deny/WordDenySystem.java new file mode 100644 index 0000000..f7282db --- /dev/null +++ 
b/src/main/java/com/github/houbb/sensitive/word/support/deny/WordDenySystem.java @@ -0,0 +1,27 @@ +package com.github.houbb.sensitive.word.support.deny; + +import com.github.houbb.heaven.annotation.ThreadSafe; +import com.github.houbb.heaven.util.guava.Guavas; +import com.github.houbb.heaven.util.io.StreamUtil; +import com.github.houbb.sensitive.word.api.IWordDeny; +import com.github.houbb.sensitive.word.constant.AppConst; + +import java.util.List; + +/** + * 系统默认的信息 + * @author binbin.hou + * @since 0.0.13 + */ +@ThreadSafe +public class WordDenySystem implements IWordDeny { + + @Override + public List deny() { + List results = StreamUtil.readAllLines("/dict.txt"); + results.addAll(StreamUtil.readAllLines("/dict_en.txt")); + results.addAll(StreamUtil.readAllLines("/sensitive_word_deny.txt")); + return results; + } + +} diff --git a/src/main/java/com/github/houbb/sensitive/word/support/deny/WordDenys.java b/src/main/java/com/github/houbb/sensitive/word/support/deny/WordDenys.java new file mode 100644 index 0000000..c15452e --- /dev/null +++ b/src/main/java/com/github/houbb/sensitive/word/support/deny/WordDenys.java @@ -0,0 +1,49 @@ +package com.github.houbb.sensitive.word.support.deny; + +import com.github.houbb.heaven.support.instance.impl.Instances; +import com.github.houbb.heaven.support.pipeline.Pipeline; +import com.github.houbb.heaven.util.util.ArrayUtil; +import com.github.houbb.sensitive.word.api.IWordDeny; + +/** + * 所有拒绝的结果 + * @author binbin.hou + * @since 0.0.13 + */ +public final class WordDenys { + + private WordDenys(){} + + /** + * 责任链 + * @param wordDeny 拒绝 + * @param others 其他 + * @return 结果 + * @since 0.0.13 + */ + public static IWordDeny chains(final IWordDeny wordDeny, + final IWordDeny... 
others) { + return new WordDenyInit() { + @Override + protected void init(Pipeline pipeline) { + pipeline.addLast(wordDeny); + + if(ArrayUtil.isNotEmpty(others)) { + for(IWordDeny other : others) { + pipeline.addLast(other); + } + } + } + }; + } + + /** + * 系统实现 + * @return 结果 + * @since 0.0.13 + */ + public static IWordDeny system() { + return Instances.singleton(WordDenySystem.class); + } + +} diff --git a/src/main/resources/dict.txt b/src/main/resources/dict.txt index 127800d..a15e8db 100644 --- a/src/main/resources/dict.txt +++ b/src/main/resources/dict.txt @@ -1,4 +1,3 @@ - 001工程 007手机防盗软件任意显软件 007间谍专业版 diff --git a/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsTest.java b/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsTest.java index 75cea85..7067d5c 100644 --- a/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsTest.java +++ b/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsTest.java @@ -1,5 +1,7 @@ package com.github.houbb.sensitive.word.bs; +import com.github.houbb.sensitive.word.support.allow.WordAllows; +import com.github.houbb.sensitive.word.support.deny.WordDenys; import org.junit.Assert; import org.junit.Test; @@ -97,4 +99,15 @@ public class SensitiveWordBsTest { Assert.assertEquals("fuck", word); } + @Test + public void configTest() { + SensitiveWordBs wordBs = SensitiveWordBs.newInstance() + .wordDeny(WordDenys.system()) + .wordAllow(WordAllows.system()) + .init(); + + final String text = "五星红旗迎风飘扬,毛主席的画像屹立在天安门前。"; + Assert.assertTrue(wordBs.contains(text)); + } + } diff --git a/src/test/java/com/github/houbb/sensitive/word/core/SensitiveWordHelperTest.java b/src/test/java/com/github/houbb/sensitive/word/core/SensitiveWordHelperTest.java new file mode 100644 index 0000000..1357206 --- /dev/null +++ b/src/test/java/com/github/houbb/sensitive/word/core/SensitiveWordHelperTest.java @@ -0,0 +1,100 @@ +package com.github.houbb.sensitive.word.core; + +import org.junit.Assert; +import 
org.junit.Test; + +import java.util.List; + +/** + * <p> project: sensitive-word-SensitiveWordHelperTest </p> + * <p> create on 2020/1/7 23:43 </p>
+ * + * @author Administrator + * @since 0.0.13 + */ +public class SensitiveWordHelperTest { + + /** + * 是否包含 + * @since 0.0.1 + */ + @Test + public void containsTest() { + final String text = "五星红旗迎风飘扬,毛主席的画像屹立在天安门前。"; + + Assert.assertTrue(SensitiveWordHelper.contains(text)); + } + + /** + * 返回所有敏感词 + * @since 0.0.1 + */ + @Test + public void findAllTest() { + final String text = "五星红旗迎风飘扬,毛主席的画像屹立在天安门前。"; + + List wordList = SensitiveWordHelper.findAll(text); + Assert.assertEquals("[五星红旗, 毛主席, 天安门]", wordList.toString()); + } + + /** + * 返回所有第一个匹配的敏感词 + * @since 0.0.1 + */ + @Test + public void findFirstTest() { + final String text = "五星红旗迎风飘扬,毛主席的画像屹立在天安门前。"; + + String word = SensitiveWordHelper.findFirst(text); + Assert.assertEquals("五星红旗", word); + } + + /** + * 默认的替换策略 + * @since 0.0.2 + */ + @Test + public void replaceTest() { + final String text = "五星红旗迎风飘扬,毛主席的画像屹立在天安门前。"; + + String result = SensitiveWordHelper.replace(text); + Assert.assertEquals("****迎风飘扬,***的画像屹立在***前。", result); + } + + /** + * 自定义字符的替换策略 + * @since 0.0.2 + */ + @Test + public void replaceCharTest() { + final String text = "五星红旗迎风飘扬,毛主席的画像屹立在天安门前。"; + + String result = SensitiveWordHelper.replace(text, '0'); + Assert.assertEquals("0000迎风飘扬,000的画像屹立在000前。", result); + } + + /** + * 忽略大小写 + * @since 0.0.4 + */ + @Test + public void ignoreCaseTest() { + final String text = "fuCK the bad words."; + + String word = SensitiveWordHelper.findFirst(text); + Assert.assertEquals("fuCK", word); + } + + /** + * 忽略半角圆角 + * @since 0.0.4 + */ + @Test + public void ignoreWidthTest() { + final String text = "fuck the bad words."; + + String word = SensitiveWordHelper.findFirst(text); + Assert.assertEquals("fuck", word); + } + +}