diff --git a/CHANGE_LOG.md b/CHANGE_LOG.md index cc1c6a1..ba3fa56 100644 --- a/CHANGE_LOG.md +++ b/CHANGE_LOG.md @@ -120,3 +120,9 @@ | 1 | A | 返回敏感词对应的下标范围 | 2021-8-8 20:51:58 | | | 2 | U | ignoreRepeat 默认为 false | 2021-8-8 20:51:58 | | | 3 | U | 把测试、系统、买卖、彩票等常用词移出敏感词库 | 2021-8-8 20:51:58 | | + +# release_0.1.1 + +| 序号 | 变更类型 | 说明 | 时间 | 备注 | +|:---|:---|:---|:---|:--| +| 1 | F | 自定义敏感词 allow/deny 进行格式化处理 | 2021-12-11 23:51:58 | | diff --git a/README.md b/README.md index 160e2a2..c792156 100644 --- a/README.md +++ b/README.md @@ -44,11 +44,9 @@ [CHANGE_LOG.md](https://github.com/houbb/sensitive-word/blob/master/doc/CHANGE_LOG.md) -v0.1.0 变更: +v0.1.1 变更: -- 返回敏感词对应的开始结束下标信息 - -- 优化词库 +- 敏感词自定义 Allow/Deny 进行格式化处理 # 快速开始 @@ -64,7 +62,7 @@ v0.1.0 变更: com.github.houbb sensitive-word - 0.1.0 + 0.1.1 ``` diff --git a/pom.xml b/pom.xml index f61150f..2faadb4 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ com.github.houbb sensitive-word - 0.1.0 + 0.1.1 @@ -25,8 +25,8 @@ 1.7 - 0.1.129 - 1.2.0 + 0.1.148 + 1.7.1 4.13.1 diff --git a/release.bat b/release.bat index c75af46..fb5391f 100644 --- a/release.bat +++ b/release.bat @@ -10,9 +10,9 @@ ECHO "============================= RELEASE START..." :: 版本号信息(需要手动指定) :::: 旧版本名称 -SET version=0.1.0 +SET version=0.1.1 :::: 新版本名称 -SET newVersion=0.2.0 +SET newVersion=0.1.2 :::: 组织名称 SET groupName=com.github.houbb :::: 项目名称 diff --git a/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java b/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java index eb1ad2c..756acad 100644 --- a/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java +++ b/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java @@ -9,8 +9,9 @@ import com.github.houbb.sensitive.word.support.allow.WordAllows; import com.github.houbb.sensitive.word.support.deny.WordDenys; import com.github.houbb.sensitive.word.support.map.SensitiveWordMap; import com.github.houbb.sensitive.word.support.result.WordResultHandlers; +import com.github.houbb.sensitive.word.utils.InnerFormatUtils; -import java.util.List; +import java.util.*; /** * 敏感词引导类 @@ -64,7 +65,7 @@ public class SensitiveWordBs { // 加载配置信息 List denyList = wordDeny.deny(); List allowList = wordAllow.allow(); - List results = CollectionUtil.difference(denyList, allowList); + List results = getActualDenyList(denyList, allowList); // 初始化 DFA 信息 if(sensitiveWordMap == null) { @@ -74,6 +75,59 @@ public class SensitiveWordBs { sensitiveWordMap.initWordMap(results); } + /** + * 获取禁止列表中真正的禁止词汇 + * @param denyList 禁止 + * @param allowList 允许 + * @return 结果 + * @since 0.1.1 + */ + List getActualDenyList(List denyList, + List allowList) { + if(CollectionUtil.isEmpty(denyList)) { + return Collections.emptyList(); + } + if(CollectionUtil.isEmpty(allowList)) { + return denyList; + } + + List formatDenyList = this.formatWordList(denyList); + List formatAllowList = this.formatWordList(allowList); + + List resultList = new ArrayList<>(); + // O(1) + Set allowSet = new HashSet<>(formatAllowList); + + for(String deny : formatDenyList) { + if(allowSet.contains(deny)) { + continue; + } + + resultList.add(deny); + } + return resultList; + } + + /** + * 数据格式化处理 + * @param list 列表 + * @return 结果 + * @since 0.1.1 + */ + private List formatWordList(List list) { + if(CollectionUtil.isEmpty(list)) { + return list; + } + + List resultList = new ArrayList<>(list.size()); + for(String word : list) { + String formatWord = InnerFormatUtils.format(word, this.context); + resultList.add(formatWord); + } + + return resultList; + } + /** * 新建验证实例 *

diff --git a/src/main/java/com/github/houbb/sensitive/word/utils/InnerFormatUtils.java b/src/main/java/com/github/houbb/sensitive/word/utils/InnerFormatUtils.java new file mode 100644 index 0000000..295b401 --- /dev/null +++ b/src/main/java/com/github/houbb/sensitive/word/utils/InnerFormatUtils.java @@ -0,0 +1,40 @@ +package com.github.houbb.sensitive.word.utils; + +import com.github.houbb.heaven.support.instance.impl.Instances; +import com.github.houbb.heaven.util.lang.StringUtil; +import com.github.houbb.sensitive.word.api.ICharFormat; +import com.github.houbb.sensitive.word.api.IWordContext; +import com.github.houbb.sensitive.word.support.format.CharFormatChain; + +/** + * 内部格式化工具类 + * @since 0.1.1 + */ +public final class InnerFormatUtils { + + private InnerFormatUtils(){} + + /** + * 格式化 + * @param original 原始 + * @param context 上下文 + * @return 结果 + * @since 0.1.1 + */ + public static String format(String original, IWordContext context) { + if(StringUtil.isEmpty(original)) { + return original; + } + + StringBuilder stringBuilder = new StringBuilder(); + ICharFormat charFormat = Instances.singleton(CharFormatChain.class); + char[] chars = original.toCharArray(); + for(char c : chars) { + char cf = charFormat.format(c, context); + stringBuilder.append(cf); + } + + return stringBuilder.toString(); + } + +} diff --git a/src/test/java/com/github/houbb/sensitive/word/bugs/b20211211/MySensitiveTest.java b/src/test/java/com/github/houbb/sensitive/word/bugs/b20211211/MySensitiveTest.java new file mode 100644 index 0000000..1cb2144 --- /dev/null +++ b/src/test/java/com/github/houbb/sensitive/word/bugs/b20211211/MySensitiveTest.java @@ -0,0 +1,27 @@ +package com.github.houbb.sensitive.word.bugs.b20211211; + +import com.github.houbb.sensitive.word.api.IWordAllow; +import com.github.houbb.sensitive.word.api.IWordDeny; +import com.github.houbb.sensitive.word.bs.SensitiveWordBs; +import com.github.houbb.sensitive.word.support.allow.WordAllows; +import com.github.houbb.sensitive.word.support.deny.WordDenys; +import org.junit.Test; + +public class MySensitiveTest { + + + @Test + public void test() { + IWordDeny wordDeny = WordDenys.chains(WordDenys.system(), new MyWordDeny()); + IWordAllow wordAllow = WordAllows.chains(WordAllows.system(), new MyWordAllow()); + SensitiveWordBs sensitiveWordBs = SensitiveWordBs.newInstance() + .wordAllow(wordAllow) + .wordDeny(wordDeny)// 各种其他配置 + .init();// init() 初始化敏感词字典 + + final String text = "五星红旗 我的自定义敏感词尼玛"; + //输出测试结果 + System.out.println("敏感词:"+sensitiveWordBs.findAll(text).toString()); + } + +} diff --git a/src/test/java/com/github/houbb/sensitive/word/bugs/b20211211/MyWordAllow.java b/src/test/java/com/github/houbb/sensitive/word/bugs/b20211211/MyWordAllow.java new file mode 100644 index 0000000..9ed85ec --- /dev/null +++ b/src/test/java/com/github/houbb/sensitive/word/bugs/b20211211/MyWordAllow.java @@ -0,0 +1,15 @@ +package com.github.houbb.sensitive.word.bugs.b20211211; + +import com.github.houbb.sensitive.word.api.IWordAllow; + +import java.util.Arrays; +import java.util.List; + +public class MyWordAllow implements IWordAllow { + + @Override + public List allow() { + return Arrays.asList("五星红旗"); + } + +} diff --git a/src/test/java/com/github/houbb/sensitive/word/bugs/b20211211/MyWordDeny.java b/src/test/java/com/github/houbb/sensitive/word/bugs/b20211211/MyWordDeny.java new file mode 100644 index 0000000..8512c46 --- /dev/null +++ b/src/test/java/com/github/houbb/sensitive/word/bugs/b20211211/MyWordDeny.java @@ -0,0 +1,16 @@ +package com.github.houbb.sensitive.word.bugs.b20211211; + +import com.github.houbb.sensitive.word.api.IWordDeny; + +import java.util.Arrays; +import java.util.List; + +public class MyWordDeny implements IWordDeny { + + @Override + public List deny() { + return Arrays.asList("尼玛"); + } + + +} diff --git a/src/test/java/com/github/houbb/sensitive/word/bugs/package-info.java b/src/test/java/com/github/houbb/sensitive/word/bugs/package-info.java new file mode 100644 index 0000000..ac4df10 --- /dev/null +++ b/src/test/java/com/github/houbb/sensitive/word/bugs/package-info.java @@ -0,0 +1 @@ +package com.github.houbb.sensitive.word.bugs; \ No newline at end of file