From 4d606eaf08f4640cfad61829032f5e8777708ed7 Mon Sep 17 00:00:00 2001 From: houbb Date: Sun, 10 Nov 2024 22:44:54 +0800 Subject: [PATCH] release branch 0.22.0 --- CHANGE_LOG.md | 6 ++ README.md | 15 ++-- pom.xml | 2 +- release.bat | 4 +- .../sensitive/word/bs/SensitiveWordBs.java | 24 +++++- .../word/support/data/WordDataHashMap.java | 1 - .../word/utils/InnerWordFormatUtils.java | 4 +- .../sensitive/word/bugs/b54/Bug84Test.java | 86 +++++++++++++++++++ 8 files changed, 122 insertions(+), 20 deletions(-) create mode 100644 src/test/java/com/github/houbb/sensitive/word/bugs/b54/Bug84Test.java diff --git a/CHANGE_LOG.md b/CHANGE_LOG.md index 4308cb7..e65846e 100644 --- a/CHANGE_LOG.md +++ b/CHANGE_LOG.md @@ -368,3 +368,9 @@ |:---|:-----|------------------------------------|:-------------------|:------------------------------------------------------| | 1 | O | 优化白名单的匹配策略,避免长白名单时,匹配到短的黑名单,不符合预期。 | 2024-9-18 21:39:40 | https://github.com/houbb/sensitive-word/issues/76 +19 | | 2 | A | 白名单支持单个编辑 | 2024-9-18 21:39:40 | 避免全量初始化 | + +# release_0.22.0 + +| 序号 | 变更类型 | 说明 | 时间 | 备注 | +|:---|:-----|----------|:--------------------|:------------------------------------------------------| +| 1 | F | 修正数字匹配问题 | 2024-11-10 22:43:08 | https://github.com/houbb/sensitive-word/issues/84 | diff --git a/README.md b/README.md index a747dfb..59e3998 100644 --- a/README.md +++ b/README.md @@ -54,21 +54,16 @@ [CHANGE_LOG.md](https://github.com/houbb/sensitive-word/blob/master/CHANGE_LOG.md) -### V0.19.0 - -- 针对单个词的新增/删除,无需全量初始化 -- 新增 allow/deny 空实现 - -### V0.20.0 - -- 新增数字+英文的全词匹配实现 - ### V0.21.0 - 修正白名单较长,包含了黑名单,导致白名单不符合预期的场景。 - 新增了白名单单个的编辑操作 +### V0.22.0 + +- 修正单个敏感词修改时,对应的格式处理问题 + ## 更多资料 ### 敏感词控台 @@ -99,7 +94,7 @@ com.github.houbb sensitive-word - 0.21.0 + 0.22.0 ``` diff --git a/pom.xml b/pom.xml index 2a4833e..9810c4c 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ com.github.houbb sensitive-word - 0.21.0 + 0.22.0 diff --git a/release.bat b/release.bat index c167730..5563e59 
100644 --- a/release.bat +++ b/release.bat @@ -10,9 +10,9 @@ ECHO "============================= RELEASE START..." :: 版本号信息(需要手动指定) :::: 旧版本名称 -SET version=0.21.0 +SET version=0.22.0 :::: 新版本名称 -SET newVersion=0.22.0 +SET newVersion=0.23.0 :::: 组织名称 SET groupName=com.github.houbb :::: 项目名称 diff --git a/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java b/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java index 4d7e382..529f074 100644 --- a/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java +++ b/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java @@ -639,7 +639,11 @@ public class SensitiveWordBs implements ISensitiveWordDestroy { if(CollectionUtil.isEmpty(collection)) { return; } - for(String word : collection) { + // fixed https://github.com/houbb/sensitive-word/issues/84 + // 主要原因是二者没有保持一致,初始化的数据和插入的数据没有做相同的格式化 + List formatList = InnerWordFormatUtils.formatWordList(collection, context); + + for(String word : formatList) { this.wordData.removeWord(word); } } @@ -650,7 +654,11 @@ public class SensitiveWordBs implements ISensitiveWordDestroy { * @since 0.19.0 */ public void addWord(Collection collection) { - this.wordData.addWord(collection); + // fixed https://github.com/houbb/sensitive-word/issues/84 + // 主要原因是二者没有保持一致,初始化的数据和插入的数据没有做相同的格式化 + // 正常字典并没有这个问题 + List formatAllowList = InnerWordFormatUtils.formatWordList(collection, context); + this.wordData.addWord(formatAllowList); } /** @@ -690,7 +698,11 @@ public class SensitiveWordBs implements ISensitiveWordDestroy { if(CollectionUtil.isEmpty(collection)) { return; } - for(String word : collection) { + // fixed https://github.com/houbb/sensitive-word/issues/84 + // 主要原因是二者没有保持一致,初始化的数据和插入的数据没有做相同的格式化 + List formatList = InnerWordFormatUtils.formatWordList(collection, context); + + for(String word : formatList) { this.wordDataAllow.removeWord(word); } } @@ -700,7 +712,11 @@ public class SensitiveWordBs implements ISensitiveWordDestroy { * 
@since 0.21.0 */ public void addWordAllow(Collection collection) { - this.wordDataAllow.addWord(collection); + // fixed https://github.com/houbb/sensitive-word/issues/84 + // 主要原因是二者没有保持一致,初始化的数据和插入的数据没有做相同的格式化 + List formatList = InnerWordFormatUtils.formatWordList(collection, context); + + this.wordDataAllow.addWord(formatList); } /** * 新增敏感词白名单 diff --git a/src/main/java/com/github/houbb/sensitive/word/support/data/WordDataHashMap.java b/src/main/java/com/github/houbb/sensitive/word/support/data/WordDataHashMap.java index de46765..834d85f 100644 --- a/src/main/java/com/github/houbb/sensitive/word/support/data/WordDataHashMap.java +++ b/src/main/java/com/github/houbb/sensitive/word/support/data/WordDataHashMap.java @@ -1,6 +1,5 @@ package com.github.houbb.sensitive.word.support.data; -import com.github.houbb.heaven.annotation.ThreadSafe; import com.github.houbb.heaven.util.lang.ObjectUtil; import com.github.houbb.heaven.util.lang.StringUtil; import com.github.houbb.sensitive.word.api.IWordContext; diff --git a/src/main/java/com/github/houbb/sensitive/word/utils/InnerWordFormatUtils.java b/src/main/java/com/github/houbb/sensitive/word/utils/InnerWordFormatUtils.java index 6389b77..44ce918 100644 --- a/src/main/java/com/github/houbb/sensitive/word/utils/InnerWordFormatUtils.java +++ b/src/main/java/com/github/houbb/sensitive/word/utils/InnerWordFormatUtils.java @@ -78,10 +78,10 @@ public final class InnerWordFormatUtils { * @return 结果 * @since 0。3.0 */ - public static List formatWordList(List list, + public static List formatWordList(Collection list, final IWordContext context) { if(CollectionUtil.isEmpty(list)) { - return list; + return new ArrayList<>(); } List resultList = new ArrayList<>(list.size()); diff --git a/src/test/java/com/github/houbb/sensitive/word/bugs/b54/Bug84Test.java b/src/test/java/com/github/houbb/sensitive/word/bugs/b54/Bug84Test.java new file mode 100644 index 0000000..42eeaee --- /dev/null +++ 
b/src/test/java/com/github/houbb/sensitive/word/bugs/b54/Bug84Test.java @@ -0,0 +1,86 @@ +package com.github.houbb.sensitive.word.bugs.b54; + +import com.github.houbb.sensitive.word.bs.SensitiveWordBs; +import com.github.houbb.sensitive.word.support.allow.WordAllows; +import com.github.houbb.sensitive.word.support.deny.WordDenys; +import org.junit.Assert; +import org.junit.Test; + +public class Bug84Test { + + @Test + public void bug84IgnoreNumberStyleTrueTest() { + // 例如,敏感词“下三滥”。通过 + SensitiveWordBs sensitiveWordBs = SensitiveWordBs.newInstance() + .wordAllow(WordAllows.empty()) + .wordDeny(WordDenys.empty()) + // 因为这里默认是 true + .ignoreNumStyle(true) + .init(); + sensitiveWordBs.addWord("下三滥"); + String result = sensitiveWordBs.findFirst("花豹用下三滥招式对付疣猪,没想到疣猪居然也有绝招"); + Assert.assertEquals("下三滥", result); + + String result2 = sensitiveWordBs.findFirst("花豹用下3滥招式对付疣猪,没想到疣猪居然也有绝招"); + Assert.assertEquals("下3滥", result2); + + // 移除 + sensitiveWordBs.removeWord("下三滥"); + String result3 = sensitiveWordBs.findFirst("花豹用下三滥招式对付疣猪,没想到疣猪居然也有绝招"); + Assert.assertNull(result3); + + String result4 = sensitiveWordBs.findFirst("花豹用下3滥招式对付疣猪,没想到疣猪居然也有绝招"); + Assert.assertNull(result4); + } + + @Test + public void bug84IgnoreNumberStyleTrueTest2() { + // 例如,敏感词“下三滥”。通过 + SensitiveWordBs sensitiveWordBs = SensitiveWordBs.newInstance() + .wordAllow(WordAllows.empty()) + .wordDeny(WordDenys.empty()) + // 因为这里默认是 true + .ignoreNumStyle(true) + .init(); + sensitiveWordBs.addWord("下3滥"); + String result = sensitiveWordBs.findFirst("花豹用下三滥招式对付疣猪,没想到疣猪居然也有绝招"); + Assert.assertEquals("下三滥", result); + + String result2 = sensitiveWordBs.findFirst("花豹用下3滥招式对付疣猪,没想到疣猪居然也有绝招"); + Assert.assertEquals("下3滥", result2); + + // 移除 + sensitiveWordBs.removeWord("下3滥"); + String result3 = sensitiveWordBs.findFirst("花豹用下三滥招式对付疣猪,没想到疣猪居然也有绝招"); + Assert.assertNull(result3); + + String result4 = sensitiveWordBs.findFirst("花豹用下3滥招式对付疣猪,没想到疣猪居然也有绝招"); + Assert.assertNull(result4); + } + + @Test + public 
void bug84IgnoreNumberStyleFalseTest() { + // 例如,敏感词“下三滥”。通过 + SensitiveWordBs sensitiveWordBs = SensitiveWordBs.newInstance() + .wordAllow(WordAllows.empty()) + .wordDeny(WordDenys.empty()) + // override the default (true): number styles are NOT ignored in this test + .ignoreNumStyle(false) + .init(); + sensitiveWordBs.addWord("下三滥"); + String result = sensitiveWordBs.findFirst("花豹用下三滥招式对付疣猪,没想到疣猪居然也有绝招"); + Assert.assertEquals("下三滥", result); + + String result2 = sensitiveWordBs.findFirst("花豹用下3滥招式对付疣猪,没想到疣猪居然也有绝招"); + Assert.assertNull(result2); + + // 移除 + sensitiveWordBs.removeWord("下三滥"); + String result3 = sensitiveWordBs.findFirst("花豹用下三滥招式对付疣猪,没想到疣猪居然也有绝招"); + Assert.assertNull(result3); + + String result4 = sensitiveWordBs.findFirst("花豹用下3滥招式对付疣猪,没想到疣猪居然也有绝招"); + Assert.assertNull(result4); + } + +}