diff --git a/CHANGE_LOG.md b/CHANGE_LOG.md
index 7353a3b..7ffad8d 100644
--- a/CHANGE_LOG.md
+++ b/CHANGE_LOG.md
@@ -414,9 +414,8 @@
| 1 | A | wordCheck 策略支持用户自定义 | 2025-2-17 12:06:45 | https://github.com/houbb/sensitive-word/issues/101 |
| 2 | A | wordCheckUrlNoPrefix | 2025-2-17 12:06:45 | https://github.com/houbb/sensitive-word/issues/101 |
-# release_0.25.0
+# release_0.25.1
-| 序号 | 变更类型 | 说明 | 时间 | 备注 |
-|:---|:-----|----------------------|:-------------------|:-----|
-| 1 | A | wordCheck 策略支持用户自定义 | 2025-2-17 12:06:45 | https://github.com/houbb/sensitive-word/issues/101 |
-| 2 | A | wordCheckUrlNoPrefix | 2025-2-17 12:06:45 | https://github.com/houbb/sensitive-word/issues/101 |
+| 序号 | 变更类型 | 说明 | 时间 | 备注 |
+|:---|:-----|------------------------------|:------------------|:---------------------------------------------------|
+| 1 | A | 修正 tags 匹配问题,黑名单命中时返回对应的黑名单词 | 2025-5-2 20:25:04 | https://github.com/houbb/sensitive-word/issues/105 |
diff --git a/README.md b/README.md
index 27cbc28..f698569 100644
--- a/README.md
+++ b/README.md
@@ -96,7 +96,7 @@ v0.24.0 开始内置支持对敏感词的分类细化,不过工作量比较大
com.github.houbb
sensitive-word
- 0.25.0
+ 0.25.1
```
diff --git a/pom.xml b/pom.xml
index 8e459a0..f453ffa 100644
--- a/pom.xml
+++ b/pom.xml
@@ -6,7 +6,7 @@
com.github.houbb
sensitive-word
- 0.25.0
+ 0.25.1
diff --git a/release.bat b/release.bat
index a87d5d3..145eb7d 100644
--- a/release.bat
+++ b/release.bat
@@ -10,7 +10,7 @@ ECHO "============================= RELEASE START..."
:: 版本号信息(需要手动指定)
:::: 旧版本名称
-SET version=0.25.0
+SET version=0.25.1
:::: 新版本名称
SET newVersion=0.26.0
:::: 组织名称
diff --git a/src/main/java/com/github/houbb/sensitive/word/api/IWordResult.java b/src/main/java/com/github/houbb/sensitive/word/api/IWordResult.java
index ceaaf47..cfb1918 100644
--- a/src/main/java/com/github/houbb/sensitive/word/api/IWordResult.java
+++ b/src/main/java/com/github/houbb/sensitive/word/api/IWordResult.java
@@ -28,4 +28,11 @@ public interface IWordResult {
*/
String type();
+ /**
+ * The word that was actually matched; exposed so tag handling can be unified and real-world issues are easier to troubleshoot.
+ * @return the matched word
+ * @since 0.25.1
+ */
+ String word();
+
}
diff --git a/src/main/java/com/github/houbb/sensitive/word/core/SensitiveWord.java b/src/main/java/com/github/houbb/sensitive/word/core/SensitiveWord.java
index 20f90b3..8147d34 100644
--- a/src/main/java/com/github/houbb/sensitive/word/core/SensitiveWord.java
+++ b/src/main/java/com/github/houbb/sensitive/word/core/SensitiveWord.java
@@ -6,6 +6,7 @@ import com.github.houbb.sensitive.word.api.*;
import com.github.houbb.sensitive.word.api.context.InnerSensitiveWordContext;
import com.github.houbb.sensitive.word.constant.enums.WordValidModeEnum;
import com.github.houbb.sensitive.word.support.check.WordCheckResult;
+import com.github.houbb.sensitive.word.support.result.WordLengthResult;
import com.github.houbb.sensitive.word.support.result.WordResult;
import com.github.houbb.sensitive.word.utils.InnerWordFormatUtils;
@@ -80,13 +81,16 @@ public class SensitiveWord extends AbstractSensitiveWord {
// 命中
- int wordLength = checkResult.wordLengthResult().wordDenyLen();
+ final WordLengthResult wordLengthResult = checkResult.wordLengthResult();
+ int wordLength = wordLengthResult.wordDenyLen();
if (wordLength > 0) {
// 保存敏感词
WordResult wordResult = WordResult.newInstance()
.startIndex(i)
.endIndex(i+wordLength)
- .type(checkResult.type());
+ .type(checkResult.type())
+ .word(wordLengthResult.wordDeny());
+
//v0.13.0 添加判断
if(wordResultCondition.match(wordResult, text, modeEnum, context)) {
resultList.add(wordResult);
diff --git a/src/main/java/com/github/houbb/sensitive/word/support/check/AbstractWordCheck.java b/src/main/java/com/github/houbb/sensitive/word/support/check/AbstractWordCheck.java
index 32f6ea6..4d67497 100644
--- a/src/main/java/com/github/houbb/sensitive/word/support/check/AbstractWordCheck.java
+++ b/src/main/java/com/github/houbb/sensitive/word/support/check/AbstractWordCheck.java
@@ -59,7 +59,8 @@ public abstract class AbstractWordCheck implements IWordCheck {
return WordCheckResult.newInstance()
.wordLengthResult(wordLengthResult)
.type(getType())
- .checkClass(clazz);
+ .checkClass(clazz)
+ ;
}
}
diff --git a/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckWord.java b/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckWord.java
index 4fb7488..b44cc54 100644
--- a/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckWord.java
+++ b/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckWord.java
@@ -53,6 +53,8 @@ public class WordCheckWord extends AbstractWordCheck {
int maxWhite = 0;
int maxBlack = 0;
boolean firstCheck = true;
+ String blackWord = null;
+ String whiteWord = null;
WordContainsTypeEnum wordContainsTypeEnumAllow = wordDataAllow.contains(stringBuilder, innerContext);
WordContainsTypeEnum wordContainsTypeEnumDeny = wordData.contains(stringBuilder, innerContext);
@@ -72,6 +74,8 @@ public class WordCheckWord extends AbstractWordCheck {
if (WordContainsTypeEnum.CONTAINS_END.equals(wordContainsTypeEnumAllow)) {
maxWhite += tempLen;
wordContainsTypeEnumAllow = WordContainsTypeEnum.NOT_FOUND;
+
+ whiteWord = stringBuilder.toString();
}
}
@@ -80,6 +84,8 @@ public class WordCheckWord extends AbstractWordCheck {
if (WordContainsTypeEnum.CONTAINS_END.equals(wordContainsTypeEnumDeny)) {
maxBlack += tempLen;
wordContainsTypeEnumDeny = WordContainsTypeEnum.NOT_FOUND;
+
+ blackWord = stringBuilder.toString();
}
}
@@ -93,7 +99,9 @@ public class WordCheckWord extends AbstractWordCheck {
return WordLengthResult.newInstance()
.wordAllowLen(maxWhite)
- .wordDenyLen(maxBlack);
+ .wordDenyLen(maxBlack)
+ .wordAllow(whiteWord)
+ .wordDeny(blackWord);
}
@Override
diff --git a/src/main/java/com/github/houbb/sensitive/word/support/result/WordLengthResult.java b/src/main/java/com/github/houbb/sensitive/word/support/result/WordLengthResult.java
index d1888bd..9bc5663 100644
--- a/src/main/java/com/github/houbb/sensitive/word/support/result/WordLengthResult.java
+++ b/src/main/java/com/github/houbb/sensitive/word/support/result/WordLengthResult.java
@@ -15,6 +15,18 @@ public class WordLengthResult {
*/
private int wordDenyLen;
+ /**
+ * The matched deny-list (blacklist) word.
+ * @since 0.25.1
+ */
+ private String wordDeny;
+
+ /**
+ * The value actually matched against the allow-list (whitelist).
+ * @since 0.25.1
+ */
+ private String wordAllow;
+
public static WordLengthResult newInstance() {
return new WordLengthResult();
}
@@ -37,11 +49,31 @@ public class WordLengthResult {
return this;
}
+ public String wordDeny() { // getter: the deny-list word stored on this result (null when never set)
+ return wordDeny;
+ }
+
+ public WordLengthResult wordDeny(String wordDeny) { // fluent setter: stores the value and returns this for chaining
+ this.wordDeny = wordDeny;
+ return this;
+ }
+
+ public String wordAllow() { // getter: the allow-list value stored on this result (null when never set)
+ return wordAllow;
+ }
+
+ public WordLengthResult wordAllow(String wordAllow) { // fluent setter: stores the value and returns this for chaining
+ this.wordAllow = wordAllow;
+ return this;
+ }
+
@Override
public String toString() { // renders both lengths plus the matched deny/allow words; the two words are wrapped in single quotes
return "WordLengthResult{" +
"wordAllowLen=" + wordAllowLen +
", wordDenyLen=" + wordDenyLen +
+ ", wordDeny='" + wordDeny + '\'' +
+ ", wordAllow='" + wordAllow + '\'' +
'}';
}
diff --git a/src/main/java/com/github/houbb/sensitive/word/support/result/WordResult.java b/src/main/java/com/github/houbb/sensitive/word/support/result/WordResult.java
index fa6e018..b1919ed 100644
--- a/src/main/java/com/github/houbb/sensitive/word/support/result/WordResult.java
+++ b/src/main/java/com/github/houbb/sensitive/word/support/result/WordResult.java
@@ -18,6 +18,12 @@ public class WordResult implements IWordResult {
*/
private String type;
+ /**
+ * The matched word.
+ * @since 0.25.1
+ */
+ private String word;
+
private WordResult(){}
public static WordResult newInstance() {
@@ -54,12 +60,23 @@ public class WordResult implements IWordResult {
return this;
}
+ @Override
+ public String word() { // getter required by IWordResult: the matched word stored on this result
+ return word;
+ }
+
+ public WordResult word(String word) { // fluent setter: stores the matched word and returns this for chaining
+ this.word = word;
+ return this;
+ }
+
@Override
public String toString() { // NOTE: SensitiveWordHelperTest and WordResultHandlerTest assert this exact text, so keep the format stable
return "WordResult{" +
"startIndex=" + startIndex +
", endIndex=" + endIndex +
", type='" + type + '\'' +
+ ", word='" + word + '\'' +
'}';
}
diff --git a/src/main/java/com/github/houbb/sensitive/word/support/result/WordResultHandlerWordTags.java b/src/main/java/com/github/houbb/sensitive/word/support/result/WordResultHandlerWordTags.java
index 3893f55..5e79d64 100644
--- a/src/main/java/com/github/houbb/sensitive/word/support/result/WordResultHandlerWordTags.java
+++ b/src/main/java/com/github/houbb/sensitive/word/support/result/WordResultHandlerWordTags.java
@@ -1,5 +1,6 @@
package com.github.houbb.sensitive.word.support.result;
+import com.github.houbb.heaven.util.util.CollectionUtil;
import com.github.houbb.sensitive.word.api.IWordContext;
import com.github.houbb.sensitive.word.api.IWordResult;
import com.github.houbb.sensitive.word.utils.InnerWordCharUtils;
@@ -17,25 +18,20 @@ public class WordResultHandlerWordTags extends AbstractWordResultHandler wordTags = InnerWordTagUtils.tags(wordForTagLookup.toString(), wordContext);
+ Set wordTags = InnerWordTagUtils.tags(word, wordContext);
+
+ // 如果为空,则尝试使用命中的敏感词匹配 v0.25.1 bug105
+ if(CollectionUtil.isEmpty(wordTags)) {
+ wordTags = InnerWordTagUtils.tags(wordResult.word(), wordContext);
+ }
+
+ dto.setWord(word);
dto.setTags(wordTags);
return dto;
diff --git a/src/main/java/com/github/houbb/sensitive/word/utils/InnerWordTagUtils.java b/src/main/java/com/github/houbb/sensitive/word/utils/InnerWordTagUtils.java
index 3401e03..b2d617f 100644
--- a/src/main/java/com/github/houbb/sensitive/word/utils/InnerWordTagUtils.java
+++ b/src/main/java/com/github/houbb/sensitive/word/utils/InnerWordTagUtils.java
@@ -26,7 +26,7 @@ public class InnerWordTagUtils {
public static Set tags(final String word,
final IWordContext wordContext) {
if(StringUtil.isEmpty(word)) {
- return Collections.emptySet();
+ return null;
}
final IWordTag wordTag = wordContext.wordTag();
diff --git a/src/test/java/com/github/houbb/sensitive/word/bugs/b105/Bug105Test.java b/src/test/java/com/github/houbb/sensitive/word/bugs/b105/Bug105Test.java
index 913d2ec..271fd95 100644
--- a/src/test/java/com/github/houbb/sensitive/word/bugs/b105/Bug105Test.java
+++ b/src/test/java/com/github/houbb/sensitive/word/bugs/b105/Bug105Test.java
@@ -1,10 +1,8 @@
package com.github.houbb.sensitive.word.bugs.b105;
-import java.util.Arrays;
-import java.util.Collections;
-import java.util.HashSet;
-import java.util.List;
+import java.util.*;
+import com.github.houbb.heaven.util.util.CollectionUtil;
import org.junit.Assert;
import org.junit.Test;
@@ -17,16 +15,21 @@ import com.github.houbb.sensitive.word.support.tag.WordTags;
/**
* 测试Issue #105: 处理带有噪音字符时的标签查找
* 验证在启用字符忽略功能时,敏感词标签仍能被正确查找
+ *
+ * src/main/java/com/github/houbb/sensitive/word/support/result/WordResultHandlerWordTags.java
+ * @since 0.25.1
*/
public class Bug105Test {
@Test
public void testNoiseCharacterInTaggedWords() {
+ Map> newHashMap = new HashMap<>();
+ newHashMap.put("毛主席", new HashSet<>(Arrays.asList("政治", "领导人")));
+
// 配置同时启用字符忽略和标签的实例
SensitiveWordBs ignoreAndTagWordBs = SensitiveWordBs.newInstance()
.charIgnore(SensitiveWordCharIgnores.specialChars()) // 启用字符忽略
- .wordTag(WordTags.map(Collections.singletonMap("毛主席",
- new HashSet<>(Arrays.asList("政治", "领导人")))))
+ .wordTag(WordTags.map(newHashMap))
.init();
// 包含噪音字符的敏感词文本
@@ -39,7 +42,47 @@ public class Bug105Test {
Assert.assertNotNull("标签不应为空", fixedWord.get(0).getTags());
Assert.assertTrue("应包含'政治'标签", fixedWord.get(0).getTags().contains("政治"));
Assert.assertTrue("应包含'领导人'标签", fixedWord.get(0).getTags().contains("领导人"));
-
- System.out.println("Fixed result: " + fixedWord);
}
+
+ @Test
+ public void testNoiseCharacterInTaggedWords2() {
+ Map<String, Set<String>> newHashMap = new HashMap<>();
+ newHashMap.put("毛主席", new HashSet<>(Arrays.asList("政治", "领导人")));
+ newHashMap.put("毛---主---席", new HashSet<>(Arrays.asList("政治", "领导人", "自定义的")));
+
+ // Instance with both character-ignore and word tags enabled
+ SensitiveWordBs ignoreAndTagWordBs = SensitiveWordBs.newInstance()
+ .charIgnore(SensitiveWordCharIgnores.specialChars()) // enable character ignore
+ .wordTag(WordTags.map(newHashMap))
+ .init();
+
+ // Sensitive-word text that contains noise characters
+ final String noisyText = "你好毛---主---席";
+
+ // Tags are compared as a Set: the toString() order of a hash-based set is not a contract and must not be asserted.
+ List<WordTagsDto> fixedWord = ignoreAndTagWordBs.findAll(noisyText, WordResultHandlers.wordTags());
+ Assert.assertEquals(1, fixedWord.size());
+ Assert.assertEquals(new HashSet<>(Arrays.asList("政治", "领导人", "自定义的")), fixedWord.get(0).getTags());
+ }
+
+ @Test
+ public void testNoiseCharacterInTaggedWords3() {
+ Map<String, Set<String>> newHashMap = new HashMap<>();
+ newHashMap.put("毛xxx主xxxx席", new HashSet<>(Arrays.asList("政治", "领导人", "自定义的")));
+
+ // Instance with both character-ignore and word tags enabled
+ SensitiveWordBs ignoreAndTagWordBs = SensitiveWordBs.newInstance()
+ .charIgnore(SensitiveWordCharIgnores.specialChars()) // enable character ignore
+ .wordTag(WordTags.map(newHashMap))
+ .init();
+
+ // Sensitive-word text that contains noise characters
+ final String noisyText = "你好毛---主---席";
+
+ // Negative case: the only tag key differs from the text as written (and presumably from the hit word), so no tags are expected.
+ List<WordTagsDto> fixedWord = ignoreAndTagWordBs.findAll(noisyText, WordResultHandlers.wordTags());
+ Assert.assertEquals(1, fixedWord.size());
+ Assert.assertTrue(CollectionUtil.isEmpty(fixedWord.get(0).getTags()));
+ }
+
}
diff --git a/src/test/java/com/github/houbb/sensitive/word/core/SensitiveWordHelperTest.java b/src/test/java/com/github/houbb/sensitive/word/core/SensitiveWordHelperTest.java
index 71640fe..9d80501 100644
--- a/src/test/java/com/github/houbb/sensitive/word/core/SensitiveWordHelperTest.java
+++ b/src/test/java/com/github/houbb/sensitive/word/core/SensitiveWordHelperTest.java
@@ -62,7 +62,7 @@ public class SensitiveWordHelperTest {
final String text = "五星红旗迎风飘扬,毛主席的画像屹立在天安门前。";
List wordList = SensitiveWordHelper.findAll(text, WordResultHandlers.raw());
- Assert.assertEquals("[WordResult{startIndex=0, endIndex=4, type='WORD'}, WordResult{startIndex=9, endIndex=12, type='WORD'}, WordResult{startIndex=18, endIndex=21, type='WORD'}]", wordList.toString());
+ Assert.assertEquals("[WordResult{startIndex=0, endIndex=4, type='WORD', word='5星红旗'}, WordResult{startIndex=9, endIndex=12, type='WORD', word='毛主席'}, WordResult{startIndex=18, endIndex=21, type='WORD', word='天安门'}]", wordList.toString());
}
@@ -99,7 +99,7 @@ public class SensitiveWordHelperTest {
final String text = "五星红旗迎风飘扬,毛主席的画像屹立在天安门前。";
IWordResult word = SensitiveWordHelper.findFirst(text, WordResultHandlers.raw());
- Assert.assertEquals("WordResult{startIndex=0, endIndex=4, type='WORD'}", word.toString());
+ Assert.assertEquals("WordResult{startIndex=0, endIndex=4, type='WORD', word='5星红旗'}", word.toString());
}
/**
diff --git a/src/test/java/com/github/houbb/sensitive/word/support/handler/WordResultHandlerTest.java b/src/test/java/com/github/houbb/sensitive/word/support/handler/WordResultHandlerTest.java
index 9525a4d..38db75a 100644
--- a/src/test/java/com/github/houbb/sensitive/word/support/handler/WordResultHandlerTest.java
+++ b/src/test/java/com/github/houbb/sensitive/word/support/handler/WordResultHandlerTest.java
@@ -10,7 +10,7 @@ import org.junit.Assert;
import org.junit.Ignore;
import org.junit.Test;
-import java.util.List;
+import java.util.*;
/**
* @since 0.12.0
@@ -27,7 +27,7 @@ public class WordResultHandlerTest {
Assert.assertEquals("[五星红旗, 毛主席, 天安门]", wordList2.toString());
List wordList3 = SensitiveWordHelper.findAll(text, WordResultHandlers.raw());
- Assert.assertEquals("[WordResult{startIndex=0, endIndex=4, type='WORD'}, WordResult{startIndex=9, endIndex=12, type='WORD'}, WordResult{startIndex=18, endIndex=21, type='WORD'}]", wordList3.toString());
+ Assert.assertEquals("[WordResult{startIndex=0, endIndex=4, type='WORD', word='5星红旗'}, WordResult{startIndex=9, endIndex=12, type='WORD', word='毛主席'}, WordResult{startIndex=18, endIndex=21, type='WORD', word='天安门'}]", wordList3.toString());
}
@Test
@@ -35,20 +35,24 @@ public class WordResultHandlerTest {
final String text = "骂人:你他妈; 邮箱:123@qq.com; mobile: 13088889999; 网址:https://www.baidu.com";
List wordList3 = SensitiveWordHelper
.findAll(text, WordResultHandlers.raw());
- Assert.assertEquals("[WordResult{startIndex=3, endIndex=6, type='WORD'}]", wordList3.toString());
+ Assert.assertEquals("[WordResult{startIndex=3, endIndex=6, type='WORD', word='你他妈'}]", wordList3.toString());
}
@Test
- @Ignore
public void wordTagsTest() {
final String text = "五星红旗迎风飘扬,毛主席的画像屹立在天安门前。";
// 默认敏感词标签为空
List wordList1 = SensitiveWordHelper.findAll(text, WordResultHandlers.wordTags());
- Assert.assertEquals("[WordTagsDto{word='五星红旗', tags=[]}, WordTagsDto{word='毛主席', tags=[]}, WordTagsDto{word='天安门', tags=[]}]", wordList1.toString());
+ Assert.assertEquals("[WordTagsDto{word='五星红旗', tags=null}, WordTagsDto{word='毛主席', tags=[0]}, WordTagsDto{word='天安门', tags=null}]", wordList1.toString());
+
+ Map> wordMap = new HashMap<>();
+ wordMap.put("五星红旗", new HashSet<>(Arrays.asList("政治", "国家")));
+ wordMap.put("毛主席", new HashSet<>(Arrays.asList("政治", "伟人", "国家")));
+ wordMap.put("天安门", new HashSet<>(Arrays.asList("政治", "国家", "地址")));
List wordList2 = SensitiveWordBs.newInstance()
- .wordTag(WordTags.file("D:\\github\\sensitive-word\\src\\test\\resources\\dict_tag_test.txt"))
+ .wordTag(WordTags.map(wordMap))
.init()
.findAll(text, WordResultHandlers.wordTags());
Assert.assertEquals("[WordTagsDto{word='五星红旗', tags=[政治, 国家]}, WordTagsDto{word='毛主席', tags=[政治, 伟人, 国家]}, WordTagsDto{word='天安门', tags=[政治, 国家, 地址]}]", wordList2.toString());