release branch 0.25.1

2026-03-22 00:17:35 +08:00 · 2025-05-02 20:54:12 +08:00
parent a9599b1ff0
commit 975769b2e7
15 changed files with 155 additions and 44 deletions
--- a/CHANGE_LOG.md
+++ b/CHANGE_LOG.md
@@ -414,9 +414,8 @@
 | 1  | A    | wordCheck 策略支持用户自定义  | 2025-2-17 12:06:45 | https://github.com/houbb/sensitive-word/issues/101 |
 | 2  | A    | wordCheckUrlNoPrefix | 2025-2-17 12:06:45 | https://github.com/houbb/sensitive-word/issues/101 |

-# release_0.25.0
+# release_0.25.1

-| 序号 | 变更类型 | 说明                   | 时间                 | 备注   |
-|:---|:-----|----------------------|:-------------------|:-----|
-| 1  | A    | wordCheck 策略支持用户自定义  | 2025-2-17 12:06:45 | https://github.com/houbb/sensitive-word/issues/101 |
-| 2  | A    | wordCheckUrlNoPrefix | 2025-2-17 12:06:45 | https://github.com/houbb/sensitive-word/issues/101 |
+| 序号 | 变更类型 | 说明                           | 时间                | 备注                                                 |
+|:---|:-----|------------------------------|:------------------|:---------------------------------------------------|
+| 1  | A    | 修正 tags 匹配问题，黑名单命中时返回对应的黑名单词 | 2025-5-2 20:25:04 | https://github.com/houbb/sensitive-word/issues/105 |
--- a/README.md
+++ b/README.md
@@ -96,7 +96,7 @@ v0.24.0 开始内置支持对敏感词的分类细化，不过工作量比较大
 <dependency>
    <groupId>com.github.houbb</groupId>
    <artifactId>sensitive-word</artifactId>
-    <version>0.25.0</version>
+    <version>0.25.1</version>
 </dependency>
 ```

--- a/pom.xml
+++ b/pom.xml
@@ -6,7 +6,7 @@

    <groupId>com.github.houbb</groupId>
    <artifactId>sensitive-word</artifactId>
-    <version>0.25.0</version>
+    <version>0.25.1</version>

    <properties>
        <!--============================== All Plugins START ==============================-->
--- a/release.bat
+++ b/release.bat
@@ -10,7 +10,7 @@ ECHO "============================= RELEASE START..."

 :: 版本号信息(需要手动指定)
 :::: 旧版本名称
-SET version=0.25.0
+SET version=0.25.1
 :::: 新版本名称
 SET newVersion=0.26.0
 :::: 组织名称
--- a/src/main/java/com/github/houbb/sensitive/word/api/IWordResult.java
+++ b/src/main/java/com/github/houbb/sensitive/word/api/IWordResult.java
@@ -28,4 +28,11 @@ public interface IWordResult {
     */
    String type();

+    /**
+     * 实际匹配的单词，方便统一的标签等处理，实际问题排查等
+     * @return 结果
+     * @since 0.25.1
+     */
+    String word();
+
 }
--- a/src/main/java/com/github/houbb/sensitive/word/core/SensitiveWord.java
+++ b/src/main/java/com/github/houbb/sensitive/word/core/SensitiveWord.java
@@ -6,6 +6,7 @@ import com.github.houbb.sensitive.word.api.*;
 import com.github.houbb.sensitive.word.api.context.InnerSensitiveWordContext;
 import com.github.houbb.sensitive.word.constant.enums.WordValidModeEnum;
 import com.github.houbb.sensitive.word.support.check.WordCheckResult;
+import com.github.houbb.sensitive.word.support.result.WordLengthResult;
 import com.github.houbb.sensitive.word.support.result.WordResult;
 import com.github.houbb.sensitive.word.utils.InnerWordFormatUtils;

@@ -80,13 +81,16 @@ public class SensitiveWord extends AbstractSensitiveWord {


            // 命中
-            int wordLength = checkResult.wordLengthResult().wordDenyLen();
+            final WordLengthResult wordLengthResult = checkResult.wordLengthResult();
+            int wordLength = wordLengthResult.wordDenyLen();
            if (wordLength > 0) {
                // 保存敏感词
                WordResult wordResult = WordResult.newInstance()
                        .startIndex(i)
                        .endIndex(i+wordLength)
-                        .type(checkResult.type());
+                        .type(checkResult.type())
+                        .word(wordLengthResult.wordDeny());
+
                //v0.13.0 添加判断
                if(wordResultCondition.match(wordResult, text, modeEnum, context)) {
                    resultList.add(wordResult);
--- a/src/main/java/com/github/houbb/sensitive/word/support/check/AbstractWordCheck.java
+++ b/src/main/java/com/github/houbb/sensitive/word/support/check/AbstractWordCheck.java
@@ -59,7 +59,8 @@ public abstract class AbstractWordCheck implements IWordCheck {
        return WordCheckResult.newInstance()
                .wordLengthResult(wordLengthResult)
                .type(getType())
-                .checkClass(clazz);
+                .checkClass(clazz)
+                ;
    }

 }
--- a/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckWord.java
+++ b/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckWord.java
@@ -53,6 +53,8 @@ public class WordCheckWord extends AbstractWordCheck {
        int maxWhite = 0;
        int maxBlack = 0;
        boolean firstCheck = true;
+        String blackWord = null;
+        String whiteWord = null;

        WordContainsTypeEnum wordContainsTypeEnumAllow = wordDataAllow.contains(stringBuilder, innerContext);
        WordContainsTypeEnum wordContainsTypeEnumDeny = wordData.contains(stringBuilder, innerContext);
@@ -72,6 +74,8 @@ public class WordCheckWord extends AbstractWordCheck {
                if (WordContainsTypeEnum.CONTAINS_END.equals(wordContainsTypeEnumAllow)) {
                    maxWhite += tempLen;
                    wordContainsTypeEnumAllow = WordContainsTypeEnum.NOT_FOUND;
+
+                    whiteWord = stringBuilder.toString();
                }
            }

@@ -80,6 +84,8 @@ public class WordCheckWord extends AbstractWordCheck {
                if (WordContainsTypeEnum.CONTAINS_END.equals(wordContainsTypeEnumDeny)) {
                    maxBlack += tempLen;
                    wordContainsTypeEnumDeny = WordContainsTypeEnum.NOT_FOUND;
+
+                    blackWord = stringBuilder.toString();
                }
            }

@@ -93,7 +99,9 @@ public class WordCheckWord extends AbstractWordCheck {

        return WordLengthResult.newInstance()
                .wordAllowLen(maxWhite)
-                .wordDenyLen(maxBlack);
+                .wordDenyLen(maxBlack)
+                .wordAllow(whiteWord)
+                .wordDeny(blackWord);
    }

    @Override
--- a/src/main/java/com/github/houbb/sensitive/word/support/result/WordLengthResult.java
+++ b/src/main/java/com/github/houbb/sensitive/word/support/result/WordLengthResult.java
@@ -15,6 +15,18 @@ public class WordLengthResult {
     */
    private int wordDenyLen;

+    /**
+     * 黑名单匹配词
+     * @since 0.25.1
+     */
+    private String wordDeny;
+
+    /**
+     * 白名单实际匹配值
+     * @since 0.25.1
+     */
+    private String wordAllow;
+
    public static WordLengthResult newInstance() {
        return new WordLengthResult();
    }
@@ -37,11 +49,31 @@ public class WordLengthResult {
        return this;
    }

+    public String wordDeny() {
+        return wordDeny;
+    }
+
+    public WordLengthResult wordDeny(String wordDeny) {
+        this.wordDeny = wordDeny;
+        return this;
+    }
+
+    public String wordAllow() {
+        return wordAllow;
+    }
+
+    public WordLengthResult wordAllow(String wordAllow) {
+        this.wordAllow = wordAllow;
+        return this;
+    }
+
    @Override
    public String toString() {
        return "WordLengthResult{" +
                "wordAllowLen=" + wordAllowLen +
                ", wordDenyLen=" + wordDenyLen +
+                ", wordDeny='" + wordDeny + '\'' +
+                ", wordAllow='" + wordAllow + '\'' +
                '}';
    }

--- a/src/main/java/com/github/houbb/sensitive/word/support/result/WordResult.java
+++ b/src/main/java/com/github/houbb/sensitive/word/support/result/WordResult.java
@@ -18,6 +18,12 @@ public class WordResult implements IWordResult {
     */
    private String type;

+    /**
+     * 单词匹配
+     * @since 0.25.1
+     */
+    private String word;
+
    private WordResult(){}

    public static WordResult newInstance() {
@@ -54,12 +60,23 @@ public class WordResult implements IWordResult {
        return this;
    }

+    @Override
+    public String word() {
+        return word;
+    }
+
+    public WordResult word(String word) {
+        this.word = word;
+        return this;
+    }
+
    @Override
    public String toString() {
        return "WordResult{" +
                "startIndex=" + startIndex +
                ", endIndex=" + endIndex +
                ", type='" + type + '\'' +
+                ", word='" + word + '\'' +
                '}';
    }

--- a/src/main/java/com/github/houbb/sensitive/word/support/result/WordResultHandlerWordTags.java
+++ b/src/main/java/com/github/houbb/sensitive/word/support/result/WordResultHandlerWordTags.java
@@ -1,5 +1,6 @@
 package com.github.houbb.sensitive.word.support.result;

+import com.github.houbb.heaven.util.util.CollectionUtil;
 import com.github.houbb.sensitive.word.api.IWordContext;
 import com.github.houbb.sensitive.word.api.IWordResult;
 import com.github.houbb.sensitive.word.utils.InnerWordCharUtils;
@@ -17,25 +18,20 @@ public class WordResultHandlerWordTags extends AbstractWordResultHandler<WordTag

    @Override
    protected WordTagsDto doHandle(IWordResult wordResult, IWordContext wordContext, String originalText) {
+        WordTagsDto dto = new WordTagsDto();
+
        // 截取
        String word = InnerWordCharUtils.getString(originalText.toCharArray(), wordResult);

-        // 创建 DTO 并设置原始单词
-        WordTagsDto dto = new WordTagsDto();
-        dto.setWord(word);
-
-        // 如果启用了字符忽略功能，清理单词后再查找标签
-        StringBuilder wordForTagLookup = new StringBuilder();
-        char[] chars = word.toCharArray();
-        for (int i = 0; i < word.length(); i++) {
-            // 如果字符不被忽略，则保留
-            // TODO: 此处innercontext 被设为null，是否合理？
-            if (!wordContext.charIgnore().ignore(i, chars, null)) {
-                wordForTagLookup.append(chars[i]);
-            }
-        }
        // 获取 tags (使用清理后的单词查找标签)
-        Set<String> wordTags = InnerWordTagUtils.tags(wordForTagLookup.toString(), wordContext);
+        Set<String> wordTags = InnerWordTagUtils.tags(word, wordContext);
+
+        // 如果为空，则尝试使用命中的敏感词匹配 v0.25.1 bug105
+        if(CollectionUtil.isEmpty(wordTags)) {
+            wordTags = InnerWordTagUtils.tags(wordResult.word(), wordContext);
+        }
+
+        dto.setWord(word);
        dto.setTags(wordTags);

        return dto;
--- a/src/main/java/com/github/houbb/sensitive/word/utils/InnerWordTagUtils.java
+++ b/src/main/java/com/github/houbb/sensitive/word/utils/InnerWordTagUtils.java
@@ -26,7 +26,7 @@ public class InnerWordTagUtils {
    public static Set<String> tags(final String word,
                            final IWordContext wordContext) {
        if(StringUtil.isEmpty(word)) {
-            return Collections.emptySet();
+            return null;
        }

        final IWordTag wordTag = wordContext.wordTag();
--- a/src/test/java/com/github/houbb/sensitive/word/bugs/b105/Bug105Test.java
+++ b/src/test/java/com/github/houbb/sensitive/word/bugs/b105/Bug105Test.java
@@ -1,10 +1,8 @@
 package com.github.houbb.sensitive.word.bugs.b105;

-import java.util.Arrays;
-import java.util.Collections;
-import java.util.HashSet;
-import java.util.List;
+import java.util.*;

+import com.github.houbb.heaven.util.util.CollectionUtil;
 import org.junit.Assert;
 import org.junit.Test;

@@ -17,16 +15,21 @@ import com.github.houbb.sensitive.word.support.tag.WordTags;
 /**
 * 测试Issue #105: 处理带有噪音字符时的标签查找
 * 验证在启用字符忽略功能时，敏感词标签仍能被正确查找
+ *
+ * @see com.github.houbb.sensitive.word.support.result.WordResultHandlerWordTags
+ * @since 0.25.1
 */
 public class Bug105Test {

        @Test
        public void testNoiseCharacterInTaggedWords() {
+                Map<String, Set<String>> newHashMap = new HashMap<>();
+                newHashMap.put("毛主席", new HashSet<>(Arrays.asList("政治", "领导人")));
+
                // 配置同时启用字符忽略和标签的实例
                SensitiveWordBs ignoreAndTagWordBs = SensitiveWordBs.newInstance()
                                .charIgnore(SensitiveWordCharIgnores.specialChars()) // 启用字符忽略
-                                .wordTag(WordTags.map(Collections.singletonMap("毛主席",
-                                                new HashSet<>(Arrays.asList("政治", "领导人")))))
+                                .wordTag(WordTags.map(newHashMap))
                                .init();

                // 包含噪音字符的敏感词文本
@@ -39,7 +42,47 @@ public class Bug105Test {
                Assert.assertNotNull("标签不应为空", fixedWord.get(0).getTags());
                Assert.assertTrue("应包含'政治'标签", fixedWord.get(0).getTags().contains("政治"));
                Assert.assertTrue("应包含'领导人'标签", fixedWord.get(0).getTags().contains("领导人"));
-
-                System.out.println("Fixed result: " + fixedWord);
        }
+
+        @Test
+        public void testNoiseCharacterInTaggedWords2() {
+                Map<String, Set<String>> newHashMap = new HashMap<>();
+                newHashMap.put("毛主席", new HashSet<>(Arrays.asList("政治", "领导人")));
+                newHashMap.put("毛---主---席", new HashSet<>(Arrays.asList("政治", "领导人", "自定义的")));
+
+                // 配置同时启用字符忽略和标签的实例
+                SensitiveWordBs ignoreAndTagWordBs = SensitiveWordBs.newInstance()
+                        .charIgnore(SensitiveWordCharIgnores.specialChars()) // 启用字符忽略
+                        .wordTag(WordTags.map(newHashMap))
+                        .init();
+
+                // 包含噪音字符的敏感词文本
+                final String noisyText = "你好毛---主---席";
+
+                // 测试同时启用字符忽略和标签的实例（修复前会失败）
+                List<WordTagsDto> fixedWord = ignoreAndTagWordBs.findAll(noisyText, WordResultHandlers.wordTags());
+                Assert.assertEquals(1, fixedWord.size());
+                Assert.assertEquals("[政治, 自定义的, 领导人]", fixedWord.get(0).getTags().toString());
+        }
+
+        @Test
+        public void testNoiseCharacterInTaggedWords3() {
+                Map<String, Set<String>> newHashMap = new HashMap<>();
+                newHashMap.put("毛xxx主xxxx席", new HashSet<>(Arrays.asList("政治", "领导人", "自定义的")));
+
+                // 配置同时启用字符忽略和标签的实例
+                SensitiveWordBs ignoreAndTagWordBs = SensitiveWordBs.newInstance()
+                        .charIgnore(SensitiveWordCharIgnores.specialChars()) // 启用字符忽略
+                        .wordTag(WordTags.map(newHashMap))
+                        .init();
+
+                // 包含噪音字符的敏感词文本
+                final String noisyText = "你好毛---主---席";
+
+                // 测试同时启用字符忽略和标签的实例（修复前会失败）
+                List<WordTagsDto> fixedWord = ignoreAndTagWordBs.findAll(noisyText, WordResultHandlers.wordTags());
+                Assert.assertEquals(1, fixedWord.size());
+                Assert.assertTrue(CollectionUtil.isEmpty(fixedWord.get(0).getTags()));
+        }
+
 }
--- a/src/test/java/com/github/houbb/sensitive/word/core/SensitiveWordHelperTest.java
+++ b/src/test/java/com/github/houbb/sensitive/word/core/SensitiveWordHelperTest.java
@@ -62,7 +62,7 @@ public class SensitiveWordHelperTest {
        final String text = "五星红旗迎风飘扬，毛主席的画像屹立在天安门前。";

        List<IWordResult> wordList = SensitiveWordHelper.findAll(text, WordResultHandlers.raw());
-        Assert.assertEquals("[WordResult{startIndex=0, endIndex=4, type='WORD'}, WordResult{startIndex=9, endIndex=12, type='WORD'}, WordResult{startIndex=18, endIndex=21, type='WORD'}]", wordList.toString());
+        Assert.assertEquals("[WordResult{startIndex=0, endIndex=4, type='WORD', word='5星红旗'}, WordResult{startIndex=9, endIndex=12, type='WORD', word='毛主席'}, WordResult{startIndex=18, endIndex=21, type='WORD', word='天安门'}]", wordList.toString());
    }


@@ -99,7 +99,7 @@ public class SensitiveWordHelperTest {
        final String text = "五星红旗迎风飘扬，毛主席的画像屹立在天安门前。";

        IWordResult word = SensitiveWordHelper.findFirst(text, WordResultHandlers.raw());
-        Assert.assertEquals("WordResult{startIndex=0, endIndex=4, type='WORD'}", word.toString());
+        Assert.assertEquals("WordResult{startIndex=0, endIndex=4, type='WORD', word='5星红旗'}", word.toString());
    }

    /**
--- a/src/test/java/com/github/houbb/sensitive/word/support/handler/WordResultHandlerTest.java
+++ b/src/test/java/com/github/houbb/sensitive/word/support/handler/WordResultHandlerTest.java
@@ -10,7 +10,7 @@ import org.junit.Assert;
 import org.junit.Ignore;
 import org.junit.Test;

-import java.util.List;
+import java.util.*;

 /**
 * @since 0.12.0
@@ -27,7 +27,7 @@ public class WordResultHandlerTest {
        Assert.assertEquals("[五星红旗, 毛主席, 天安门]", wordList2.toString());

        List<IWordResult> wordList3 = SensitiveWordHelper.findAll(text, WordResultHandlers.raw());
-        Assert.assertEquals("[WordResult{startIndex=0, endIndex=4, type='WORD'}, WordResult{startIndex=9, endIndex=12, type='WORD'}, WordResult{startIndex=18, endIndex=21, type='WORD'}]", wordList3.toString());
+        Assert.assertEquals("[WordResult{startIndex=0, endIndex=4, type='WORD', word='5星红旗'}, WordResult{startIndex=9, endIndex=12, type='WORD', word='毛主席'}, WordResult{startIndex=18, endIndex=21, type='WORD', word='天安门'}]", wordList3.toString());
    }

    @Test
@@ -35,20 +35,24 @@ public class WordResultHandlerTest {
        final String text = "骂人：你他妈; 邮箱：123@qq.com; mobile: 13088889999; 网址：https://www.baidu.com";
        List<IWordResult> wordList3 = SensitiveWordHelper
                .findAll(text, WordResultHandlers.raw());
-        Assert.assertEquals("[WordResult{startIndex=3, endIndex=6, type='WORD'}]", wordList3.toString());
+        Assert.assertEquals("[WordResult{startIndex=3, endIndex=6, type='WORD', word='你他妈'}]", wordList3.toString());
    }

    @Test
-    @Ignore
    public void wordTagsTest() {
        final String text = "五星红旗迎风飘扬，毛主席的画像屹立在天安门前。";

        // 默认敏感词标签为空
        List<WordTagsDto> wordList1 = SensitiveWordHelper.findAll(text, WordResultHandlers.wordTags());
-        Assert.assertEquals("[WordTagsDto{word='五星红旗', tags=[]}, WordTagsDto{word='毛主席', tags=[]}, WordTagsDto{word='天安门', tags=[]}]", wordList1.toString());
+        Assert.assertEquals("[WordTagsDto{word='五星红旗', tags=null}, WordTagsDto{word='毛主席', tags=[0]}, WordTagsDto{word='天安门', tags=null}]", wordList1.toString());
+
+        Map<String, Set<String>> wordMap = new HashMap<>();
+        wordMap.put("五星红旗", new HashSet<>(Arrays.asList("政治", "国家")));
+        wordMap.put("毛主席", new HashSet<>(Arrays.asList("政治", "伟人", "国家")));
+        wordMap.put("天安门", new HashSet<>(Arrays.asList("政治", "国家", "地址")));

        List<WordTagsDto> wordList2 = SensitiveWordBs.newInstance()
-                .wordTag(WordTags.file("D:\\github\\sensitive-word\\src\\test\\resources\\dict_tag_test.txt"))
+                .wordTag(WordTags.map(wordMap))
                .init()
                .findAll(text, WordResultHandlers.wordTags());
        Assert.assertEquals("[WordTagsDto{word='五星红旗', tags=[政治, 国家]}, WordTagsDto{word='毛主席', tags=[政治, 伟人, 国家]}, WordTagsDto{word='天安门', tags=[政治, 国家, 地址]}]", wordList2.toString());