From bac9a301ed0ada86ad2469c0a18f2594eeba15ec Mon Sep 17 00:00:00 2001 From: houbb Date: Sat, 5 Jul 2025 19:27:01 +0800 Subject: [PATCH] release branch 0.26.2 --- CHANGE_LOG.md | 6 +++++ README.md | 2 +- pom.xml | 2 +- release.bat | 2 +- .../check/AbstractConditionWordCheck.java | 20 ++++++++++++++ .../sensitive/word/bugs/b118/Bug118Test.java | 26 +++++++++++++++++++ 6 files changed, 55 insertions(+), 3 deletions(-) create mode 100644 src/test/java/com/github/houbb/sensitive/word/bugs/b118/Bug118Test.java diff --git a/CHANGE_LOG.md b/CHANGE_LOG.md index 7a0fe33..9c19df5 100644 --- a/CHANGE_LOG.md +++ b/CHANGE_LOG.md @@ -431,3 +431,9 @@ | 序号 | 变更类型 | 说明 | 时间 | 备注 | |:---|:-----|-----------------------------------|:------------------|:---------------------------------------------------| | 1 | O | 优化敏感词词库,移除多余空行、重复词。添加 xx 不是中国的敏感词 | 2025-7-5 15:58:55 | https://github.com/houbb/sensitive-word/issues/120 | + +# release_0.26.2 + +| 序号 | 变更类型 | 说明 | 时间 | 备注 | +|:---|:-----|------------------|:------------------|:---------------------------------------------------| +| 1 | F | 修正数字等默认策略,忽略字符问题 | 2025-7-5 15:58:55 | https://github.com/houbb/sensitive-word/issues/118 | diff --git a/README.md b/README.md index cd0ac19..9be4ac5 100644 --- a/README.md +++ b/README.md @@ -110,7 +110,7 @@ v0.24.0 开始内置支持对敏感词的分类细化,不过工作量比较大 com.github.houbb sensitive-word - 0.26.1 + 0.26.2 ``` diff --git a/pom.xml b/pom.xml index df6c89f..c605d85 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ com.github.houbb sensitive-word - 0.26.1 + 0.26.2 diff --git a/release.bat b/release.bat index 7f96874..e6df8f5 100644 --- a/release.bat +++ b/release.bat @@ -10,7 +10,7 @@ ECHO "============================= RELEASE START..." 
:: 版本号信息(需要手动指定) :::: 旧版本名称 -SET version=0.26.1 +SET version=0.26.2 :::: 新版本名称 SET newVersion=0.27.0 :::: 组织名称 diff --git a/src/main/java/com/github/houbb/sensitive/word/support/check/AbstractConditionWordCheck.java b/src/main/java/com/github/houbb/sensitive/word/support/check/AbstractConditionWordCheck.java index ffd43df..090d03e 100644 --- a/src/main/java/com/github/houbb/sensitive/word/support/check/AbstractConditionWordCheck.java +++ b/src/main/java/com/github/houbb/sensitive/word/support/check/AbstractConditionWordCheck.java @@ -1,6 +1,7 @@ package com.github.houbb.sensitive.word.support.check; import com.github.houbb.heaven.annotation.ThreadSafe; +import com.github.houbb.sensitive.word.api.ISensitiveWordCharIgnore; import com.github.houbb.sensitive.word.api.IWordContext; import com.github.houbb.sensitive.word.api.context.InnerSensitiveWordContext; import com.github.houbb.sensitive.word.support.result.WordLengthResult; @@ -39,17 +40,31 @@ public abstract class AbstractConditionWordCheck extends AbstractWordCheck { @Override protected WordLengthResult getActualLength(int beginIndex, InnerSensitiveWordContext checkContext) { + // 忽略字符 https://github.com/houbb/sensitive-word/issues/118 + final ISensitiveWordCharIgnore charIgnore = checkContext.wordContext().charIgnore(); + final String txt = checkContext.originalText(); + final char[] chars = txt.toCharArray(); final IWordContext context = checkContext.wordContext(); final Map formatCharMapping = checkContext.formatCharMapping(); int actualLength = 0; + int tempIgnoreLen = 0; // 采用 ThreadLocal 应该可以提升性能,减少对象的创建。 StringBuilder stringBuilder = new StringBuilder(); int currentIx = 0; for(int i = beginIndex; i < txt.length(); i++) { currentIx = i; + + // 是否忽略? 
+ boolean ignoreCharFlag = charIgnore.ignore(currentIx, chars, checkContext); + if(ignoreCharFlag) { + tempIgnoreLen++; + + continue; + } + char currentChar = txt.charAt(i); // 映射处理 char mappingChar = formatCharMapping.get(currentChar); @@ -68,7 +83,12 @@ public abstract class AbstractConditionWordCheck extends AbstractWordCheck { // 匹配 if(isStringCondition(currentIx, stringBuilder, checkContext)) { actualLength = stringBuilder.length(); + + // 加上跳过的长度 + actualLength += tempIgnoreLen; } + // 重置 + tempIgnoreLen = 0; return WordLengthResult.newInstance() .wordDenyLen(actualLength) diff --git a/src/test/java/com/github/houbb/sensitive/word/bugs/b118/Bug118Test.java b/src/test/java/com/github/houbb/sensitive/word/bugs/b118/Bug118Test.java new file mode 100644 index 0000000..84142e4 --- /dev/null +++ b/src/test/java/com/github/houbb/sensitive/word/bugs/b118/Bug118Test.java @@ -0,0 +1,26 @@ +package com.github.houbb.sensitive.word.bugs.b118; + +import com.github.houbb.sensitive.word.bs.SensitiveWordBs; +import com.github.houbb.sensitive.word.support.check.WordChecks; +import com.github.houbb.sensitive.word.support.ignore.SensitiveWordCharIgnores; +import org.junit.Assert; +import org.junit.Test; + +public class Bug118Test { + + @Test + public void test() { + SensitiveWordBs sensitiveWordBs = SensitiveWordBs.newInstance() + .charIgnore(SensitiveWordCharIgnores.specialChars()) + .wordCheckNum(WordChecks.num()) + .numCheckLen(8) + .enableNumCheck(true) + .init(); + + Assert.assertEquals(sensitiveWordBs.findFirst("1234567===0001哈哈哈"), "1234567===0001"); + Assert.assertEquals(sensitiveWordBs.findFirst("12345670002 哈哈哈"), "12345670002"); + Assert.assertEquals(sensitiveWordBs.findFirst("=====123456====70002 哈哈哈"), "=====123456====70002"); + Assert.assertEquals(sensitiveWordBs.findFirst("=====123456====X70002 哈哈哈"), null); + } + +}