diff --git a/CHANGE_LOG.md b/CHANGE_LOG.md index 8c516e7..e6a4357 100644 --- a/CHANGE_LOG.md +++ b/CHANGE_LOG.md @@ -355,3 +355,9 @@ | 序号 | 变更类型 | 说明 | 时间 | 备注 | |:---|:-----|--------------------------------------------------------|:-------------------|:--------------------------------------------------| | 1 | F | 修正 #68 SensitiveWordCharIgnores.specialChars() 误判命中开始的问题 | 2024-8-28 15:02:25 | https://github.com/houbb/sensitive-word/issues/68 | + +# release_0.20.0 + +| 序号 | 变更类型 | 说明 | 时间 | 备注 | +|:---|:-----|-----------|:-------------------|:--------------------------------------------------| +| 1 | A | 支持数字的全词匹配 | 2024-9-18 16:39:40 | https://github.com/houbb/sensitive-word/issues/77 | diff --git a/README.md b/README.md index 69367ce..78f4984 100644 --- a/README.md +++ b/README.md @@ -59,13 +59,9 @@ - 针对单个词的新增/删除,无需全量初始化 - 新增 allow/deny 空实现 -### V0.19.1 +### V0.20.0 -- 修正 englishWordMatch #69 单个英文字符命中错误问题 - -### V0.19.2 - -- 修正 #68 `SensitiveWordCharIgnores.specialChars()` 误判命中开始的问题 +- 新增数字+英文的全词匹配实现 ## 更多资料 @@ -557,7 +553,17 @@ Assert.assertEquals("[]", sensitiveWordBs.findAll(text).toString()); 系统内置的策略在 `WordResultConditions#alwaysTrue()` 恒为真,`WordResultConditions#englishWordMatch()` 则要求英文必须全词匹配。 -## 入门例子 +## 内置策略 + +WordResultConditions 工具类可以获取匹配策略 + +| 实现 | 说明 | 支持版本 | +|:----|:------------|:--------| +| alwaysTrue | 恒为真 | | +| englishWordMatch | 英文单词全词匹配 | v0.13.0 | +| englishWordNumMatch | 英文单词/数字全词匹配 | v0.20.0 | + +## 使用例子 原始的默认情况: diff --git a/pom.xml b/pom.xml index 23e8de6..c2cfa37 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ com.github.houbb sensitive-word - 0.19.2 + 0.20.0 diff --git a/release.bat b/release.bat index 20c118f..794b4e7 100644 --- a/release.bat +++ b/release.bat @@ -10,9 +10,9 @@ ECHO "============================= RELEASE START..." 
:: 版本号信息(需要手动指定)
 :::: 旧版本名称
-SET version=0.19.2
+SET version=0.20.0
 :::: 新版本名称
-SET newVersion=0.20.0
+SET newVersion=0.21.0
 :::: 组织名称
 SET groupName=com.github.houbb
 :::: 项目名称
diff --git a/src/main/java/com/github/houbb/sensitive/word/support/resultcondition/WordResultConditionEnglishWordNumMatch.java b/src/main/java/com/github/houbb/sensitive/word/support/resultcondition/WordResultConditionEnglishWordNumMatch.java
new file mode 100644
index 0000000..1905cc7
--- /dev/null
+++ b/src/main/java/com/github/houbb/sensitive/word/support/resultcondition/WordResultConditionEnglishWordNumMatch.java
@@ -0,0 +1,60 @@
+package com.github.houbb.sensitive.word.support.resultcondition;
+
+import com.github.houbb.heaven.util.lang.CharUtil;
+import com.github.houbb.sensitive.word.api.IWordContext;
+import com.github.houbb.sensitive.word.api.IWordResult;
+import com.github.houbb.sensitive.word.constant.enums.WordValidModeEnum;
+
+/**
+ * Result condition requiring whole-word matches for hits made up of English letters and digits.
+ *
+ * A hit consisting only of letters/digits is dropped when the character directly before or
+ * after it is also a letter or digit (for example "69" inside "695"). A hit containing any other
+ * character is accepted without looking at its neighbours.
+ *
+ * https://github.com/houbb/sensitive-word/issues/77
+ *
+ * @since 0.20.0
+ */
+public class WordResultConditionEnglishWordNumMatch extends AbstractWordResultCondition {
+
+    /**
+     * Decides whether a raw hit is kept.
+     *
+     * @param wordResult hit to check; {@code startIndex()} is used as the inclusive start and
+     *                   {@code endIndex()} as the exclusive end of the hit inside {@code text}
+     * @param text       full text being scanned
+     * @param modeEnum   validation mode (not consulted by this condition)
+     * @param context    word context (not consulted by this condition)
+     * @return {@code false} when a pure letter/digit hit touches another letter or digit,
+     *         {@code true} otherwise
+     */
+    @Override
+    protected boolean doMatch(IWordResult wordResult, String text, WordValidModeEnum modeEnum, IWordContext context) {
+        final int startIndex = wordResult.startIndex();
+        final int endIndex = wordResult.endIndex();
+
+        // The whole-word rule only concerns hits made up purely of letters/digits, so this
+        // exemption has to run before the boundary checks; after them it could never change
+        // the result because both paths returned true.
+        for (int i = startIndex; i < endIndex; i++) {
+            if (!CharUtil.isDigitOrLetter(text.charAt(i))) {
+                return true;
+            }
+        }
+
+        // Not a whole word if the preceding character is a letter or digit.
+        if (startIndex > 0 && CharUtil.isDigitOrLetter(text.charAt(startIndex - 1))) {
+            return false;
+        }
+
+        // Not a whole word if the following character is a letter or digit
+        // (e.g. "cp" inside "cpm", "69" inside "695").
+        if (endIndex < text.length() && CharUtil.isDigitOrLetter(text.charAt(endIndex))) {
+            return false;
+        }
+
+        return true;
+    }
+
+}
diff --git a/src/main/java/com/github/houbb/sensitive/word/support/resultcondition/WordResultConditions.java 
b/src/main/java/com/github/houbb/sensitive/word/support/resultcondition/WordResultConditions.java index 21d86eb..8bafaa4 100644 --- a/src/main/java/com/github/houbb/sensitive/word/support/resultcondition/WordResultConditions.java +++ b/src/main/java/com/github/houbb/sensitive/word/support/resultcondition/WordResultConditions.java @@ -26,4 +26,13 @@ public final class WordResultConditions { return new WordResultConditionEnglishWordMatch(); } + /** + * Creates a condition that requires whole-word matching when the hit consists of English letters or digits. + * @return a new WordResultConditionEnglishWordNumMatch instance + * @since 0.20.0 + */ + public static IWordResultCondition englishWordNumMatch() { + return new WordResultConditionEnglishWordNumMatch(); + } + } diff --git a/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsResultConditionTest.java b/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsResultConditionTest.java index f1d9f8e..f3f10ec 100644 --- a/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsResultConditionTest.java +++ b/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsResultConditionTest.java @@ -137,4 +137,55 @@ public class SensitiveWordBsResultConditionTest { Assert.assertEquals("[cp]", wordList.toString()); } + @Test + public void englishWordNumMatchTest1() { + final String text = "cp cpm trade deficit totaled 695 billion yen, or $4.9 billion"; + + List wordList = SensitiveWordBs.newInstance() + .wordDeny(new IWordDeny() { + @Override + public List deny() { + return Arrays.asList("cp", "69"); + } + }) + .wordResultCondition(WordResultConditions.englishWordMatch()) + .init() + .findAll(text); + Assert.assertEquals("[cp, 69]", wordList.toString()); + } + + @Test + public void englishWordNumMatchTest2() { + final String text = "cp cpm trade deficit totaled 695 billion yen, or $4.9 billion"; + + List wordList = SensitiveWordBs.newInstance() + .wordDeny(new IWordDeny() { + @Override + public List deny() { + return Arrays.asList("cp", "69"); + } + }) + .wordResultCondition(WordResultConditions.englishWordNumMatch()) + .init() 
+ .findAll(text); + Assert.assertEquals("[cp]", wordList.toString()); + } + + @Test + public void englishWordNumMatchTest3() { + final String text = "cp cpm trade deficit totaled 695 billion yen, or $4.9 billion 69"; + + List wordList = SensitiveWordBs.newInstance() + .wordDeny(new IWordDeny() { + @Override + public List deny() { + return Arrays.asList("cp", "69"); + } + }) + .wordResultCondition(WordResultConditions.englishWordNumMatch()) + .init() + .findAll(text); + Assert.assertEquals("[cp, 69]", wordList.toString()); + } + }