From dc70bcd684b41f4a1425a3330cf4213de4d1f085 Mon Sep 17 00:00:00 2001 From: yudasen <2436348937@qq.com> Date: Sat, 1 Feb 2025 11:34:13 +0800 Subject: [PATCH 1/7] =?UTF-8?q?bugfix:=20=E4=BF=AE=E5=A4=8D=E5=88=A0?= =?UTF-8?q?=E9=99=A4=E6=95=8F=E6=84=9F=E8=AF=8D=E4=B8=AD=E5=8F=AF=E8=83=BD?= =?UTF-8?q?=E5=87=BA=E7=8E=B0=E7=9A=84=E5=B9=B6=E5=8F=91=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../houbb/sensitive/word/api/IWordData.java | 4 +- .../sensitive/word/bs/SensitiveWordBs.java | 9 +- .../word/support/data/AbstractWordData.java | 11 +- .../word/support/data/WordDataHashMap.java | 2 +- .../word/support/data/WordDataTree.java | 102 ++++++++++-------- 5 files changed, 70 insertions(+), 58 deletions(-) diff --git a/src/main/java/com/github/houbb/sensitive/word/api/IWordData.java b/src/main/java/com/github/houbb/sensitive/word/api/IWordData.java index 7aa55cd..ef1d8b7 100644 --- a/src/main/java/com/github/houbb/sensitive/word/api/IWordData.java +++ b/src/main/java/com/github/houbb/sensitive/word/api/IWordData.java @@ -22,10 +22,10 @@ public interface IWordData extends ISensitiveWordDestroy { /** * 删除敏感词 - * @param word 单词 + * @param collection 单词 * @since 0.19.0 */ - void removeWord(String word); + void removeWord(Collection collection); /** * 新增敏感词 diff --git a/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java b/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java index 31441f3..efbc36f 100644 --- a/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java +++ b/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java @@ -639,9 +639,7 @@ public class SensitiveWordBs implements ISensitiveWordDestroy { // 主要原因是二者没有保持一致,初始化的数据和插入的数据没有做相同的格式化 List formatList = InnerWordFormatUtils.formatWordList(collection, context); - for(String word : formatList) { - this.wordData.removeWord(word); - } + this.wordData.removeWord(formatList); } /** @@ -698,9 
+696,8 @@ public class SensitiveWordBs implements ISensitiveWordDestroy { // 主要原因是二者没有保持一致,初始化的数据和插入的数据没有做相同的格式化 List formatList = InnerWordFormatUtils.formatWordList(collection, context); - for(String word : formatList) { - this.wordDataAllow.removeWord(word); - } + this.wordDataAllow.removeWord(formatList); + } /** * 新增敏感词白名单 diff --git a/src/main/java/com/github/houbb/sensitive/word/support/data/AbstractWordData.java b/src/main/java/com/github/houbb/sensitive/word/support/data/AbstractWordData.java index 06515d5..ae1874d 100644 --- a/src/main/java/com/github/houbb/sensitive/word/support/data/AbstractWordData.java +++ b/src/main/java/com/github/houbb/sensitive/word/support/data/AbstractWordData.java @@ -1,6 +1,5 @@ package com.github.houbb.sensitive.word.support.data; -import com.github.houbb.heaven.util.lang.StringUtil; import com.github.houbb.heaven.util.util.CollectionUtil; import com.github.houbb.sensitive.word.api.IWordData; import com.github.houbb.sensitive.word.api.context.InnerSensitiveWordContext; @@ -31,9 +30,9 @@ public abstract class AbstractWordData implements IWordData { /** * 删除敏感词 - * @param word 敏感词 + * @param collection */ - protected abstract void doRemoveWord(String word); + protected abstract void doRemoveWord(Collection collection); /** * 新增敏感词 @@ -49,12 +48,12 @@ public abstract class AbstractWordData implements IWordData { } @Override - public void removeWord(String word) { - if(StringUtil.isEmpty(word)) { + public void removeWord(Collection collection) { + if(CollectionUtil.isEmpty(collection)) { return; } - doRemoveWord(word); + doRemoveWord(collection); } @Override diff --git a/src/main/java/com/github/houbb/sensitive/word/support/data/WordDataHashMap.java b/src/main/java/com/github/houbb/sensitive/word/support/data/WordDataHashMap.java index 834d85f..bf189c6 100644 --- a/src/main/java/com/github/houbb/sensitive/word/support/data/WordDataHashMap.java +++ b/src/main/java/com/github/houbb/sensitive/word/support/data/WordDataHashMap.java 
@@ -89,7 +89,7 @@ public class WordDataHashMap extends AbstractWordData { } @Override - protected void doRemoveWord(String word) { + protected void doRemoveWord(Collection collection) { } diff --git a/src/main/java/com/github/houbb/sensitive/word/support/data/WordDataTree.java b/src/main/java/com/github/houbb/sensitive/word/support/data/WordDataTree.java index 453c841..5027f24 100644 --- a/src/main/java/com/github/houbb/sensitive/word/support/data/WordDataTree.java +++ b/src/main/java/com/github/houbb/sensitive/word/support/data/WordDataTree.java @@ -4,7 +4,6 @@ import com.github.houbb.heaven.annotation.ThreadSafe; import com.github.houbb.heaven.util.lang.ObjectUtil; import com.github.houbb.heaven.util.lang.StringUtil; import com.github.houbb.sensitive.word.api.IWordContext; -import com.github.houbb.sensitive.word.api.IWordData; import com.github.houbb.sensitive.word.api.context.InnerSensitiveWordContext; import com.github.houbb.sensitive.word.constant.enums.WordContainsTypeEnum; @@ -84,49 +83,7 @@ public class WordDataTree extends AbstractWordData { this.root = newRoot; } - @Override - protected void doRemoveWord(String word) { - WordDataTreeNode tempNode = root; - //需要删除的 - Map map = new HashMap<>(); - char[] chars = word.toCharArray(); - int length = chars.length; - for (int i = 0; i < length; i++) { - //不存在第一个词 - WordDataTreeNode subNode = tempNode.getSubNode(chars[i]); - if (subNode == null) { - return; - } - if (i == (length - 1)) { - //尾字符判断是否结束 - if (!subNode.end()) { - return; - } - if (subNode.getNodeSize() > 0) { - //尾字符下还存在字符,即标识即可 - subNode.end(false); - return; - } - } - if (subNode.end()) { - map.clear(); - } - map.put(chars[i], tempNode); - tempNode = subNode; - } - - for (Map.Entry entry : map.entrySet()) { - WordDataTreeNode value = entry.getValue(); - //节点只有一个就置空 - if (value.getNodeSize() == 1) { - value.clearNode(); - return; - } - //多个就删除 - value.removeNode(entry.getKey()); - } - } /** * 新增敏感词 @@ -143,6 +100,21 @@ public class WordDataTree 
extends AbstractWordData { } } + @Override + protected synchronized void doRemoveWord(Collection collection) { + for (String word : collection) { + if (StringUtil.isEmpty(word)) { + continue; + } + removeWord(this.root, word); + } + } + + + + + + /** * 获取当前的 Map * @param nowNode 当前节点 @@ -211,4 +183,48 @@ public class WordDataTree extends AbstractWordData { tempNode.end(true); } + + private void removeWord(WordDataTreeNode root, String word){ + WordDataTreeNode tempNode = root; + //需要删除的 + Map map = new HashMap<>(); + char[] chars = word.toCharArray(); + int length = chars.length; + for (int i = 0; i < length; i++) { + //不存在第一个词 + WordDataTreeNode subNode = tempNode.getSubNode(chars[i]); + if (subNode == null) { + return; + } + if (i == (length - 1)) { + //尾字符判断是否结束 + if (!subNode.end()) { + return; + } + if (subNode.getNodeSize() > 0) { + //尾字符下还存在字符,即标识即可 + subNode.end(false); + return; + } + } + if (subNode.end()) { + map.clear(); + } + map.put(chars[i], tempNode); + + tempNode = subNode; + } + + for (Map.Entry entry : map.entrySet()) { + WordDataTreeNode value = entry.getValue(); + //节点只有一个就置空 + if (value.getNodeSize() == 1) { + value.clearNode(); + return; + } + //多个就删除 + value.removeNode(entry.getKey()); + } + } + } From d6fa74884a8537ef54b143039d3abcaa9c9c5701 Mon Sep 17 00:00:00 2001 From: houbb Date: Sun, 2 Feb 2025 15:32:39 +0800 Subject: [PATCH 2/7] release branch 0.24.1 --- CHANGE_LOG.md | 8 +++++++- README.md | 6 +++++- pom.xml | 2 +- release.bat | 2 +- .../sensitive/word/support/data/AbstractWordData.java | 2 +- 5 files changed, 15 insertions(+), 5 deletions(-) diff --git a/CHANGE_LOG.md b/CHANGE_LOG.md index 7360944..5c64253 100644 --- a/CHANGE_LOG.md +++ b/CHANGE_LOG.md @@ -392,4 +392,10 @@ | 序号 | 变更类型 | 说明 | 时间 | 备注 | |:---|:-----|----------------|:--------------------|:--------------| | 1 | A | 内置支持多个单词标签实现策略 | 2024-12-22 14:08:20 | 强化单词标签能力,方便复用 | -| 2 | O | 升级 heaven 依赖 | 2024-12-22 14:08:20 | | \ No newline at end of file +| 2 | O | 升级 heaven 依赖 
| 2024-12-22 14:08:20 | | + +# release_0.24.1 + +| 序号 | 变更类型 | 说明 | 时间 | 备注 | +|:---|:-----|------------|:------------------|:---------------| +| 1 | F | 删除时添加同步锁优化 | 2025-2-2 15:30:26 | 涉及到接口调整 PR-100 | \ No newline at end of file diff --git a/README.md b/README.md index 737598e..bd74554 100644 --- a/README.md +++ b/README.md @@ -66,6 +66,10 @@ v0.24.0 开始内置支持对敏感词的分类细化,不过工作量比较大 - 初步内置实现单词标签,丰富单词标签内置策略 +### V0.24.1 + +- 删除时统一添加同步锁 sync + ## 更多资料 ### 敏感词控台 @@ -104,7 +108,7 @@ v0.24.0 开始内置支持对敏感词的分类细化,不过工作量比较大 com.github.houbb sensitive-word - 0.24.0 + 0.24.1 ``` diff --git a/pom.xml b/pom.xml index 15fc986..6767341 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ com.github.houbb sensitive-word - 0.24.0 + 0.24.1 diff --git a/release.bat b/release.bat index 88a1dcf..8ab7049 100644 --- a/release.bat +++ b/release.bat @@ -10,7 +10,7 @@ ECHO "============================= RELEASE START..." :: 版本号信息(需要手动指定) :::: 旧版本名称 -SET version=0.24.0 +SET version=0.24.1 :::: 新版本名称 SET newVersion=0.25.0 :::: 组织名称 diff --git a/src/main/java/com/github/houbb/sensitive/word/support/data/AbstractWordData.java b/src/main/java/com/github/houbb/sensitive/word/support/data/AbstractWordData.java index ae1874d..ffb06dc 100644 --- a/src/main/java/com/github/houbb/sensitive/word/support/data/AbstractWordData.java +++ b/src/main/java/com/github/houbb/sensitive/word/support/data/AbstractWordData.java @@ -30,7 +30,7 @@ public abstract class AbstractWordData implements IWordData { /** * 删除敏感词 - * @param collection + * @param collection 集合 */ protected abstract void doRemoveWord(Collection collection); From 34d0ec7a459c6b7a98a8d67e0df97852f553dd8e Mon Sep 17 00:00:00 2001 From: houbb Date: Sun, 2 Feb 2025 15:36:56 +0800 Subject: [PATCH 3/7] [Feature] add for new --- .../com/github/houbb/sensitive/word/support/tag/WordTags.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/main/java/com/github/houbb/sensitive/word/support/tag/WordTags.java 
b/src/main/java/com/github/houbb/sensitive/word/support/tag/WordTags.java index 379e77b..189bf0c 100644 --- a/src/main/java/com/github/houbb/sensitive/word/support/tag/WordTags.java +++ b/src/main/java/com/github/houbb/sensitive/word/support/tag/WordTags.java @@ -71,6 +71,8 @@ public class WordTags { /** * 根据标准的约定行处理 * @param lines 行信息 + * @param wordSplit 单词分割 + * @param tagSplit 标签分割 * @return 结果 */ public static IWordTag lines(final Collection lines, final String wordSplit, final String tagSplit) { From b7757e6f3f2da80ccf7ad4cf006206c3e3bfc335 Mon Sep 17 00:00:00 2001 From: houbb Date: Sun, 2 Feb 2025 16:04:31 +0800 Subject: [PATCH 4/7] release branch 0.24.2 --- CHANGE_LOG.md | 9 +++- README.md | 11 +++-- pom.xml | 2 +- release.bat | 2 +- .../houbb/sensitive/word/api/IWordCheck.java | 1 + .../word/support/result/WordLengthResult.java | 42 ++++++++++++------ .../word/benchmark/BenchmarkBasicTest.java | 43 ++++++++----------- 7 files changed, 65 insertions(+), 45 deletions(-) diff --git a/CHANGE_LOG.md b/CHANGE_LOG.md index 5c64253..1ee18b6 100644 --- a/CHANGE_LOG.md +++ b/CHANGE_LOG.md @@ -398,4 +398,11 @@ | 序号 | 变更类型 | 说明 | 时间 | 备注 | |:---|:-----|------------|:------------------|:---------------| -| 1 | F | 删除时添加同步锁优化 | 2025-2-2 15:30:26 | 涉及到接口调整 PR-100 | \ No newline at end of file +| 1 | F | 删除时添加同步锁优化 | 2025-2-2 15:30:26 | 涉及到接口调整 PR-100 | + +# release_0.24.2 + +| 序号 | 变更类型 | 说明 | 时间 | 备注 | +|:---|:-----|---------------------|:------------------|:-------------------| +| 1 | O | findFirst 真实实现,性能优化 | 2025-2-2 15:30:26 | PR-99 | +| 2 | O | 黑白名单遍历统一优化,性能优化 | 2025-2-2 15:30:26 | PR-99 涉及到原始结果返回值调整 | \ No newline at end of file diff --git a/README.md b/README.md index bd74554..6902280 100644 --- a/README.md +++ b/README.md @@ -58,9 +58,6 @@ v0.24.0 开始内置支持对敏感词的分类细化,不过工作量比较大 [CHANGE_LOG.md](https://github.com/houbb/sensitive-word/blob/master/CHANGE_LOG.md) -### V0.23.0 - -- 结果条件拓展支持 wordTags 和 chains ### V0.24.0 @@ -70,6 +67,12 @@ v0.24.0 开始内置支持对敏感词的分类细化,不过工作量比较大 - 
删除时统一添加同步锁 sync +### V0.24.2 + +- 统一黑白名单为一次遍历,性能优化 + +- 实现真实的 findFirst,性能优化 + ## 更多资料 ### 敏感词控台 @@ -108,7 +111,7 @@ v0.24.0 开始内置支持对敏感词的分类细化,不过工作量比较大 com.github.houbb sensitive-word - 0.24.1 + 0.24.2 ``` diff --git a/pom.xml b/pom.xml index 6767341..1f829ef 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ com.github.houbb sensitive-word - 0.24.1 + 0.24.2 diff --git a/release.bat b/release.bat index 8ab7049..ea94b29 100644 --- a/release.bat +++ b/release.bat @@ -10,7 +10,7 @@ ECHO "============================= RELEASE START..." :: 版本号信息(需要手动指定) :::: 旧版本名称 -SET version=0.24.1 +SET version=0.24.2 :::: 新版本名称 SET newVersion=0.25.0 :::: 组织名称 diff --git a/src/main/java/com/github/houbb/sensitive/word/api/IWordCheck.java b/src/main/java/com/github/houbb/sensitive/word/api/IWordCheck.java index ac72e8e..db9191e 100644 --- a/src/main/java/com/github/houbb/sensitive/word/api/IWordCheck.java +++ b/src/main/java/com/github/houbb/sensitive/word/api/IWordCheck.java @@ -31,6 +31,7 @@ public interface IWordCheck { * @param context 执行上下文 * @return 敏感信息对应的长度 * @since 0.0.5 + * @since 0.24.2 为了黑白名单统一,调整了对应的返回值 */ WordCheckResult sensitiveCheck(final int beginIndex, final InnerSensitiveWordContext context); diff --git a/src/main/java/com/github/houbb/sensitive/word/support/result/WordLengthResult.java b/src/main/java/com/github/houbb/sensitive/word/support/result/WordLengthResult.java index aac3136..d1888bd 100644 --- a/src/main/java/com/github/houbb/sensitive/word/support/result/WordLengthResult.java +++ b/src/main/java/com/github/houbb/sensitive/word/support/result/WordLengthResult.java @@ -1,32 +1,48 @@ package com.github.houbb.sensitive.word.support.result; +/** + * 说明:统一让黑白名单一次遍历,性能优化 + * + * @since 0.24.2 + */ public class WordLengthResult { - private int wordAllowLen; - private int wordDenyLen; + /** + * 白名单长度 + */ + private int wordAllowLen; + /** + * 黑名单长度 + */ + private int wordDenyLen; - - private WordLengthResult(){} - - public static WordLengthResult newInstance(){ + 
public static WordLengthResult newInstance() { return new WordLengthResult(); } - - public int wordAllowLen(){ + public int wordAllowLen() { return this.wordAllowLen; } - public WordLengthResult wordAllowLen(int wordAllowLen){ - this.wordAllowLen=wordAllowLen; + + public WordLengthResult wordAllowLen(int wordAllowLen) { + this.wordAllowLen = wordAllowLen; return this; } - public int wordDenyLen(){ + public int wordDenyLen() { return this.wordDenyLen; } - public WordLengthResult wordDenyLen(int wordDenyLen){ - this.wordDenyLen=wordDenyLen; + + public WordLengthResult wordDenyLen(int wordDenyLen) { + this.wordDenyLen = wordDenyLen; return this; } + @Override + public String toString() { + return "WordLengthResult{" + + "wordAllowLen=" + wordAllowLen + + ", wordDenyLen=" + wordDenyLen + + '}'; + } } diff --git a/src/test/java/com/github/houbb/sensitive/word/benchmark/BenchmarkBasicTest.java b/src/test/java/com/github/houbb/sensitive/word/benchmark/BenchmarkBasicTest.java index 670d08b..476bad0 100644 --- a/src/test/java/com/github/houbb/sensitive/word/benchmark/BenchmarkBasicTest.java +++ b/src/test/java/com/github/houbb/sensitive/word/benchmark/BenchmarkBasicTest.java @@ -72,44 +72,37 @@ public class BenchmarkBasicTest { } /** - * * 黑白名单一次遍历 优化前:300*他们在地铁口交易,查10000次,26183 - * * 黑白名单一次遍历 优化后:300*他们在地铁口交易,查10000次,15705 - * + * 黑白名单一次遍历 优化前:300*他们在地铁口交易,查10000次,26183 + * 黑白名单一次遍历 优化后:300*他们在地铁口交易,查10000次,15705 + * @since 0.24.2 */ @Test public void costTimeOneTraceTest() { - StringBuilder sb=new StringBuilder(); - for(int i=0;i<300;i++){ + StringBuilder sb = new StringBuilder(); + for (int i = 0; i < 300; i++) { sb.append("他们在地铁口交易").append(i); } String text = sb.toString(); // 1W 次 long start = System.currentTimeMillis(); - SensitiveWordBs sensitiveWordBs = SensitiveWordBs.newInstance() - .wordDeny(new IWordDeny() { - @Override - public List deny() { - return Collections.singletonList("口交"); - } - }) - .wordAllow(new IWordAllow() { - @Override - public List allow() { - 
return Collections.singletonList("地铁口交易"); - } - }) - .enableWordCheck(true) - .enableNumCheck(false) - .enableUrlCheck(false) - .enableEmailCheck(false) - .init(); + SensitiveWordBs sensitiveWordBs = SensitiveWordBs.newInstance().wordDeny(new IWordDeny() { + @Override + public List deny() { + return Collections.singletonList("口交"); + } + }).wordAllow(new IWordAllow() { + @Override + public List allow() { + return Collections.singletonList("地铁口交易"); + } + }).enableWordCheck(true).enableNumCheck(false).enableUrlCheck(false).enableEmailCheck(false).init(); - for(int i = 0; i < 10000; i++) { + for (int i = 0; i < 10000; i++) { sensitiveWordBs.findAll(text); } long end = System.currentTimeMillis(); - System.out.println("------------------ COST: " + (end-start)); + System.out.println("------------------ COST: " + (end - start)); } /** From 49407d1489f21136ca2907b1b689c6c49e641dc1 Mon Sep 17 00:00:00 2001 From: houbb Date: Mon, 17 Feb 2025 12:45:07 +0800 Subject: [PATCH 5/7] release branch 0.25.0 --- CHANGE_LOG.md | 9 ++- README.md | 58 ++++++++++---- pom.xml | 2 +- release.bat | 4 +- .../sensitive/word/api/IWordContext.java | 19 +++++ .../sensitive/word/bs/SensitiveWordBs.java | 73 ++++++++++++++++++ .../word/bs/SensitiveWordContext.java | 76 +++++++++++++++++++ .../word/support/check/WordCheckUrl.java | 12 ++- .../support/check/WordCheckUrlNoPrefix.java | 32 ++++++++ .../word/support/check/WordChecks.java | 11 +++ .../combine/check/WordCheckCombine.java | 10 +-- .../word/bs/SensitiveWordBsConfigTest.java | 39 ++-------- .../bs/SensitiveWordBsUrlNoPrefixTest.java | 37 +++++++++ 13 files changed, 323 insertions(+), 59 deletions(-) create mode 100644 src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckUrlNoPrefix.java create mode 100644 src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsUrlNoPrefixTest.java diff --git a/CHANGE_LOG.md b/CHANGE_LOG.md index 1ee18b6..26b715e 100644 --- a/CHANGE_LOG.md +++ b/CHANGE_LOG.md @@ -405,4 +405,11 @@ | 
序号 | 变更类型 | 说明 | 时间 | 备注 | |:---|:-----|---------------------|:------------------|:-------------------| | 1 | O | findFirst 真实实现,性能优化 | 2025-2-2 15:30:26 | PR-99 | -| 2 | O | 黑白名单遍历统一优化,性能优化 | 2025-2-2 15:30:26 | PR-99 涉及到原始结果返回值调整 | \ No newline at end of file +| 2 | O | 黑白名单遍历统一优化,性能优化 | 2025-2-2 15:30:26 | PR-99 涉及到原始结果返回值调整 | + +# release_0.25.0 + +| 序号 | 变更类型 | 说明 | 时间 | 备注 | +|:---|:-----|----------------------|:-------------------|:-----| +| 1 | A | wordCheck 策略支持用户自定义 | 2025-2-17 12:06:45 | https://github.com/houbb/sensitive-word/issues/101 | +| 2 | A | wordCheckUrlNoPrefix | 2025-2-17 12:06:45 | https://github.com/houbb/sensitive-word/issues/101 | diff --git a/README.md b/README.md index 6902280..3f159e7 100644 --- a/README.md +++ b/README.md @@ -58,21 +58,6 @@ v0.24.0 开始内置支持对敏感词的分类细化,不过工作量比较大 [CHANGE_LOG.md](https://github.com/houbb/sensitive-word/blob/master/CHANGE_LOG.md) - -### V0.24.0 - -- 初步内置实现单词标签,丰富单词标签内置策略 - -### V0.24.1 - -- 删除时统一添加同步锁 sync - -### V0.24.2 - -- 统一黑白名单为一次遍历,性能优化 - -- 实现真实的 findFirst,性能优化 - ## 更多资料 ### 敏感词控台 @@ -111,7 +96,7 @@ v0.24.0 开始内置支持对敏感词的分类细化,不过工作量比较大 com.github.houbb sensitive-word - 0.24.2 + 0.25.0 ``` @@ -377,6 +362,22 @@ Assert.assertEquals("[ⒻⒻⒻfⓤuⓤ⒰cⓒ⒦]", wordList.toString()); ## 更多检测策略 +### 说明 + +v0.25.0 目前的几个策略,也支持用户引导类自定义。所有的策略都是接口,支持用户自定义实现。 + +| 序号 | 方法 | 说明 | 默认值 | +|:---|:---------------------|:-------------------------------------------|:------| +| 16 | wordCheckNum | 数字检测策略(v0.25.0开始支持) | `WordChecks.num()` | +| 17 | wordCheckEmail | 邮箱检测策略(v0.25.0开始支持) | `WordChecks.email()` | +| 18 | wordCheckUrl | URL检测策略(v0.25.0开始支持),内置还是实现了 `urlNoPrefix()` | `(WordChecks.url()` | +| 19 | wordCheckIpv4 | ipv4检测策略(v0.25.0开始支持) | `WordChecks.ipv4()` | +| 20 | wordCheckWord | 敏感词检测策略(v0.25.0开始支持) | `WordChecks.word()` | + +内置实现: + +a) `WordChecks.urlNoPrefix()` 作为 url 的额外实现,可以不需要 `https://` 和 `http://` 前缀。 + ### 邮箱检测 邮箱等个人信息,默认未启用。 @@ -425,6 +426,21 @@ Assert.assertEquals("[https://www.baidu.com]", wordList.toString()); 
Assert.assertEquals("点击链接 ********************* 查看答案", sensitiveWordBs.replace(text)); ``` +v0.25.0 内置支持不需要 http 协议的前缀检测: + +```java +final String text = "点击链接 https://www.baidu.com 查看答案,当然也可以是 baidu.com、www.baidu.com"; + +final SensitiveWordBs sensitiveWordBs = SensitiveWordBs.newInstance() + .enableUrlCheck(true) // 启用URL检测 + .wordCheckUrl(WordChecks.urlNoPrefix()) //指定检测的方式 + .init(); +List wordList = sensitiveWordBs.findAll(text); +Assert.assertEquals("[www.baidu.com, baidu.com, www.baidu.com]", wordList.toString()); + +Assert.assertEquals("点击链接 https://************* 查看答案,当然也可以是 *********、*************", sensitiveWordBs.replace(text)); +``` + ### IPV4 检测 v0.17.0 支持 @@ -467,6 +483,11 @@ SensitiveWordBs wordBs = SensitiveWordBs.newInstance() .enableUrlCheck(false) .enableIpv4Check(false) .enableWordCheck(true) + .wordCheckNum(WordChecks.num()) + .wordCheckEmail(WordChecks.email()) + .wordCheckUrl(WordChecks.url()) + .wordCheckIpv4(WordChecks.ipv4()) + .wordCheckWord(WordChecks.word()) .numCheckLen(8) .wordTag(WordTags.none()) .charIgnore(SensitiveWordCharIgnores.defaults()) @@ -497,6 +518,11 @@ Assert.assertTrue(wordBs.contains(text)); | 13 | wordTag | 词对应的标签 | none | | 14 | charIgnore | 忽略的字符 | none | | 15 | wordResultCondition | 针对匹配的敏感词额外加工,比如可以限制英文单词必须全匹配 | 恒为真 | +| 16 | wordCheckNum | 数字检测策略(v0.25.0开始支持) | `WordChecks.num()` | +| 17 | wordCheckEmail | 邮箱检测策略(v0.25.0开始支持) | `WordChecks.email()` | +| 18 | wordCheckUrl | URL检测策略(v0.25.0开始支持) | `(WordChecks.url()` | +| 19 | wordCheckIpv4 | ipv4检测策略(v0.25.0开始支持) | `WordChecks.ipv4()` | +| 20 | wordCheckWord | 敏感词检测策略(v0.25.0开始支持) | `WordChecks.word()` | ## 内存资源的释放 diff --git a/pom.xml b/pom.xml index 1f829ef..8e459a0 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ com.github.houbb sensitive-word - 0.24.2 + 0.25.0 diff --git a/release.bat b/release.bat index ea94b29..a87d5d3 100644 --- a/release.bat +++ b/release.bat @@ -10,9 +10,9 @@ ECHO "============================= RELEASE START..." 
:: 版本号信息(需要手动指定) :::: 旧版本名称 -SET version=0.24.2 +SET version=0.25.0 :::: 新版本名称 -SET newVersion=0.25.0 +SET newVersion=0.26.0 :::: 组织名称 SET groupName=com.github.houbb :::: 项目名称 diff --git a/src/main/java/com/github/houbb/sensitive/word/api/IWordContext.java b/src/main/java/com/github/houbb/sensitive/word/api/IWordContext.java index 2dd3e85..2b50f08 100644 --- a/src/main/java/com/github/houbb/sensitive/word/api/IWordContext.java +++ b/src/main/java/com/github/houbb/sensitive/word/api/IWordContext.java @@ -275,4 +275,23 @@ public interface IWordContext { SensitiveWordContext wordResultCondition(IWordResultCondition wordResultCondition); + IWordCheck wordCheckWord(); + + SensitiveWordContext wordCheckWord(IWordCheck wordCheckWord); + + IWordCheck wordCheckNum(); + + SensitiveWordContext wordCheckNum(IWordCheck wordCheckNum); + + IWordCheck wordCheckEmail(); + + SensitiveWordContext wordCheckEmail(IWordCheck wordCheckEmail); + + IWordCheck wordCheckUrl(); + + SensitiveWordContext wordCheckUrl(IWordCheck wordCheckUrl); + + IWordCheck wordCheckIpv4(); + + SensitiveWordContext wordCheckIpv4(IWordCheck wordCheckIpv4); } diff --git a/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java b/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java index efbc36f..1471675 100644 --- a/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java +++ b/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java @@ -10,6 +10,7 @@ import com.github.houbb.sensitive.word.api.combine.IWordCheckCombine; import com.github.houbb.sensitive.word.api.combine.IWordFormatCombine; import com.github.houbb.sensitive.word.core.SensitiveWords; import com.github.houbb.sensitive.word.support.allow.WordAllows; +import com.github.houbb.sensitive.word.support.check.WordChecks; import com.github.houbb.sensitive.word.support.combine.allowdeny.WordAllowDenyCombines; import com.github.houbb.sensitive.word.support.combine.check.WordCheckCombines; import 
com.github.houbb.sensitive.word.support.combine.format.WordFormatCombines; @@ -182,6 +183,36 @@ public class SensitiveWordBs implements ISensitiveWordDestroy { */ private IWordResultCondition wordResultCondition = WordResultConditions.alwaysTrue(); + /** + * 单词检测策略 + * @since 0.25.0 + */ + private IWordCheck wordCheckWord = WordChecks.word(); + + /** + * 数字检测策略 + * @since 0.25.0 + */ + private IWordCheck wordCheckNum = WordChecks.num(); + + /** + * email 检测策略 + * @since 0.25.0 + */ + private IWordCheck wordCheckEmail = WordChecks.email(); + + /** + * URL 检测策略 + * @since 0.25.0 + */ + private IWordCheck wordCheckUrl = WordChecks.url(); + + /** + * ipv4 检测策略 + * @since 0.25.0 + */ + private IWordCheck wordCheckIpv4 = WordChecks.ipv4(); + /** * 新建验证实例 *

@@ -255,6 +286,13 @@ public class SensitiveWordBs implements ISensitiveWordDestroy { context.enableWordCheck(enableWordCheck); context.enableIpv4Check(enableIpv4Check); + // 校验策略实现配置 + context.wordCheckWord(wordCheckWord); + context.wordCheckEmail(wordCheckEmail); + context.wordCheckNum(wordCheckNum); + context.wordCheckUrl(wordCheckUrl); + context.wordCheckIpv4(wordCheckIpv4); + // 额外配置 context.sensitiveCheckNumLen(numCheckLen); context.wordReplace(wordReplace); @@ -370,6 +408,41 @@ public class SensitiveWordBs implements ISensitiveWordDestroy { return this; } + public SensitiveWordBs wordCheckWord(IWordCheck wordCheckWord) { + ArgUtil.notNull(wordCheckWord, "wordCheckWord"); + + this.wordCheckWord = wordCheckWord; + return this; + } + + public SensitiveWordBs wordCheckNum(IWordCheck wordCheckNum) { + ArgUtil.notNull(wordCheckNum, "wordCheckNum"); + + this.wordCheckNum = wordCheckNum; + return this; + } + + public SensitiveWordBs wordCheckEmail(IWordCheck wordCheckEmail) { + ArgUtil.notNull(wordCheckEmail, "wordCheckEmail"); + + this.wordCheckEmail = wordCheckEmail; + return this; + } + + public SensitiveWordBs wordCheckUrl(IWordCheck wordCheckUrl) { + ArgUtil.notNull(wordCheckUrl, "wordCheckUrl"); + + this.wordCheckUrl = wordCheckUrl; + return this; + } + + public SensitiveWordBs wordCheckIpv4(IWordCheck wordCheckIpv4) { + ArgUtil.notNull(wordCheckIpv4, "wordCheckIpv4"); + + this.wordCheckIpv4 = wordCheckIpv4; + return this; + } + //-------------------------------------------------------- 基础属性设置 /** * 是否启用 ipv4 校验 diff --git a/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordContext.java b/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordContext.java index 30663e3..bcd884c 100644 --- a/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordContext.java +++ b/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordContext.java @@ -1,6 +1,7 @@ package com.github.houbb.sensitive.word.bs; import 
com.github.houbb.sensitive.word.api.*; +import com.github.houbb.sensitive.word.support.check.WordChecks; /** * 上下文 @@ -133,6 +134,36 @@ public class SensitiveWordContext implements IWordContext { */ private IWordResultCondition wordResultCondition; + /** + * 单词检测策略 + * @since 0.25.0 + */ + private IWordCheck wordCheckWord; + + /** + * 数字检测策略 + * @since 0.25.0 + */ + private IWordCheck wordCheckNum; + + /** + * email 检测策略 + * @since 0.25.0 + */ + private IWordCheck wordCheckEmail; + + /** + * URL 检测策略 + * @since 0.25.0 + */ + private IWordCheck wordCheckUrl; + + /** + * ipv4 检测策略 + * @since 0.25.0 + */ + private IWordCheck wordCheckIpv4; + public IWordData wordData() { return wordData; } @@ -355,4 +386,49 @@ public class SensitiveWordContext implements IWordContext { this.wordResultCondition = wordResultCondition; return this; } + + public IWordCheck wordCheckWord() { + return wordCheckWord; + } + + public SensitiveWordContext wordCheckWord(IWordCheck wordCheckWord) { + this.wordCheckWord = wordCheckWord; + return this; + } + + public IWordCheck wordCheckNum() { + return wordCheckNum; + } + + public SensitiveWordContext wordCheckNum(IWordCheck wordCheckNum) { + this.wordCheckNum = wordCheckNum; + return this; + } + + public IWordCheck wordCheckEmail() { + return wordCheckEmail; + } + + public SensitiveWordContext wordCheckEmail(IWordCheck wordCheckEmail) { + this.wordCheckEmail = wordCheckEmail; + return this; + } + + public IWordCheck wordCheckUrl() { + return wordCheckUrl; + } + + public SensitiveWordContext wordCheckUrl(IWordCheck wordCheckUrl) { + this.wordCheckUrl = wordCheckUrl; + return this; + } + + public IWordCheck wordCheckIpv4() { + return wordCheckIpv4; + } + + public SensitiveWordContext wordCheckIpv4(IWordCheck wordCheckIpv4) { + this.wordCheckIpv4 = wordCheckIpv4; + return this; + } } diff --git a/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckUrl.java 
b/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckUrl.java index 4ca748b..105b79c 100644 --- a/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckUrl.java +++ b/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckUrl.java @@ -60,7 +60,17 @@ public class WordCheckUrl extends AbstractConditionWordCheck { // 改为 http:// 或者 https:// 开头 String string = stringBuilder.toString(); - return RegexUtil.isUrl(string); + return isUrl(string); + } + + /** + * 是否为 URL + * @param text 原始文本 + * @return 结果 + * @since 0.25.0 + */ + protected boolean isUrl(final String text) { + return RegexUtil.isUrl(text); } } diff --git a/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckUrlNoPrefix.java b/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckUrlNoPrefix.java new file mode 100644 index 0000000..4a6a491 --- /dev/null +++ b/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckUrlNoPrefix.java @@ -0,0 +1,32 @@ +package com.github.houbb.sensitive.word.support.check; + +import com.github.houbb.heaven.util.util.regex.RegexUtil; +import com.github.houbb.sensitive.word.api.IWordCheck; + +/** + * (1)暂时先粗略的处理 web-site + * (2)如果网址的最后为图片类型,则跳过。 + * (3)长度超过 70,直接结束。 + * + * 不包含前缀的实现策略 + * + * @author binbin.hou + * @since 0.25.0 + */ +public class WordCheckUrlNoPrefix extends WordCheckUrl { + + /** + * @since 0.3.0 + */ + private static final IWordCheck INSTANCE = new WordCheckUrlNoPrefix(); + + public static IWordCheck getInstance() { + return INSTANCE; + } + + @Override + protected boolean isUrl(String text) { + return RegexUtil.isWebSite(text); + } + +} diff --git a/src/main/java/com/github/houbb/sensitive/word/support/check/WordChecks.java b/src/main/java/com/github/houbb/sensitive/word/support/check/WordChecks.java index 230bab2..d173c2d 100644 --- a/src/main/java/com/github/houbb/sensitive/word/support/check/WordChecks.java +++ 
b/src/main/java/com/github/houbb/sensitive/word/support/check/WordChecks.java @@ -77,4 +77,15 @@ public final class WordChecks { return WordCheckIPV4.getInstance(); } + /** + * 不需要前缀的 urlPrefix + * 注意:这种检测方法可能会和代码中的包名称冲突 + * + * @return 实现 + * @since 0.25.0 + */ + public static IWordCheck urlNoPrefix() { + return WordCheckUrlNoPrefix.getInstance(); + } + } diff --git a/src/main/java/com/github/houbb/sensitive/word/support/combine/check/WordCheckCombine.java b/src/main/java/com/github/houbb/sensitive/word/support/combine/check/WordCheckCombine.java index 60205fb..61fbbb0 100644 --- a/src/main/java/com/github/houbb/sensitive/word/support/combine/check/WordCheckCombine.java +++ b/src/main/java/com/github/houbb/sensitive/word/support/combine/check/WordCheckCombine.java @@ -18,19 +18,19 @@ public class WordCheckCombine extends AbstractWordCheckCombine { List wordCheckList = new ArrayList<>(); if(context.enableWordCheck()) { - wordCheckList.add(WordChecks.word()); + wordCheckList.add(context.wordCheckWord()); } if(context.enableNumCheck()) { - wordCheckList.add(WordChecks.num()); + wordCheckList.add(context.wordCheckNum()); } if(context.enableEmailCheck()) { - wordCheckList.add(WordChecks.email()); + wordCheckList.add(context.wordCheckEmail()); } if(context.enableUrlCheck()) { - wordCheckList.add(WordChecks.url()); + wordCheckList.add(context.wordCheckUrl()); } if(context.enableIpv4Check()) { - wordCheckList.add(WordChecks.ipv4()); + wordCheckList.add(context.wordCheckIpv4()); } return wordCheckList; diff --git a/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsConfigTest.java b/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsConfigTest.java index 14c8b32..0e16b23 100644 --- a/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsConfigTest.java +++ b/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsConfigTest.java @@ -4,6 +4,7 @@ import com.github.houbb.heaven.util.io.FileUtil; import 
com.github.houbb.heaven.util.util.CollectionUtil; import com.github.houbb.sensitive.word.data.WordCountDto; import com.github.houbb.sensitive.word.support.allow.WordAllows; +import com.github.houbb.sensitive.word.support.check.WordChecks; import com.github.houbb.sensitive.word.support.deny.WordDenys; import com.github.houbb.sensitive.word.support.ignore.SensitiveWordCharIgnores; import com.github.houbb.sensitive.word.support.resultcondition.WordResultConditions; @@ -39,6 +40,11 @@ public class SensitiveWordBsConfigTest { .enableUrlCheck(false) .enableIpv4Check(false) .enableWordCheck(true) + .wordCheckNum(WordChecks.num()) + .wordCheckEmail(WordChecks.email()) + .wordCheckUrl(WordChecks.url()) + .wordCheckIpv4(WordChecks.ipv4()) + .wordCheckWord(WordChecks.word()) .numCheckLen(8) .wordTag(WordTags.none()) .charIgnore(SensitiveWordCharIgnores.defaults()) @@ -46,39 +52,6 @@ public class SensitiveWordBsConfigTest { .wordAllow(WordAllows.defaults()) .wordDeny(WordDenys.defaults()) .init(); - -// String dir = "D:\\code\\github\\houbb.github.io\\_posts"; -// File[] files = new File(dir).listFiles(); -// -// Set wordSet = new HashSet<>(); -// -// Map wordCountMap = new HashMap<>(); -// for(File file : files) { -// String content = FileUtil.getFileContent(file); -// List allWords = wordBs.findAll(content); -// -// for(String word : allWords) { -// Integer integer = wordCountMap.get(word); -// if(integer == null) { -// integer = 0; -// } -// -// integer++; -// wordCountMap.put(word, integer); -// } -// -// System.out.println(file.getName()); -// } -// -//// List wordCountDtoList = new ArrayList<>(); -// for(Map.Entry entry : wordCountMap.entrySet()) { -// if(entry.getValue() >= 3) { -// System.out.println(entry.getKey() + " : " + entry.getValue()); -// } -// } - -// Collections.sort(wordCountDtoList); -// System.out.println(wordCountDtoList); } @Test diff --git a/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsUrlNoPrefixTest.java 
b/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsUrlNoPrefixTest.java new file mode 100644 index 0000000..b5f6543 --- /dev/null +++ b/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsUrlNoPrefixTest.java @@ -0,0 +1,37 @@ +package com.github.houbb.sensitive.word.bs; + +import com.github.houbb.sensitive.word.support.check.WordChecks; +import org.junit.Assert; +import org.junit.Test; + +import java.util.List; + +/** + *

project: sensitive-word-SensitiveWordBsTest

+ *

create on 2020/1/7 23:43

+ * + * @author Administrator + * @since 0.25.0 + */ +public class SensitiveWordBsUrlNoPrefixTest { + + /** + * URL 检测 + * + * @since 0.25.0 + */ + @Test + public void urlNoPrefixTest() { + final String text = "点击链接 https://www.baidu.com 查看答案,当然也可以是 baidu.com、www.baidu.com"; + + final SensitiveWordBs sensitiveWordBs = SensitiveWordBs.newInstance() + .enableUrlCheck(true) // 启用URL检测 + .wordCheckUrl(WordChecks.urlNoPrefix()) //指定检测的方式 + .init(); + List wordList = sensitiveWordBs.findAll(text); + Assert.assertEquals("[www.baidu.com, baidu.com, www.baidu.com]", wordList.toString()); + + Assert.assertEquals("点击链接 https://************* 查看答案,当然也可以是 *********、*************", sensitiveWordBs.replace(text)); + } + +} From a6536db8599670e9e4756c204ece82ba99992f54 Mon Sep 17 00:00:00 2001 From: houbb Date: Mon, 17 Feb 2025 12:56:25 +0800 Subject: [PATCH 6/7] [Feature] add for new --- README.md | 43 ++++++++++--------- .../word/bs/SensitiveWordBsConfigTest.java | 2 + 2 files changed, 24 insertions(+), 21 deletions(-) diff --git a/README.md b/README.md index 3f159e7..27cbc28 100644 --- a/README.md +++ b/README.md @@ -501,28 +501,29 @@ Assert.assertTrue(wordBs.contains(text)); 其中各项配置的说明如下: -| 序号 | 方法 | 说明 | 默认值 | -|:---|:---------------------|:-----------------------------|:------| -| 1 | ignoreCase | 忽略大小写 | true | -| 2 | ignoreWidth | 忽略半角圆角 | true | -| 3 | ignoreNumStyle | 忽略数字的写法 | true | -| 4 | ignoreChineseStyle | 忽略中文的书写格式 | true | -| 5 | ignoreEnglishStyle | 忽略英文的书写格式 | true | -| 6 | ignoreRepeat | 忽略重复词 | false | -| 7 | enableNumCheck | 是否启用数字检测。 | false | -| 8 | enableEmailCheck | 是有启用邮箱检测 | false | -| 9 | enableUrlCheck | 是否启用链接检测 | false | -| 10 | enableIpv4Check | 是否启用IPv4检测 | false | -| 11 | enableWordCheck | 是否启用敏感单词检测 | true | -| 12 | numCheckLen | 数字检测,自定义指定长度。 | 8 | -| 13 | wordTag | 词对应的标签 | none | +| 序号 | 方法 | 说明 | 默认值 | +|:---|:--------------------|:-----------------------------|:------| +| 1 | ignoreCase | 忽略大小写 | true | +| 2 | ignoreWidth | 忽略半角圆角 | true | 
+| 3 | ignoreNumStyle | 忽略数字的写法 | true | +| 4 | ignoreChineseStyle | 忽略中文的书写格式 | true | +| 5 | ignoreEnglishStyle | 忽略英文的书写格式 | true | +| 6 | ignoreRepeat | 忽略重复词 | false | +| 7 | enableNumCheck | 是否启用数字检测。 | false | +| 8 | enableEmailCheck | 是否启用邮箱检测 | false | +| 9 | enableUrlCheck | 是否启用链接检测 | false | +| 10 | enableIpv4Check | 是否启用IPv4检测 | false | +| 11 | enableWordCheck | 是否启用敏感单词检测 | true | +| 12 | numCheckLen | 数字检测,自定义指定长度。 | 8 | +| 13 | wordTag | 词对应的标签 | none | | 14 | charIgnore | 忽略的字符 | none | -| 15 | wordResultCondition | 针对匹配的敏感词额外加工,比如可以限制英文单词必须全匹配 | 恒为真 | -| 16 | wordCheckNum | 数字检测策略(v0.25.0开始支持) | `WordChecks.num()` | -| 17 | wordCheckEmail | 邮箱检测策略(v0.25.0开始支持) | `WordChecks.email()` | -| 18 | wordCheckUrl | URL检测策略(v0.25.0开始支持) | `(WordChecks.url()` | -| 19 | wordCheckIpv4 | ipv4检测策略(v0.25.0开始支持) | `WordChecks.ipv4()` | -| 20 | wordCheckWord | 敏感词检测策略(v0.25.0开始支持) | `WordChecks.word()` | +| 15 | wordResultCondition | 针对匹配的敏感词额外加工,比如可以限制英文单词必须全匹配 | 恒为真 | +| 16 | wordCheckNum | 数字检测策略(v0.25.0开始支持) | `WordChecks.num()` | +| 17 | wordCheckEmail | 邮箱检测策略(v0.25.0开始支持) | `WordChecks.email()` | +| 18 | wordCheckUrl | URL检测策略(v0.25.0开始支持) | `WordChecks.url()` | +| 19 | wordCheckIpv4 | ipv4检测策略(v0.25.0开始支持) | `WordChecks.ipv4()` | +| 20 | wordCheckWord | 敏感词检测策略(v0.25.0开始支持) | `WordChecks.word()` | +| 21 | wordReplace | 替换策略 | `WordReplaces.defaults()` | ## 内存资源的释放 diff --git a/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsConfigTest.java b/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsConfigTest.java index 0e16b23..6612bcb 100644 --- a/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsConfigTest.java +++ b/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsConfigTest.java @@ -7,6 +7,7 @@ import com.github.houbb.sensitive.word.support.allow.WordAllows; import com.github.houbb.sensitive.word.support.check.WordChecks; import com.github.houbb.sensitive.word.support.deny.WordDenys; import
com.github.houbb.sensitive.word.support.ignore.SensitiveWordCharIgnores; +import com.github.houbb.sensitive.word.support.replace.WordReplaces; import com.github.houbb.sensitive.word.support.resultcondition.WordResultConditions; import com.github.houbb.sensitive.word.support.tag.WordTags; import org.junit.Assert; @@ -51,6 +52,7 @@ public class SensitiveWordBsConfigTest { .wordResultCondition(WordResultConditions.alwaysTrue()) .wordAllow(WordAllows.defaults()) .wordDeny(WordDenys.defaults()) + .wordReplace(WordReplaces.defaults()) .init(); } From c068ed5171b6195f65c3becff15e22ea3958246a Mon Sep 17 00:00:00 2001 From: "binbin.hou" Date: Sun, 27 Apr 2025 18:37:15 +0800 Subject: [PATCH 7/7] add maxFirst --- CHANGE_LOG.md | 7 ++ .../support/check/WordCheckWordMaxLen.java | 96 +++++++++++++++++++ .../word/bs/SensitiveWordMaxFirstTest.java | 28 ++++++ 3 files changed, 131 insertions(+) create mode 100644 src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckWordMaxLen.java create mode 100644 src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordMaxFirstTest.java diff --git a/CHANGE_LOG.md b/CHANGE_LOG.md index 26b715e..7353a3b 100644 --- a/CHANGE_LOG.md +++ b/CHANGE_LOG.md @@ -413,3 +413,10 @@ |:---|:-----|----------------------|:-------------------|:-----| | 1 | A | wordCheck 策略支持用户自定义 | 2025-2-17 12:06:45 | https://github.com/houbb/sensitive-word/issues/101 | | 2 | A | wordCheckUrlNoPrefix | 2025-2-17 12:06:45 | https://github.com/houbb/sensitive-word/issues/101 | + +# release_0.25.0 + +| 序号 | 变更类型 | 说明 | 时间 | 备注 | +|:---|:-----|----------------------|:-------------------|:-----| +| 1 | A | wordCheck 策略支持用户自定义 | 2025-2-17 12:06:45 | https://github.com/houbb/sensitive-word/issues/101 | +| 2 | A | wordCheckUrlNoPrefix | 2025-2-17 12:06:45 | https://github.com/houbb/sensitive-word/issues/101 | diff --git a/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckWordMaxLen.java 
b/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckWordMaxLen.java new file mode 100644 index 0000000..b7c72c0 --- /dev/null +++ b/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckWordMaxLen.java @@ -0,0 +1,96 @@ +package com.github.houbb.sensitive.word.support.check; + +import com.github.houbb.heaven.annotation.ThreadSafe; +import com.github.houbb.sensitive.word.api.ISensitiveWordCharIgnore; +import com.github.houbb.sensitive.word.api.IWordCheck; +import com.github.houbb.sensitive.word.api.IWordContext; +import com.github.houbb.sensitive.word.api.IWordData; +import com.github.houbb.sensitive.word.api.context.InnerSensitiveWordContext; +import com.github.houbb.sensitive.word.constant.enums.WordContainsTypeEnum; +import com.github.houbb.sensitive.word.constant.enums.WordTypeEnum; +import com.github.houbb.sensitive.word.support.result.WordLengthResult; + +import java.util.Map; + +/** + * 敏感词监测实现 + * @author binbin.hou + * @since 0.26.0 + */ +@Deprecated +public class WordCheckWordMaxLen extends AbstractWordCheck { + + @Override + protected Class getSensitiveCheckClass() { + return WordCheckWordMaxLen.class; + } + + @Override + protected WordLengthResult getActualLength(int beginIndex, InnerSensitiveWordContext innerContext) { + final String txt = innerContext.originalText(); + final Map formatCharMapping = innerContext.formatCharMapping(); + final IWordContext context = innerContext.wordContext(); + final IWordData wordData = context.wordData(); + final IWordData wordDataAllow = context.wordDataAllow(); + final ISensitiveWordCharIgnore wordCharIgnore = context.charIgnore(); + + // 前一个条件 + StringBuilder stringBuilder = new StringBuilder(); + char[] rawChars = txt.toCharArray(); + + int tempLen = 0; + int maxWhite = 0; + int maxBlack = 0; + boolean firstCheck = true; + + WordContainsTypeEnum wordContainsTypeEnumAllow = wordDataAllow.contains(stringBuilder, innerContext); + WordContainsTypeEnum wordContainsTypeEnumDeny = 
wordData.contains(stringBuilder, innerContext); + + for (int i = beginIndex; i < rawChars.length; i++) { + if (wordCharIgnore.ignore(i, rawChars, innerContext) && tempLen != 0) { + tempLen++; + continue; + } + + char mappingChar = formatCharMapping.get(rawChars[i]); + stringBuilder.append(mappingChar); + tempLen++; + + if (firstCheck || !WordContainsTypeEnum.NOT_FOUND.equals(wordContainsTypeEnumAllow)) { + wordContainsTypeEnumAllow = wordDataAllow.contains(stringBuilder, innerContext); + if (WordContainsTypeEnum.CONTAINS_END.equals(wordContainsTypeEnumAllow)) { + maxWhite += tempLen; + wordContainsTypeEnumAllow = WordContainsTypeEnum.NOT_FOUND; + } + } + + // 黑名单命中 + if (firstCheck || !WordContainsTypeEnum.NOT_FOUND.equals(wordContainsTypeEnumDeny)) { + wordContainsTypeEnumDeny = wordData.contains(stringBuilder, innerContext); + if (WordContainsTypeEnum.CONTAINS_END.equals(wordContainsTypeEnumDeny)) { + maxBlack += tempLen; + wordContainsTypeEnumDeny = WordContainsTypeEnum.NOT_FOUND; + } + } + + // 不再是第一次检测 + firstCheck = false; + + // 黑白名单都未匹配 + if (WordContainsTypeEnum.NOT_FOUND.equals(wordContainsTypeEnumAllow) && + WordContainsTypeEnum.NOT_FOUND.equals(wordContainsTypeEnumDeny)) { + break; + } + } + + return WordLengthResult.newInstance() + .wordAllowLen(maxWhite) + .wordDenyLen(maxBlack); + } + + @Override + protected String getType() { + return WordTypeEnum.WORD.getCode(); + } + +} diff --git a/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordMaxFirstTest.java b/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordMaxFirstTest.java new file mode 100644 index 0000000..2c0f819 --- /dev/null +++ b/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordMaxFirstTest.java @@ -0,0 +1,28 @@ +package com.github.houbb.sensitive.word.bs; + +import com.github.houbb.sensitive.word.api.IWordDeny; +import org.junit.Assert; +import org.junit.Test; + +import java.util.Arrays; +import java.util.List; + +public class SensitiveWordMaxFirstTest { + + 
@Test + public void maxFirstTest() { + SensitiveWordBs bs = SensitiveWordBs.newInstance() + .wordDeny(new IWordDeny() { + @Override + public List deny() { + return Arrays.asList("我的世界", "我的"); + } + }).init(); + + String text = "我的世界我的好玩"; + + List textList = bs.findAll(text); +// Assert.assertEquals("", textList.toString()); + } + +}