diff --git a/CHANGE_LOG.md b/CHANGE_LOG.md index 1ee18b6..26b715e 100644 --- a/CHANGE_LOG.md +++ b/CHANGE_LOG.md @@ -405,4 +405,11 @@ | 序号 | 变更类型 | 说明 | 时间 | 备注 | |:---|:-----|---------------------|:------------------|:-------------------| | 1 | O | findFirst 真实实现,性能优化 | 2025-2-2 15:30:26 | PR-99 | -| 2 | O | 黑白名单遍历统一优化,性能优化 | 2025-2-2 15:30:26 | PR-99 涉及到原始结果返回值调整 | \ No newline at end of file +| 2 | O | 黑白名单遍历统一优化,性能优化 | 2025-2-2 15:30:26 | PR-99 涉及到原始结果返回值调整 | + +# release_0.25.0 + +| 序号 | 变更类型 | 说明 | 时间 | 备注 | +|:---|:-----|----------------------|:-------------------|:-----| +| 1 | A | wordCheck 策略支持用户自定义 | 2025-2-17 12:06:45 | https://github.com/houbb/sensitive-word/issues/101 | +| 2 | A | wordCheckUrlNoPrefix | 2025-2-17 12:06:45 | https://github.com/houbb/sensitive-word/issues/101 | diff --git a/README.md b/README.md index 6902280..3f159e7 100644 --- a/README.md +++ b/README.md @@ -58,21 +58,6 @@ v0.24.0 开始内置支持对敏感词的分类细化,不过工作量比较大 [CHANGE_LOG.md](https://github.com/houbb/sensitive-word/blob/master/CHANGE_LOG.md) - -### V0.24.0 - -- 初步内置实现单词标签,丰富单词标签内置策略 - -### V0.24.1 - -- 删除时统一添加同步锁 sync - -### V0.24.2 - -- 统一黑白名单为一次遍历,性能优化 - -- 实现真实的 findFirst,性能优化 - ## 更多资料 ### 敏感词控台 @@ -111,7 +96,7 @@ v0.24.0 开始内置支持对敏感词的分类细化,不过工作量比较大 com.github.houbb sensitive-word - 0.24.2 + 0.25.0 ``` @@ -377,6 +362,22 @@ Assert.assertEquals("[ⒻⒻⒻfⓤuⓤ⒰cⓒ⒦]", wordList.toString()); ## 更多检测策略 +### 说明 + +v0.25.0 目前的几个策略,也支持用户引导类自定义。所有的策略都是接口,支持用户自定义实现。 + +| 序号 | 方法 | 说明 | 默认值 | +|:---|:---------------------|:-------------------------------------------|:------| +| 16 | wordCheckNum | 数字检测策略(v0.25.0开始支持) | `WordChecks.num()` | +| 17 | wordCheckEmail | 邮箱检测策略(v0.25.0开始支持) | `WordChecks.email()` | +| 18 | wordCheckUrl | URL检测策略(v0.25.0开始支持),内置还实现了 `urlNoPrefix()` | `WordChecks.url()` | +| 19 | wordCheckIpv4 | ipv4检测策略(v0.25.0开始支持) | `WordChecks.ipv4()` | +| 20 | wordCheckWord | 敏感词检测策略(v0.25.0开始支持) | `WordChecks.word()` | + +内置实现: + +a) `WordChecks.urlNoPrefix()` 作为 url 的额外实现,可以不需要 `https://` 
和 `http://` 前缀。 + ### 邮箱检测 邮箱等个人信息,默认未启用。 @@ -425,6 +426,21 @@ Assert.assertEquals("[https://www.baidu.com]", wordList.toString()); Assert.assertEquals("点击链接 ********************* 查看答案", sensitiveWordBs.replace(text)); ``` +v0.25.0 内置支持不需要 http 协议的前缀检测: + +```java +final String text = "点击链接 https://www.baidu.com 查看答案,当然也可以是 baidu.com、www.baidu.com"; + +final SensitiveWordBs sensitiveWordBs = SensitiveWordBs.newInstance() + .enableUrlCheck(true) // 启用URL检测 + .wordCheckUrl(WordChecks.urlNoPrefix()) //指定检测的方式 + .init(); +List wordList = sensitiveWordBs.findAll(text); +Assert.assertEquals("[www.baidu.com, baidu.com, www.baidu.com]", wordList.toString()); + +Assert.assertEquals("点击链接 https://************* 查看答案,当然也可以是 *********、*************", sensitiveWordBs.replace(text)); +``` + ### IPV4 检测 v0.17.0 支持 @@ -467,6 +483,11 @@ SensitiveWordBs wordBs = SensitiveWordBs.newInstance() .enableUrlCheck(false) .enableIpv4Check(false) .enableWordCheck(true) + .wordCheckNum(WordChecks.num()) + .wordCheckEmail(WordChecks.email()) + .wordCheckUrl(WordChecks.url()) + .wordCheckIpv4(WordChecks.ipv4()) + .wordCheckWord(WordChecks.word()) .numCheckLen(8) .wordTag(WordTags.none()) .charIgnore(SensitiveWordCharIgnores.defaults()) @@ -497,6 +518,11 @@ Assert.assertTrue(wordBs.contains(text)); | 13 | wordTag | 词对应的标签 | none | | 14 | charIgnore | 忽略的字符 | none | | 15 | wordResultCondition | 针对匹配的敏感词额外加工,比如可以限制英文单词必须全匹配 | 恒为真 | +| 16 | wordCheckNum | 数字检测策略(v0.25.0开始支持) | `WordChecks.num()` | +| 17 | wordCheckEmail | 邮箱检测策略(v0.25.0开始支持) | `WordChecks.email()` | +| 18 | wordCheckUrl | URL检测策略(v0.25.0开始支持) | `WordChecks.url()` | +| 19 | wordCheckIpv4 | ipv4检测策略(v0.25.0开始支持) | `WordChecks.ipv4()` | +| 20 | wordCheckWord | 敏感词检测策略(v0.25.0开始支持) | `WordChecks.word()` | ## 内存资源的释放 diff --git a/pom.xml b/pom.xml index 1f829ef..8e459a0 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ com.github.houbb sensitive-word - 0.24.2 + 0.25.0 diff --git a/release.bat b/release.bat index ea94b29..a87d5d3 100644 
--- a/release.bat +++ b/release.bat @@ -10,9 +10,9 @@ ECHO "============================= RELEASE START..." :: 版本号信息(需要手动指定) :::: 旧版本名称 -SET version=0.24.2 +SET version=0.25.0 :::: 新版本名称 -SET newVersion=0.25.0 +SET newVersion=0.26.0 :::: 组织名称 SET groupName=com.github.houbb :::: 项目名称 diff --git a/src/main/java/com/github/houbb/sensitive/word/api/IWordContext.java b/src/main/java/com/github/houbb/sensitive/word/api/IWordContext.java index 2dd3e85..2b50f08 100644 --- a/src/main/java/com/github/houbb/sensitive/word/api/IWordContext.java +++ b/src/main/java/com/github/houbb/sensitive/word/api/IWordContext.java @@ -275,4 +275,23 @@ public interface IWordContext { SensitiveWordContext wordResultCondition(IWordResultCondition wordResultCondition); + IWordCheck wordCheckWord(); + + SensitiveWordContext wordCheckWord(IWordCheck wordCheckWord); + + IWordCheck wordCheckNum(); + + SensitiveWordContext wordCheckNum(IWordCheck wordCheckNum); + + IWordCheck wordCheckEmail(); + + SensitiveWordContext wordCheckEmail(IWordCheck wordCheckEmail); + + IWordCheck wordCheckUrl(); + + SensitiveWordContext wordCheckUrl(IWordCheck wordCheckUrl); + + IWordCheck wordCheckIpv4(); + + SensitiveWordContext wordCheckIpv4(IWordCheck wordCheckIpv4); } diff --git a/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java b/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java index efbc36f..1471675 100644 --- a/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java +++ b/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java @@ -10,6 +10,7 @@ import com.github.houbb.sensitive.word.api.combine.IWordCheckCombine; import com.github.houbb.sensitive.word.api.combine.IWordFormatCombine; import com.github.houbb.sensitive.word.core.SensitiveWords; import com.github.houbb.sensitive.word.support.allow.WordAllows; +import com.github.houbb.sensitive.word.support.check.WordChecks; import 
com.github.houbb.sensitive.word.support.combine.allowdeny.WordAllowDenyCombines; import com.github.houbb.sensitive.word.support.combine.check.WordCheckCombines; import com.github.houbb.sensitive.word.support.combine.format.WordFormatCombines; @@ -182,6 +183,36 @@ public class SensitiveWordBs implements ISensitiveWordDestroy { */ private IWordResultCondition wordResultCondition = WordResultConditions.alwaysTrue(); + /** + * 单词检测策略 + * @since 0.25.0 + */ + private IWordCheck wordCheckWord = WordChecks.word(); + + /** + * 数字检测策略 + * @since 0.25.0 + */ + private IWordCheck wordCheckNum = WordChecks.num(); + + /** + * email 检测策略 + * @since 0.25.0 + */ + private IWordCheck wordCheckEmail = WordChecks.email(); + + /** + * URL 检测策略 + * @since 0.25.0 + */ + private IWordCheck wordCheckUrl = WordChecks.url(); + + /** + * ipv4 检测策略 + * @since 0.25.0 + */ + private IWordCheck wordCheckIpv4 = WordChecks.ipv4(); + /** * 新建验证实例 *

@@ -255,6 +286,13 @@ public class SensitiveWordBs implements ISensitiveWordDestroy { context.enableWordCheck(enableWordCheck); context.enableIpv4Check(enableIpv4Check); + // 校验策略实现配置 + context.wordCheckWord(wordCheckWord); + context.wordCheckEmail(wordCheckEmail); + context.wordCheckNum(wordCheckNum); + context.wordCheckUrl(wordCheckUrl); + context.wordCheckIpv4(wordCheckIpv4); + // 额外配置 context.sensitiveCheckNumLen(numCheckLen); context.wordReplace(wordReplace); @@ -370,6 +408,41 @@ public class SensitiveWordBs implements ISensitiveWordDestroy { return this; } + public SensitiveWordBs wordCheckWord(IWordCheck wordCheckWord) { + ArgUtil.notNull(wordCheckWord, "wordCheckWord"); + + this.wordCheckWord = wordCheckWord; + return this; + } + + public SensitiveWordBs wordCheckNum(IWordCheck wordCheckNum) { + ArgUtil.notNull(wordCheckNum, "wordCheckNum"); + + this.wordCheckNum = wordCheckNum; + return this; + } + + public SensitiveWordBs wordCheckEmail(IWordCheck wordCheckEmail) { + ArgUtil.notNull(wordCheckEmail, "wordCheckEmail"); + + this.wordCheckEmail = wordCheckEmail; + return this; + } + + public SensitiveWordBs wordCheckUrl(IWordCheck wordCheckUrl) { + ArgUtil.notNull(wordCheckUrl, "wordCheckUrl"); + + this.wordCheckUrl = wordCheckUrl; + return this; + } + + public SensitiveWordBs wordCheckIpv4(IWordCheck wordCheckIpv4) { + ArgUtil.notNull(wordCheckIpv4, "wordCheckIpv4"); + + this.wordCheckIpv4 = wordCheckIpv4; + return this; + } + //-------------------------------------------------------- 基础属性设置 /** * 是否启用 ipv4 校验 diff --git a/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordContext.java b/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordContext.java index 30663e3..bcd884c 100644 --- a/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordContext.java +++ b/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordContext.java @@ -1,6 +1,7 @@ package com.github.houbb.sensitive.word.bs; import 
com.github.houbb.sensitive.word.api.*; +import com.github.houbb.sensitive.word.support.check.WordChecks; /** * 上下文 @@ -133,6 +134,36 @@ public class SensitiveWordContext implements IWordContext { */ private IWordResultCondition wordResultCondition; + /** + * 单词检测策略 + * @since 0.25.0 + */ + private IWordCheck wordCheckWord; + + /** + * 数字检测策略 + * @since 0.25.0 + */ + private IWordCheck wordCheckNum; + + /** + * email 检测策略 + * @since 0.25.0 + */ + private IWordCheck wordCheckEmail; + + /** + * URL 检测策略 + * @since 0.25.0 + */ + private IWordCheck wordCheckUrl; + + /** + * ipv4 检测策略 + * @since 0.25.0 + */ + private IWordCheck wordCheckIpv4; + public IWordData wordData() { return wordData; } @@ -355,4 +386,49 @@ public class SensitiveWordContext implements IWordContext { this.wordResultCondition = wordResultCondition; return this; } + + public IWordCheck wordCheckWord() { + return wordCheckWord; + } + + public SensitiveWordContext wordCheckWord(IWordCheck wordCheckWord) { + this.wordCheckWord = wordCheckWord; + return this; + } + + public IWordCheck wordCheckNum() { + return wordCheckNum; + } + + public SensitiveWordContext wordCheckNum(IWordCheck wordCheckNum) { + this.wordCheckNum = wordCheckNum; + return this; + } + + public IWordCheck wordCheckEmail() { + return wordCheckEmail; + } + + public SensitiveWordContext wordCheckEmail(IWordCheck wordCheckEmail) { + this.wordCheckEmail = wordCheckEmail; + return this; + } + + public IWordCheck wordCheckUrl() { + return wordCheckUrl; + } + + public SensitiveWordContext wordCheckUrl(IWordCheck wordCheckUrl) { + this.wordCheckUrl = wordCheckUrl; + return this; + } + + public IWordCheck wordCheckIpv4() { + return wordCheckIpv4; + } + + public SensitiveWordContext wordCheckIpv4(IWordCheck wordCheckIpv4) { + this.wordCheckIpv4 = wordCheckIpv4; + return this; + } } diff --git a/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckUrl.java 
b/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckUrl.java index 4ca748b..105b79c 100644 --- a/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckUrl.java +++ b/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckUrl.java @@ -60,7 +60,17 @@ public class WordCheckUrl extends AbstractConditionWordCheck { // 改为 http:// 或者 https:// 开头 String string = stringBuilder.toString(); - return RegexUtil.isUrl(string); + return isUrl(string); + } + + /** + * 是否为 URL + * @param text 原始文本 + * @return 结果 + * @since 0.25.0 + */ + protected boolean isUrl(final String text) { + return RegexUtil.isUrl(text); } } diff --git a/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckUrlNoPrefix.java b/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckUrlNoPrefix.java new file mode 100644 index 0000000..4a6a491 --- /dev/null +++ b/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckUrlNoPrefix.java @@ -0,0 +1,32 @@ +package com.github.houbb.sensitive.word.support.check; + +import com.github.houbb.heaven.util.util.regex.RegexUtil; +import com.github.houbb.sensitive.word.api.IWordCheck; + +/** + * (1)暂时先粗略的处理 web-site + * (2)如果网址的最后为图片类型,则跳过。 + * (3)长度超过 70,直接结束。 + * + * 不包含前缀的实现策略 + * + * @author binbin.hou + * @since 0.25.0 + */ +public class WordCheckUrlNoPrefix extends WordCheckUrl { + + /** + * @since 0.25.0 + */ + private static final IWordCheck INSTANCE = new WordCheckUrlNoPrefix(); + + public static IWordCheck getInstance() { + return INSTANCE; + } + + @Override + protected boolean isUrl(String text) { + return RegexUtil.isWebSite(text); + } + +} diff --git a/src/main/java/com/github/houbb/sensitive/word/support/check/WordChecks.java b/src/main/java/com/github/houbb/sensitive/word/support/check/WordChecks.java index 230bab2..d173c2d 100644 --- a/src/main/java/com/github/houbb/sensitive/word/support/check/WordChecks.java +++ 
b/src/main/java/com/github/houbb/sensitive/word/support/check/WordChecks.java @@ -77,4 +77,15 @@ public final class WordChecks { return WordCheckIPV4.getInstance(); } + /** + * 不需要 http(s):// 前缀的 URL 检测策略 + * 注意:这种检测方法可能会和代码中的包名称冲突 + * + * @return 实现 + * @since 0.25.0 + */ + public static IWordCheck urlNoPrefix() { + return WordCheckUrlNoPrefix.getInstance(); + } + } diff --git a/src/main/java/com/github/houbb/sensitive/word/support/combine/check/WordCheckCombine.java b/src/main/java/com/github/houbb/sensitive/word/support/combine/check/WordCheckCombine.java index 60205fb..61fbbb0 100644 --- a/src/main/java/com/github/houbb/sensitive/word/support/combine/check/WordCheckCombine.java +++ b/src/main/java/com/github/houbb/sensitive/word/support/combine/check/WordCheckCombine.java @@ -18,19 +18,19 @@ public class WordCheckCombine extends AbstractWordCheckCombine { List wordCheckList = new ArrayList<>(); if(context.enableWordCheck()) { - wordCheckList.add(WordChecks.word()); + wordCheckList.add(context.wordCheckWord()); } if(context.enableNumCheck()) { - wordCheckList.add(WordChecks.num()); + wordCheckList.add(context.wordCheckNum()); } if(context.enableEmailCheck()) { - wordCheckList.add(WordChecks.email()); + wordCheckList.add(context.wordCheckEmail()); } if(context.enableUrlCheck()) { - wordCheckList.add(WordChecks.url()); + wordCheckList.add(context.wordCheckUrl()); } if(context.enableIpv4Check()) { - wordCheckList.add(WordChecks.ipv4()); + wordCheckList.add(context.wordCheckIpv4()); } return wordCheckList; diff --git a/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsConfigTest.java b/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsConfigTest.java index 14c8b32..0e16b23 100644 --- a/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsConfigTest.java +++ b/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsConfigTest.java @@ -4,6 +4,7 @@ import com.github.houbb.heaven.util.io.FileUtil; import 
com.github.houbb.heaven.util.util.CollectionUtil; import com.github.houbb.sensitive.word.data.WordCountDto; import com.github.houbb.sensitive.word.support.allow.WordAllows; +import com.github.houbb.sensitive.word.support.check.WordChecks; import com.github.houbb.sensitive.word.support.deny.WordDenys; import com.github.houbb.sensitive.word.support.ignore.SensitiveWordCharIgnores; import com.github.houbb.sensitive.word.support.resultcondition.WordResultConditions; @@ -39,6 +40,11 @@ public class SensitiveWordBsConfigTest { .enableUrlCheck(false) .enableIpv4Check(false) .enableWordCheck(true) + .wordCheckNum(WordChecks.num()) + .wordCheckEmail(WordChecks.email()) + .wordCheckUrl(WordChecks.url()) + .wordCheckIpv4(WordChecks.ipv4()) + .wordCheckWord(WordChecks.word()) .numCheckLen(8) .wordTag(WordTags.none()) .charIgnore(SensitiveWordCharIgnores.defaults()) @@ -46,39 +52,6 @@ public class SensitiveWordBsConfigTest { .wordAllow(WordAllows.defaults()) .wordDeny(WordDenys.defaults()) .init(); - -// String dir = "D:\\code\\github\\houbb.github.io\\_posts"; -// File[] files = new File(dir).listFiles(); -// -// Set wordSet = new HashSet<>(); -// -// Map wordCountMap = new HashMap<>(); -// for(File file : files) { -// String content = FileUtil.getFileContent(file); -// List allWords = wordBs.findAll(content); -// -// for(String word : allWords) { -// Integer integer = wordCountMap.get(word); -// if(integer == null) { -// integer = 0; -// } -// -// integer++; -// wordCountMap.put(word, integer); -// } -// -// System.out.println(file.getName()); -// } -// -//// List wordCountDtoList = new ArrayList<>(); -// for(Map.Entry entry : wordCountMap.entrySet()) { -// if(entry.getValue() >= 3) { -// System.out.println(entry.getKey() + " : " + entry.getValue()); -// } -// } - -// Collections.sort(wordCountDtoList); -// System.out.println(wordCountDtoList); } @Test diff --git a/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsUrlNoPrefixTest.java 
b/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsUrlNoPrefixTest.java new file mode 100644 index 0000000..b5f6543 --- /dev/null +++ b/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsUrlNoPrefixTest.java @@ -0,0 +1,37 @@ +package com.github.houbb.sensitive.word.bs; + +import com.github.houbb.sensitive.word.support.check.WordChecks; +import org.junit.Assert; +import org.junit.Test; + +import java.util.List; + +/** + *

project: sensitive-word-SensitiveWordBsUrlNoPrefixTest

+ *

create on 2025/2/17

+ * + * @author Administrator + * @since 0.25.0 + */ +public class SensitiveWordBsUrlNoPrefixTest { + + /** + * URL 检测 + * + * @since 0.25.0 + */ + @Test + public void urlNoPrefixTest() { + final String text = "点击链接 https://www.baidu.com 查看答案,当然也可以是 baidu.com、www.baidu.com"; + + final SensitiveWordBs sensitiveWordBs = SensitiveWordBs.newInstance() + .enableUrlCheck(true) // 启用URL检测 + .wordCheckUrl(WordChecks.urlNoPrefix()) //指定检测的方式 + .init(); + List wordList = sensitiveWordBs.findAll(text); + Assert.assertEquals("[www.baidu.com, baidu.com, www.baidu.com]", wordList.toString()); + + Assert.assertEquals("点击链接 https://************* 查看答案,当然也可以是 *********、*************", sensitiveWordBs.replace(text)); + } + +}