From 894f690b6a276fd499f7e6f5c5f01f4d16ee8e59 Mon Sep 17 00:00:00 2001 From: houbb Date: Sat, 1 Jun 2024 16:42:53 +0800 Subject: [PATCH] release branch 0.17.0 --- CHANGE_LOG.md | 6 ++ README.md | 31 ++++++-- pom.xml | 2 +- release.bat | 4 +- .../sensitive/word/api/IWordContext.java | 15 ++++ .../sensitive/word/bs/SensitiveWordBs.java | 17 +++++ .../word/bs/SensitiveWordContext.java | 17 +++++ .../word/constant/enums/WordTypeEnum.java | 1 + .../word/support/check/WordCheckIPV4.java | 74 +++++++++++++++++++ .../word/support/check/WordChecks.java | 35 +++------ .../combine/check/WordCheckCombine.java | 3 + .../sensitive/word/utils/InnerCharUtils.java | 41 ++++++++++ .../sensitive/word/utils/package-info.java | 1 - .../word/bs/SensitiveWordBsConfigTest.java | 2 + .../word/bs/SensitiveWordBsIpv4Test.java | 38 ++++++++++ 15 files changed, 250 insertions(+), 37 deletions(-) create mode 100644 src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckIPV4.java create mode 100644 src/main/java/com/github/houbb/sensitive/word/utils/InnerCharUtils.java delete mode 100644 src/test/java/ai/com/github/houbb/sensitive/word/utils/package-info.java create mode 100644 src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsIpv4Test.java diff --git a/CHANGE_LOG.md b/CHANGE_LOG.md index 4ce2a5d..1e668ba 100644 --- a/CHANGE_LOG.md +++ b/CHANGE_LOG.md @@ -318,3 +318,9 @@ | 1 | O | 敏感词优化移除: 译码器/鞋帽/破解/看下/快乐水/冷却/招聘/搬迁/机票/谣言/第4代/赚钱/1989年/贫穷/护士/工作人员/网通/超速/明心/人民/服务管理/刺激/心水/ | 2024-6-01 15:02:25 | | | 2 | O | 敏感词优化移除: 影院/公关/韦小宝/挣钱/资本主义/人也/战争/水浒/女装/装修/妹妹/他爸爸/他妈妈 | 2024-6-01 15:02:25 | | | 3 | O | 默认关闭数字+网址+邮箱的校验 | 2024-6-01 15:02:25 | 降低误判率 | + +# release_0.17.0 + +| 序号 | 变更类型 | 说明 | 时间 | 备注 | +|:---|:-----|---------|:------------------|:------| +| 1 | A | IPV4 校验 | 2024-6-01 15:02:25 | https://github.com/houbb/sensitive-word/issues/43 | diff --git a/README.md b/README.md index 27f24e1..1ecef90 100644 --- a/README.md +++ b/README.md @@ -353,6 +353,8 @@ 
Assert.assertEquals("[ⒻⒻⒻfⓤuⓤ⒰cⓒ⒦]", wordList.toString()); ### 邮箱检测 +邮箱等个人信息,默认未启用。 + ```java final String text = "楼主好人,邮箱 sensitiveword@xx.com"; List wordList = SensitiveWordBs.newInstance().enableEmailCheck(true).init().findAll(text); @@ -361,7 +363,7 @@ Assert.assertEquals("[sensitiveword@xx.com]", wordList.toString()); ### 连续数字检测 -一般用于过滤手机号/QQ等广告信息。 +一般用于过滤手机号/QQ等广告信息,默认未启用。 V0.2.1 之后,支持通过 `numCheckLen(长度)` 自定义检测的长度。 @@ -385,7 +387,7 @@ Assert.assertEquals("[]", wordList2.toString()); ### 网址检测 -用于过滤常见的网址信息。 +用于过滤常见的网址信息,默认未启用。 ```java final String text = "点击链接 www.baidu.com查看答案"; @@ -398,6 +400,19 @@ Assert.assertEquals("[www.baidu.com]", wordList.toString()); Assert.assertEquals("点击链接 *************查看答案", sensitiveWordBs.replace(text)); ``` +### IPV4 检测 + +v0.17.0 支持 + +避免用户通过 ip 绕过网址检测等,默认未启用。 + +```java +final String text = "个人网站,如果网址打不开可以访问 127.0.0.1。"; +final SensitiveWordBs sensitiveWordBs = SensitiveWordBs.newInstance().enableIpv4Check(true).init(); +List wordList = sensitiveWordBs.findAll(text); +Assert.assertEquals("[127.0.0.1]", wordList.toString()); +``` + # 引导类特性配置 ## 说明 @@ -423,6 +438,7 @@ SensitiveWordBs wordBs = SensitiveWordBs.newInstance() .enableNumCheck(false) .enableEmailCheck(false) .enableUrlCheck(false) + .enableIpv4Check(false) .enableWordCheck(true) .numCheckLen(8) .wordTag(WordTags.none()) @@ -448,11 +464,12 @@ Assert.assertTrue(wordBs.contains(text)); | 7 | enableNumCheck | 是否启用数字检测。 | false | | 8 | enableEmailCheck | 是有启用邮箱检测 | false | | 9 | enableUrlCheck | 是否启用链接检测 | false | -| 10 | enableWordCheck | 是否启用敏感单词检测 | true | -| 11 | numCheckLen | 数字检测,自定义指定长度。 | 8 | -| 12 | wordTag | 词对应的标签 | none | -| 13 | charIgnore | 忽略的字符 | none | -| 14 | wordResultCondition | 针对匹配的敏感词额外加工,比如可以限制英文单词必须全匹配 | 恒为真 | +| 10 | enableIpv4Check | 是否启用IPv4检测 | false | +| 11 | enableWordCheck | 是否启用敏感单词检测 | true | +| 12 | numCheckLen | 数字检测,自定义指定长度。 | 8 | +| 13 | wordTag | 词对应的标签 | none | +| 14 | charIgnore | 忽略的字符 | none | +| 15 | wordResultCondition | 
针对匹配的敏感词额外加工,比如可以限制英文单词必须全匹配 | 恒为真 | ## 内存的释放 diff --git a/pom.xml b/pom.xml index db78889..76470d9 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ com.github.houbb sensitive-word - 0.16.2 + 0.17.0 diff --git a/release.bat b/release.bat index 144df5a..eb89986 100644 --- a/release.bat +++ b/release.bat @@ -10,9 +10,9 @@ ECHO "============================= RELEASE START..." :: 版本号信息(需要手动指定) :::: 旧版本名称 -SET version=0.16.2 +SET version=0.17.0 :::: 新版本名称 -SET newVersion=0.17.0 +SET newVersion=0.18.0 :::: 组织名称 SET groupName=com.github.houbb :::: 项目名称 diff --git a/src/main/java/com/github/houbb/sensitive/word/api/IWordContext.java b/src/main/java/com/github/houbb/sensitive/word/api/IWordContext.java index bd3c057..d28e0c6 100644 --- a/src/main/java/com/github/houbb/sensitive/word/api/IWordContext.java +++ b/src/main/java/com/github/houbb/sensitive/word/api/IWordContext.java @@ -126,6 +126,21 @@ public interface IWordContext { */ IWordContext enableUrlCheck(final boolean enableUrlCheck); + /** + * 是否启用 ipv4 校验 + * @return 结果 + * @since 0.17.0 + */ + boolean enableIpv4Check(); + + /** + * 是否启用 ipv4 校验 + * @param enableIpv4Check 是否启用 + * @return this + * @since 0.17.0 + */ + IWordContext enableIpv4Check(final boolean enableIpv4Check); + /** * 忽略英文的写法 * @return 数字检测 diff --git a/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java b/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java index da742ad..49a4662 100644 --- a/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java +++ b/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java @@ -90,6 +90,12 @@ public class SensitiveWordBs implements ISensitiveWordDestroy { */ private boolean enableWordCheck = true; + /** + * 是否启用 ipv4 校验 + * @since 0.17.0 + */ + private boolean enableIpv4Check = false; + // 额外配置 /** * 检测数字时的长度 @@ -233,6 +239,7 @@ public class SensitiveWordBs implements ISensitiveWordDestroy { context.enableEmailCheck(enableEmailCheck); 
context.enableUrlCheck(enableUrlCheck); context.enableWordCheck(enableWordCheck); + context.enableIpv4Check(enableIpv4Check); // 额外配置 context.sensitiveCheckNumLen(numCheckLen); @@ -343,6 +350,16 @@ public class SensitiveWordBs implements ISensitiveWordDestroy { } //-------------------------------------------------------- 基础属性设置 + /** + * 是否启用 ipv4 校验 + * @param enableIpv4Check 是否启用 + * @return this + * @since 0.17.0 + */ + public SensitiveWordBs enableIpv4Check(boolean enableIpv4Check) { + this.enableIpv4Check = enableIpv4Check; + return this; + } /** * 设置是否启动数字检测 diff --git a/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordContext.java b/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordContext.java index 5598f76..54e960b 100644 --- a/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordContext.java +++ b/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordContext.java @@ -69,6 +69,12 @@ public class SensitiveWordContext implements IWordContext { */ private boolean enableUrlCheck; + /** + * 是否启用 ipv4 校验 + * @since 0.17.0 + */ + private boolean enableIpv4Check; + /** * 敏感数字检测对应的长度限制 * @since 0.2.1 @@ -218,6 +224,17 @@ public class SensitiveWordContext implements IWordContext { return this; } + @Override + public boolean enableIpv4Check() { + return enableIpv4Check; + } + + @Override + public SensitiveWordContext enableIpv4Check(boolean enableIpv4Check) { + this.enableIpv4Check = enableIpv4Check; + return this; + } + @Override public boolean ignoreChineseStyle() { return ignoreChineseStyle; diff --git a/src/main/java/com/github/houbb/sensitive/word/constant/enums/WordTypeEnum.java b/src/main/java/com/github/houbb/sensitive/word/constant/enums/WordTypeEnum.java index e26a8ea..c4d8b3d 100644 --- a/src/main/java/com/github/houbb/sensitive/word/constant/enums/WordTypeEnum.java +++ b/src/main/java/com/github/houbb/sensitive/word/constant/enums/WordTypeEnum.java @@ -9,6 +9,7 @@ public enum WordTypeEnum { EMAIL("EMAIL", "邮箱"), 
URL("URL", "链接"), NUM("NUM", "数字"), + IPV4("IPV4", "IPv4"), DEFAULTS("DEFAULTS", "默认"), ; diff --git a/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckIPV4.java b/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckIPV4.java new file mode 100644 index 0000000..5583d6d --- /dev/null +++ b/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckIPV4.java @@ -0,0 +1,74 @@ +package com.github.houbb.sensitive.word.support.check; + +import com.github.houbb.heaven.annotation.ThreadSafe; +import com.github.houbb.heaven.util.lang.CharUtil; +import com.github.houbb.heaven.util.lang.StringUtil; +import com.github.houbb.heaven.util.util.regex.RegexUtil; +import com.github.houbb.sensitive.word.api.IWordCheck; +import com.github.houbb.sensitive.word.api.context.InnerSensitiveWordContext; +import com.github.houbb.sensitive.word.constant.WordConst; +import com.github.houbb.sensitive.word.constant.enums.WordTypeEnum; +import com.github.houbb.sensitive.word.utils.InnerCharUtils; + +import java.util.List; + +/** + * IPV4 检测 + * + * @author binbin.hou + * @since 0.17.0 + */ +@ThreadSafe +public class WordCheckIPV4 extends AbstractConditionWordCheck { + + private static final IWordCheck INSTANCE = new WordCheckIPV4(); + + public static IWordCheck getInstance() { + return INSTANCE; + } + + @Override + protected Class getSensitiveCheckClass() { + return WordCheckIPV4.class; + } + + @Override + protected String getType() { + return WordTypeEnum.IPV4.getCode(); + } + + @Override + protected boolean isCharCondition(char mappingChar, int index, InnerSensitiveWordContext checkContext) { + return CharUtil.isNumber(mappingChar) || '.' 
== mappingChar; + } + + @Override + protected boolean isStringCondition(int index, StringBuilder stringBuilder, InnerSensitiveWordContext checkContext) { + int bufferLen = stringBuilder.length(); + //0.0.0.0 + //255.255.255.255 + if(bufferLen < 7 + || bufferLen > 15) { + return false; + } + + // 尽可能减少对象的创建 + String string = stringBuilder.toString(); + List stringList = StringUtil.splitToList(string, '.'); + if(stringList.size() != 4) { + return false; + } + + for(String numStr : stringList) { + int integer = InnerCharUtils.parseInt(numStr); + + if(integer < 0 || integer > 256) { + return false; + } + } + + // 额外处理 + return true; + } + +} diff --git a/src/main/java/com/github/houbb/sensitive/word/support/check/WordChecks.java b/src/main/java/com/github/houbb/sensitive/word/support/check/WordChecks.java index e5d4ebd..230bab2 100644 --- a/src/main/java/com/github/houbb/sensitive/word/support/check/WordChecks.java +++ b/src/main/java/com/github/houbb/sensitive/word/support/check/WordChecks.java @@ -18,32 +18,6 @@ public final class WordChecks { private WordChecks(){} - /** - * 初始化敏感检测策略 - * @param context 上下文 - * - * @return 实现 - * @since 0.3.0 - */ - public static IWordCheck initSensitiveCheck(final IWordContext context) { - List sensitiveCheckList = new ArrayList<>(); - - if(context.enableWordCheck()) { - sensitiveCheckList.add(WordChecks.word()); - } - if(context.enableNumCheck()) { - sensitiveCheckList.add(WordChecks.num()); - } - if(context.enableEmailCheck()) { - sensitiveCheckList.add(WordChecks.email()); - } - if(context.enableUrlCheck()) { - sensitiveCheckList.add(WordChecks.url()); - } - - return WordChecks.chains(sensitiveCheckList); - } - public static IWordCheck chains(final IWordCheck... 
/**
 * Internal helpers for converting ASCII digit characters into numbers without
 * throwing on malformed input.
 *
 * @since 0.17.0
 */
public final class InnerCharUtils {

    /** Sentinel returned by {@link #parseInt(String)} when the text is not a valid number. */
    private static final int INVALID = -1;

    private InnerCharUtils() {
    }

    /**
     * Parses a string made only of the ASCII digits {@code 0-9} into a non-negative int.
     * Leading zeros are accepted ({@code "007"} yields {@code 7}).
     *
     * @param text the text to parse
     * @return the parsed value, or {@code -1} when {@code text} is {@code null}, empty,
     *         contains any character other than {@code 0-9}, or does not fit in an int
     * @since 0.17.0
     */
    public static int parseInt(String text) {
        if (text == null || text.isEmpty()) {
            return INVALID;
        }

        int sum = 0;
        for (int i = 0; i < text.length(); i++) {
            final int digit = getCharInt(text.charAt(i));
            if (digit < 0 || digit > 9) {
                return INVALID;
            }
            // Guard before multiplying so the accumulator never wraps around.
            if (sum > (Integer.MAX_VALUE - digit) / 10) {
                return INVALID;
            }
            sum = sum * 10 + digit;
        }
        return sum;
    }

    /**
     * Returns the numeric value of a digit character by subtracting {@code '0'}.
     * No validation is performed: for a character outside {@code 0-9} the result
     * lies outside the range {@code 0..9}.
     *
     * @param c the character
     * @return {@code c - '0'}
     * @since 0.17.0
     */
    public static int getCharInt(final char c) {
        return c - '0';
    }

}
b/src/test/java/ai/com/github/houbb/sensitive/word/utils/package-info.java deleted file mode 100644 index f27d3ec..0000000 --- a/src/test/java/ai/com/github/houbb/sensitive/word/utils/package-info.java +++ /dev/null @@ -1 +0,0 @@ -package ai.com.github.houbb.sensitive.word.utils; \ No newline at end of file diff --git a/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsConfigTest.java b/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsConfigTest.java index 86f86f1..14c8b32 100644 --- a/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsConfigTest.java +++ b/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsConfigTest.java @@ -37,6 +37,8 @@ public class SensitiveWordBsConfigTest { .enableNumCheck(false) .enableEmailCheck(false) .enableUrlCheck(false) + .enableIpv4Check(false) + .enableWordCheck(true) .numCheckLen(8) .wordTag(WordTags.none()) .charIgnore(SensitiveWordCharIgnores.defaults()) diff --git a/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsIpv4Test.java b/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsIpv4Test.java new file mode 100644 index 0000000..d80a2ad --- /dev/null +++ b/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsIpv4Test.java @@ -0,0 +1,38 @@ +package com.github.houbb.sensitive.word.bs; + +import org.junit.Assert; +import org.junit.Test; + +import java.util.List; + +/** + + */ +public class SensitiveWordBsIpv4Test { + + /** + * ipv4 地址 + * @since 0.17.0 + */ + @Test + public void defaultTest() { + final String text = "个人网站,如果网址打不开可以访问 127.0.0.1。"; + final SensitiveWordBs sensitiveWordBs = SensitiveWordBs.newInstance().init(); + + List wordList = sensitiveWordBs.findAll(text); + Assert.assertEquals("[]", wordList.toString()); + } + + /** + * ipv4 地址 + * @since 0.17.0 + */ + @Test + public void ipv4CheckTest() { + final String text = "个人网站,如果网址打不开可以访问 127.0.0.1。"; + final SensitiveWordBs sensitiveWordBs = 
SensitiveWordBs.newInstance().enableIpv4Check(true).init(); + List wordList = sensitiveWordBs.findAll(text); + Assert.assertEquals("[127.0.0.1]", wordList.toString()); + } + +}