diff --git a/CHANGE_LOG.md b/CHANGE_LOG.md index f045cfc..8c40181 100644 --- a/CHANGE_LOG.md +++ b/CHANGE_LOG.md @@ -133,3 +133,10 @@ |:---|:---|:---|:---|:--| | 1 | A | 允许用户自定义替换策略 | 2022-01-15 23:51:58 | | | 2 | U | 升级二方数据库依赖 | 2022-01-15 23:51:58 | | + +# release_0.2.1 + +| 序号 | 变更类型 | 说明 | 时间 | 备注 | +|:---|:---|:---|:---|:--| +| 1 | O | 移除日志初始化的控制台日志输出 | 2023-02-17 23:51:58 | | +| 2 | A | 支持数字检测的长度指定 | 2022-01-17 23:51:58 | | diff --git a/README.md b/README.md index 4f0acd4..737cc02 100644 --- a/README.md +++ b/README.md @@ -46,9 +46,9 @@ [CHANGE_LOG.md](https://github.com/houbb/sensitive-word/blob/master/doc/CHANGE_LOG.md) -v0.2.0 变更: +v0.2.1 变更: -- 支持用户自定义替换策略 +- 支持用户自定义数字检测的长度 # 快速开始 @@ -64,7 +64,7 @@ v0.2.0 变更: com.github.houbb sensitive-word - 0.2.0 + 0.2.1 ``` @@ -298,6 +298,26 @@ List wordList = SensitiveWordHelper.findAll(text); Assert.assertEquals("[sensitiveword@xx.com]", wordList.toString()); ``` +## 连续数字检测 + +一般用于过滤手机号/QQ等广告信息。 + +V0.2.1 之后,支持通过 `numCheckLen(长度)` 自定义检测的长度。 + +```java +final String text = "你懂得:12345678"; + +// 默认检测 8 位 +List wordList = SensitiveWordBs.newInstance().findAll(text); +Assert.assertEquals("[12345678]", wordList.toString()); + +// 指定数字的长度,避免误杀 +List wordList2 = SensitiveWordBs.newInstance() + .numCheckLen(9) + .findAll(text); +Assert.assertEquals("[]", wordList2.toString()); +``` + # 特性配置 ## 说明 @@ -319,10 +339,11 @@ SensitiveWordBs wordBs = SensitiveWordBs.newInstance() .ignoreNumStyle(true) .ignoreChineseStyle(true) .ignoreEnglishStyle(true) - .ignoreRepeat(true) + .ignoreRepeat(false) .enableNumCheck(true) .enableEmailCheck(true) .enableUrlCheck(true) + .numCheckLen(8) .init(); final String text = "五星红旗迎风飘扬,毛主席的画像屹立在天安门前。"; @@ -332,17 +353,18 @@ Assert.assertTrue(wordBs.contains(text)); 其中各项配置的说明如下: -| 序号 | 方法 | 说明 | -|:---|:---|:---| -| 1 | ignoreCase | 忽略大小写 | -| 2 | ignoreWidth | 忽略半角圆角 | -| 3 | ignoreNumStyle | 忽略数字的写法 | -| 4 | ignoreChineseStyle | 忽略中文的书写格式 | -| 5 | ignoreEnglishStyle | 忽略英文的书写格式 | -| 6 | 
ignoreRepeat | 忽略重复词 | -| 7 | enableNumCheck | 是否启用数字检测。默认连续 8 位数字认为是敏感词 | -| 8 | enableEmailCheck | 是有启用邮箱检测 | -| 9 | enableUrlCheck | 是否启用链接检测 | +| 序号 | 方法 | 说明 | +|:----|:---|:--------------| +| 1 | ignoreCase | 忽略大小写 | +| 2 | ignoreWidth | 忽略半角全角 | +| 3 | ignoreNumStyle | 忽略数字的写法 | +| 4 | ignoreChineseStyle | 忽略中文的书写格式 | +| 5 | ignoreEnglishStyle | 忽略英文的书写格式 | +| 6 | ignoreRepeat | 忽略重复词 | +| 7 | enableNumCheck | 是否启用数字检测。 | +| 8 | enableEmailCheck | 是否启用邮箱检测 | +| 9 | enableUrlCheck | 是否启用链接检测 | +| 10 | numCheckLen | 数字检测,自定义指定长度。默认连续 8 位数字认为是敏感词 | # 动态加载(用户自定义) diff --git a/src/main/java/com/github/houbb/sensitive/word/api/IWordContext.java b/src/main/java/com/github/houbb/sensitive/word/api/IWordContext.java index 12b219d..fe1a233 100644 --- a/src/main/java/com/github/houbb/sensitive/word/api/IWordContext.java +++ b/src/main/java/com/github/houbb/sensitive/word/api/IWordContext.java @@ -158,4 +158,19 @@ public interface IWordContext { */ IWordContext ignoreRepeat(final boolean ignoreRepeat); + /** + * 敏感数字检测 + * @return 数字检测 + * @since 0.2.1 + */ + int sensitiveCheckNumLen(); + + /** + * 设置敏感数字检测长度 + * @param sensitiveCheckNumLen 数字格式检测长度 + * @return this + * @since 0.2.1 + */ + IWordContext sensitiveCheckNumLen(final int sensitiveCheckNumLen); + } diff --git a/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java b/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java index 33cb4be..c2909af 100644 --- a/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java +++ b/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java @@ -190,6 +190,17 @@ public class SensitiveWordBs { return this; } + /** + * 检测敏感词对应的长度限制,便于用户灵活定义 + * @param numCheckLen 长度 + * @return this + * @since 0.2.1 + */ + public SensitiveWordBs numCheckLen(int numCheckLen) { + this.context.sensitiveCheckNumLen(numCheckLen); + return this; + } + /** * 设置是否启动 email 检测 * @@ -301,6 +312,9 @@ public class SensitiveWordBs { 
wordContext.sensitiveCheckEmail(true); wordContext.sensitiveCheckUrl(true); + // 额外配置 + wordContext.sensitiveCheckNumLen(8); + return wordContext; } diff --git a/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordContext.java b/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordContext.java index 2a1ad9d..8b2e4c1 100644 --- a/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordContext.java +++ b/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordContext.java @@ -71,6 +71,12 @@ public class SensitiveWordContext implements IWordContext { */ private boolean sensitiveCheckUrl; + /** + * 敏感数字检测对应的长度限制 + * @since 0.2.1 + */ + private int sensitiveCheckNumLen; + /** * 私有化构造器 * @since 0.0.4 @@ -196,4 +202,16 @@ public class SensitiveWordContext implements IWordContext { this.sensitiveCheckUrl = sensitiveCheckUrl; return this; } + + @Override + public int sensitiveCheckNumLen() { + return sensitiveCheckNumLen; + } + + @Override + public SensitiveWordContext sensitiveCheckNumLen(int sensitiveCheckNumLen) { + this.sensitiveCheckNumLen = sensitiveCheckNumLen; + return this; + } + } diff --git a/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckNum.java b/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckNum.java index 74bd783..7facc1f 100644 --- a/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckNum.java +++ b/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckNum.java @@ -34,7 +34,7 @@ public class SensitiveCheckNum implements ISensitiveCheck { lengthCount++; // 满足结束的条件 - boolean isCondition = isCondition(lengthCount); + boolean isCondition = isCondition(lengthCount, context); if (isCondition) { // 只在匹配到结束的时候才记录长度,避免不完全匹配导致的问题。 actualLength = lengthCount; @@ -57,11 +57,13 @@ public class SensitiveCheckNum implements ISensitiveCheck { * 这里指定一个阈值条件 * TODO: 这里有一个问题,会把一些 url 中的数字替换掉。 * @param lengthCount 长度 + * @param 
context 上下文 * @return 是否满足条件 * @since 0.0.5 */ - private boolean isCondition(final int lengthCount) { - return lengthCount >= 8; + protected boolean isCondition(final int lengthCount, + final IWordContext context) { + return lengthCount >= context.sensitiveCheckNumLen(); } } diff --git a/src/main/java/com/github/houbb/sensitive/word/support/data/SensitiveWordData.java b/src/main/java/com/github/houbb/sensitive/word/support/data/SensitiveWordData.java index 0216109..f0159f0 100644 --- a/src/main/java/com/github/houbb/sensitive/word/support/data/SensitiveWordData.java +++ b/src/main/java/com/github/houbb/sensitive/word/support/data/SensitiveWordData.java @@ -41,7 +41,6 @@ public class SensitiveWordData implements IWordData { defaultLines = CollectionUtil.difference(defaultLines, allowList); long end = System.currentTimeMillis(); - System.out.println("Sensitive data loaded!, cost time: " + (end - start) + "ms"); } } diff --git a/src/main/java/com/github/houbb/sensitive/word/support/map/SensitiveWordMap.java b/src/main/java/com/github/houbb/sensitive/word/support/map/SensitiveWordMap.java index 18e34da..dc7b28f 100644 --- a/src/main/java/com/github/houbb/sensitive/word/support/map/SensitiveWordMap.java +++ b/src/main/java/com/github/houbb/sensitive/word/support/map/SensitiveWordMap.java @@ -101,7 +101,6 @@ public class SensitiveWordMap implements IWordMap { this.innerWordMap = newInnerWordMap; long endTime = System.currentTimeMillis(); - System.out.println("Init sensitive word map end! 
Cost time: " + (endTime - startTime) + "ms"); } /** diff --git a/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsConfigTest.java b/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsConfigTest.java index 3a402fd..11cebc5 100644 --- a/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsConfigTest.java +++ b/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsConfigTest.java @@ -26,6 +26,7 @@ public class SensitiveWordBsConfigTest { .enableNumCheck(true) .enableEmailCheck(true) .enableUrlCheck(true) + .numCheckLen(8) .init(); final String text = "五星红旗迎风飘扬,毛主席的画像屹立在天安门前。"; diff --git a/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsNumLenTest.java b/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsNumLenTest.java new file mode 100644 index 0000000..8383834 --- /dev/null +++ b/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsNumLenTest.java @@ -0,0 +1,37 @@ +package com.github.houbb.sensitive.word.bs; + +import org.junit.Assert; +import org.junit.Test; + +import java.util.List; + +/** + *

<p> project: sensitive-word-SensitiveWordBsTest </p>

+ *

<p> create on 2020/1/7 23:43 </p>

+ * + * @author Administrator + * @since 0.2.1 + */ +public class SensitiveWordBsNumLenTest { + + /** + * 返回所有敏感词 + * @since 0.2.1 + */ + @Test + public void findAllTest() { + final String text = "你懂得:12345678"; + + // 默认检测 8 位 + List wordList = SensitiveWordBs.newInstance().findAll(text); + Assert.assertEquals("[12345678]", wordList.toString()); + + // 指定数字的长度,避免误杀 + List wordList2 = SensitiveWordBs.newInstance() + .numCheckLen(9) + .findAll(text); + Assert.assertEquals("[]", wordList2.toString()); + } + + +}