wordList2 = SensitiveWordBs.newInstance()
+ .numCheckLen(9)
+ .findAll(text);
+Assert.assertEquals("[]", wordList2.toString());
+```
+
# 特性配置
## 说明
@@ -319,10 +339,11 @@ SensitiveWordBs wordBs = SensitiveWordBs.newInstance()
.ignoreNumStyle(true)
.ignoreChineseStyle(true)
.ignoreEnglishStyle(true)
- .ignoreRepeat(true)
+ .ignoreRepeat(false)
.enableNumCheck(true)
.enableEmailCheck(true)
.enableUrlCheck(true)
+ .numCheckLen(8)
.init();
final String text = "五星红旗迎风飘扬,毛主席的画像屹立在天安门前。";
@@ -332,17 +353,18 @@ Assert.assertTrue(wordBs.contains(text));
其中各项配置的说明如下:
-| 序号 | 方法 | 说明 |
-|:---|:---|:---|
-| 1 | ignoreCase | 忽略大小写 |
-| 2 | ignoreWidth | 忽略半角圆角 |
-| 3 | ignoreNumStyle | 忽略数字的写法 |
-| 4 | ignoreChineseStyle | 忽略中文的书写格式 |
-| 5 | ignoreEnglishStyle | 忽略英文的书写格式 |
-| 6 | ignoreRepeat | 忽略重复词 |
-| 7 | enableNumCheck | 是否启用数字检测。默认连续 8 位数字认为是敏感词 |
-| 8 | enableEmailCheck | 是有启用邮箱检测 |
-| 9 | enableUrlCheck | 是否启用链接检测 |
+| 序号 | 方法 | 说明 |
+|:----|:---|:--------------|
+| 1 | ignoreCase | 忽略大小写 |
+| 2 | ignoreWidth | 忽略半角全角 |
+| 3 | ignoreNumStyle | 忽略数字的写法 |
+| 4 | ignoreChineseStyle | 忽略中文的书写格式 |
+| 5 | ignoreEnglishStyle | 忽略英文的书写格式 |
+| 6 | ignoreRepeat | 忽略重复词 |
+| 7 | enableNumCheck | 是否启用数字检测。 |
+| 8 | enableEmailCheck | 是否启用邮箱检测 |
+| 9 | enableUrlCheck | 是否启用链接检测 |
+| 10 | numCheckLen | 数字检测的长度阈值,可自定义。默认连续 8 位及以上的数字认为是敏感词 |
# 动态加载(用户自定义)
diff --git a/src/main/java/com/github/houbb/sensitive/word/api/IWordContext.java b/src/main/java/com/github/houbb/sensitive/word/api/IWordContext.java
index 12b219d..fe1a233 100644
--- a/src/main/java/com/github/houbb/sensitive/word/api/IWordContext.java
+++ b/src/main/java/com/github/houbb/sensitive/word/api/IWordContext.java
@@ -158,4 +158,19 @@ public interface IWordContext {
*/
IWordContext ignoreRepeat(final boolean ignoreRepeat);
+ /**
+ * Returns the length threshold used by the sensitive-number check.
+ * @return the configured digit-run length threshold
+ * @since 0.2.1
+ */
+ int sensitiveCheckNumLen();
+
+ /**
+ * Sets the length threshold used by the sensitive-number check.
+ * @param sensitiveCheckNumLen length threshold for the number-format check
+ * @return this
+ * @since 0.2.1
+ */
+ IWordContext sensitiveCheckNumLen(final int sensitiveCheckNumLen);
+
}
diff --git a/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java b/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java
index 33cb4be..c2909af 100644
--- a/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java
+++ b/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java
@@ -190,6 +190,17 @@ public class SensitiveWordBs {
return this;
}
+ /**
+ * Sets the length limit for the sensitive-number check so callers can tune it.
+ * @param numCheckLen the length limit, forwarded to the context via sensitiveCheckNumLen
+ * @return this
+ * @since 0.2.1
+ */
+ public SensitiveWordBs numCheckLen(int numCheckLen) {
+ this.context.sensitiveCheckNumLen(numCheckLen);
+ return this;
+ }
+
/**
* 设置是否启动 email 检测
*
@@ -301,6 +312,9 @@ public class SensitiveWordBs {
wordContext.sensitiveCheckEmail(true);
wordContext.sensitiveCheckUrl(true);
+ // 额外配置
+ wordContext.sensitiveCheckNumLen(8);
+
return wordContext;
}
diff --git a/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordContext.java b/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordContext.java
index 2a1ad9d..8b2e4c1 100644
--- a/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordContext.java
+++ b/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordContext.java
@@ -71,6 +71,12 @@ public class SensitiveWordContext implements IWordContext {
*/
private boolean sensitiveCheckUrl;
+ /**
+ * Length limit applied by the sensitive-number check.
+ * @since 0.2.1
+ */
+ private int sensitiveCheckNumLen;
+
/**
* 私有化构造器
* @since 0.0.4
@@ -196,4 +202,16 @@ public class SensitiveWordContext implements IWordContext {
this.sensitiveCheckUrl = sensitiveCheckUrl;
return this;
}
+
+ @Override
+ public int sensitiveCheckNumLen() {
+ return sensitiveCheckNumLen; // length limit applied by the sensitive-number check
+ }
+
+ @Override
+ public SensitiveWordContext sensitiveCheckNumLen(int sensitiveCheckNumLen) {
+ this.sensitiveCheckNumLen = sensitiveCheckNumLen; // stored as-is; no validation is applied here
+ return this; // fluent setter: hands back this context
+ }
+
}
diff --git a/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckNum.java b/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckNum.java
index 74bd783..7facc1f 100644
--- a/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckNum.java
+++ b/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckNum.java
@@ -34,7 +34,7 @@ public class SensitiveCheckNum implements ISensitiveCheck {
lengthCount++;
// 满足结束的条件
- boolean isCondition = isCondition(lengthCount);
+ boolean isCondition = isCondition(lengthCount, context);
if (isCondition) {
// 只在匹配到结束的时候才记录长度,避免不完全匹配导致的问题。
actualLength = lengthCount;
@@ -57,11 +57,13 @@ public class SensitiveCheckNum implements ISensitiveCheck {
* 这里指定一个阈值条件
* TODO: 这里有一个问题,会把一些 url 中的数字替换掉。
* @param lengthCount 长度
+ * @param context 上下文
* @return 是否满足条件
* @since 0.0.5
*/
- private boolean isCondition(final int lengthCount) {
- return lengthCount >= 8;
+ protected boolean isCondition(final int lengthCount,
+ final IWordContext context) {
+ return lengthCount >= context.sensitiveCheckNumLen(); // inclusive: a count equal to the configured length already qualifies
}
}
diff --git a/src/main/java/com/github/houbb/sensitive/word/support/data/SensitiveWordData.java b/src/main/java/com/github/houbb/sensitive/word/support/data/SensitiveWordData.java
index 0216109..f0159f0 100644
--- a/src/main/java/com/github/houbb/sensitive/word/support/data/SensitiveWordData.java
+++ b/src/main/java/com/github/houbb/sensitive/word/support/data/SensitiveWordData.java
@@ -41,7 +41,6 @@ public class SensitiveWordData implements IWordData {
defaultLines = CollectionUtil.difference(defaultLines, allowList);
long end = System.currentTimeMillis();
- System.out.println("Sensitive data loaded!, cost time: " + (end - start) + "ms");
}
}
diff --git a/src/main/java/com/github/houbb/sensitive/word/support/map/SensitiveWordMap.java b/src/main/java/com/github/houbb/sensitive/word/support/map/SensitiveWordMap.java
index 18e34da..dc7b28f 100644
--- a/src/main/java/com/github/houbb/sensitive/word/support/map/SensitiveWordMap.java
+++ b/src/main/java/com/github/houbb/sensitive/word/support/map/SensitiveWordMap.java
@@ -101,7 +101,6 @@ public class SensitiveWordMap implements IWordMap {
this.innerWordMap = newInnerWordMap;
long endTime = System.currentTimeMillis();
- System.out.println("Init sensitive word map end! Cost time: " + (endTime - startTime) + "ms");
}
/**
diff --git a/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsConfigTest.java b/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsConfigTest.java
index 3a402fd..11cebc5 100644
--- a/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsConfigTest.java
+++ b/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsConfigTest.java
@@ -26,6 +26,7 @@ public class SensitiveWordBsConfigTest {
.enableNumCheck(true)
.enableEmailCheck(true)
.enableUrlCheck(true)
+ .numCheckLen(8)
.init();
final String text = "五星红旗迎风飘扬,毛主席的画像屹立在天安门前。";
diff --git a/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsNumLenTest.java b/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsNumLenTest.java
new file mode 100644
index 0000000..8383834
--- /dev/null
+++ b/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsNumLenTest.java
@@ -0,0 +1,37 @@
+package com.github.houbb.sensitive.word.bs;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.util.List;
+
+/**
+ * project: sensitive-word-SensitiveWordBsTest
+ * create on 2020/1/7 23:43
+ *
+ * @author Administrator
+ * @since 0.2.1
+ */
+public class SensitiveWordBsNumLenTest {
+
+    /**
+     * Checks that numCheckLen controls whether a digit run is reported by findAll.
+     * @since 0.2.1
+     */
+    @Test
+    public void findAllTest() {
+        final String text = "你懂得:12345678";
+
+        // With the default length of 8, the 8-digit run is reported.
+        List<String> wordList = SensitiveWordBs.newInstance().findAll(text);
+        Assert.assertEquals("[12345678]", wordList.toString());
+
+        // Raising the length to 9 keeps the same 8-digit run from being flagged.
+        List<String> wordList2 = SensitiveWordBs.newInstance()
+                .numCheckLen(9)
+                .findAll(text);
+        Assert.assertEquals("[]", wordList2.toString());
+    }
+
+
+}