+ * (1)如果未命中敏感词,直接返回 0 + * (2)命中敏感词,则返回敏感词的长度。 + *
+ * ps: 这里结果进行优化,
+ * 1. 是否包含敏感词。
+ * 2. 敏感词的长度
+ * 3. 正常走过字段的长度(便于后期替换优化,避免不必要的循环重复)
+ *
+ * @param txt 文本信息
+ * @param beginIndex 开始下标
+ * @param validModeEnum 验证模式
+ * @param context 执行上下文
+ * @return 敏感信息对应的长度
+ * @since 0.0.5
+ */
+ int checkSensitive(final String txt,
+ final int beginIndex,
+ final ValidModeEnum validModeEnum,
+ final IWordContext context);
+
+}
diff --git a/src/main/java/com/github/houbb/sensitive/word/api/IWordContext.java b/src/main/java/com/github/houbb/sensitive/word/api/IWordContext.java
index d0c706f..faa9619 100644
--- a/src/main/java/com/github/houbb/sensitive/word/api/IWordContext.java
+++ b/src/main/java/com/github/houbb/sensitive/word/api/IWordContext.java
@@ -1,5 +1,7 @@
package com.github.houbb.sensitive.word.api;
+import java.util.Map;
+
/**
* @author binbin.hou
* @since 0.0.4
@@ -20,6 +22,14 @@ public interface IWordContext {
*/
boolean ignoreWidth();
+ /**
+ * Whether the number format (style) is ignored.
+ * @return the ignore-number-style flag
+ * @since 0.0.5
+ */
+ boolean ignoreNumStyle();
+
+
/**
* 设置是否忽略大小写
* @param ignoreCase 是否忽略大小写
@@ -36,4 +46,41 @@ public interface IWordContext {
*/
IWordContext ignoreWidth(boolean ignoreWidth);
+ /**
+ * Sets whether the number format (style) is ignored.
+ * @param ignoreNumStyle whether to ignore the number format (style)
+ * @return this
+ * @since 0.0.5
+ */
+ IWordContext ignoreNumStyle(boolean ignoreNumStyle);
+
+ /**
+ * Gets the sensitive word information.
+ * @return the sensitive word map
+ * @since 0.0.5
+ */
+ Map sensitiveWordMap();
+
+ /**
+ * Sets the sensitive word information.
+ * @param map the sensitive word map
+ * @return this
+ * @since 0.0.5
+ */
+ IWordContext sensitiveWordMap(final Map map);
+
+ /**
+ * Whether sensitive-number detection is enabled.
+ * @return the sensitive-number detection flag
+ * @since 0.0.5
+ */
+ boolean sensitiveNumCheck();
+
+ /**
+ * Sets whether sensitive-number detection is enabled, from {@code sensitiveNumCheck}.
+ * @return this
+ * @since 0.0.5
+ */
+ IWordContext sensitiveNumCheck(final boolean sensitiveNumCheck);
+
}
diff --git a/src/main/java/com/github/houbb/sensitive/word/api/IWordMap.java b/src/main/java/com/github/houbb/sensitive/word/api/IWordMap.java
index d431ec6..fa6a58f 100644
--- a/src/main/java/com/github/houbb/sensitive/word/api/IWordMap.java
+++ b/src/main/java/com/github/houbb/sensitive/word/api/IWordMap.java
@@ -10,7 +10,7 @@ import java.util.List;
* @author binbin.hou
* @since 0.0.1
*/
-public interface IWordMap {
+public interface IWordMap extends ISensitiveCheck {
/**
diff --git a/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java b/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java
index 52cebde..1746216 100644
--- a/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java
+++ b/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java
@@ -79,8 +79,13 @@ public class SensitiveWordBs {
*/
private static IWordContext buildDefaultContext() {
IWordContext wordContext = SensitiveWordContext.newInstance();
+ // Format normalization: ignore case, character width and number style
wordContext.ignoreCase(true);
wordContext.ignoreWidth(true);
+ wordContext.ignoreNumStyle(true);
+
+ // Enable checks: sensitive-number detection is on by default
+ wordContext.sensitiveNumCheck(true);
return wordContext;
}
diff --git a/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordContext.java b/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordContext.java
index 29adaff..250f34a 100644
--- a/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordContext.java
+++ b/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordContext.java
@@ -2,6 +2,8 @@ package com.github.houbb.sensitive.word.bs;
import com.github.houbb.sensitive.word.api.IWordContext;
+import java.util.Map;
+
/**
* 上下文
* @author binbin.hou
@@ -21,6 +23,23 @@ public class SensitiveWordContext implements IWordContext {
*/
private boolean ignoreWidth;
+ /**
+ * 是否忽略数字格式
+ * @since 0.0.5
+ */
+ private boolean ignoreNumStyle;
+
+ /**
+ * 敏感词信息
+ * @since 0.0.5
+ */
+ private Map sensitiveWordMap;
+
+ /**
+ * 是否进行敏感数字检测
+ * @since 0.0.5
+ */
+ private boolean sensitiveNumCheck;
/**
* 私有化构造器
* @since 0.0.4
@@ -59,22 +78,37 @@ public class SensitiveWordContext implements IWordContext {
return this;
}
- private static class ContextHolder {
- private static final SensitiveWordContext INSTANCE = new SensitiveWordContext();
-
- static {
- INSTANCE.ignoreCase(true);
- INSTANCE.ignoreWidth(true);
- }
+ @Override
+ public boolean ignoreNumStyle() {
+ return ignoreNumStyle;
}
- /**
- * 默认配置
- * @return 结果
- * @since 0.0.4
- */
- private static SensitiveWordContext defaultContext() {
- return ContextHolder.INSTANCE;
+ @Override
+ public SensitiveWordContext ignoreNumStyle(boolean ignoreNumStyle) {
+ this.ignoreNumStyle = ignoreNumStyle;
+ return this;
+ }
+
+ @Override
+ public Map sensitiveWordMap() {
+ return sensitiveWordMap;
+ }
+
+ @Override
+ public SensitiveWordContext sensitiveWordMap(Map sensitiveWordMap) {
+ this.sensitiveWordMap = sensitiveWordMap;
+ return this;
+ }
+
+ @Override
+ public boolean sensitiveNumCheck() {
+ return sensitiveNumCheck;
+ }
+
+ @Override
+ public SensitiveWordContext sensitiveNumCheck(boolean sensitiveNumCheck) {
+ this.sensitiveNumCheck = sensitiveNumCheck;
+ return this;
}
@Override
@@ -82,6 +116,8 @@ public class SensitiveWordContext implements IWordContext {
return "SensitiveWordContext{" +
"ignoreCase=" + ignoreCase +
", ignoreWidth=" + ignoreWidth +
+ ", ignoreNumStyle=" + ignoreNumStyle +
+ ", sensitiveNumCheck=" + sensitiveNumCheck +
'}';
}
diff --git a/src/main/java/com/github/houbb/sensitive/word/constant/AppConst.java b/src/main/java/com/github/houbb/sensitive/word/constant/AppConst.java
index 7b9ff22..1fa8a1f 100644
--- a/src/main/java/com/github/houbb/sensitive/word/constant/AppConst.java
+++ b/src/main/java/com/github/houbb/sensitive/word/constant/AppConst.java
@@ -23,7 +23,7 @@ public final class AppConst {
* 字典的大小
* @since 0.0.1
*/
- public static final int DICT_SIZE = 65711;
+ public static final int DICT_SIZE = 65709;
/**
* 英语词典的大小
diff --git a/src/main/java/com/github/houbb/sensitive/word/support/check/SensitiveCheckChain.java b/src/main/java/com/github/houbb/sensitive/word/support/check/SensitiveCheckChain.java
new file mode 100644
index 0000000..682b872
--- /dev/null
+++ b/src/main/java/com/github/houbb/sensitive/word/support/check/SensitiveCheckChain.java
@@ -0,0 +1,45 @@
+package com.github.houbb.sensitive.word.support.check;
+
+import com.github.houbb.heaven.annotation.ThreadSafe;
+import com.github.houbb.heaven.support.instance.impl.Instances;
+import com.github.houbb.heaven.util.guava.Guavas;
+import com.github.houbb.sensitive.word.api.ISensitiveCheck;
+import com.github.houbb.sensitive.word.api.IWordContext;
+import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum;
+
+import java.util.List;
+
+/**
+ * 敏感词检测责任链模式
+ *
+ * 这里可以提供一个公共的父类。
+ * @author binbin.hou
+ * @since 0.0.5
+ */
+@ThreadSafe
+public class SensitiveCheckChain implements ISensitiveCheck {
+
+ @Override
+ public int checkSensitive(String txt, int beginIndex, ValidModeEnum validModeEnum, IWordContext context) {
+ // 初始化责任链
+ List
- * (1)如果未命中敏感词,直接返回 0
- * (2)命中敏感词,则返回敏感词的长度。
- *
- * ps: 这里结果进行优化,
- * 1. 是否包含敏感词。
- * 2. 敏感词的长度
- * 3. 正常走过字段的长度(便于后期替换优化,避免不必要的循环重复)
- *
- * @param txt 文本信息
- * @param beginIndex 开始下标
- * @param validModeEnum 验证模式
- * @param context 执行上下文
- * @return 敏感词对应的长度
- * @since 0.0.1
- */
- private int checkSensitiveWord(final String txt, final int beginIndex,
- final ValidModeEnum validModeEnum,
- final IWordContext context) {
- Map nowMap = innerWordMap;
-
- // 记录敏感词的长度
- int lengthCount = 0;
- int actualLength = 0;
-
- for (int i = beginIndex; i < txt.length(); i++) {
- char c = txt.charAt(i);
- char charKey = getActualChar(c, context);
-
- // 判断该字是否存在于敏感词库中
- // 并且将 nowMap 替换为新的 map,进入下一层的循环。
- nowMap = (Map) nowMap.get(charKey);
- if (ObjectUtil.isNotNull(nowMap)) {
- lengthCount++;
-
- // 判断是否是敏感词的结尾字,如果是结尾字则判断是否继续检测
- boolean isEnd = (boolean) nowMap.get(AppConst.IS_END);
- if (isEnd) {
- // 只在匹配到结束的时候才记录长度,避免不完全匹配导致的问题。
- // eg: 敏感词 敏感词xxx
- // 如果是 【敏感词x】也会被匹配。
- actualLength = lengthCount;
-
- // 这里确实需要一种验证模式,主要是为了最大匹配从而达到最佳匹配的效果。
- if (ValidModeEnum.FAIL_FAST.equals(validModeEnum)) {
- break;
- }
- }
- } else {
- // 直接跳出循环
- break;
- }
- }
-
- return actualLength;
- }
-
- /**
- * 获取实际对应的符号
- * @param c 编号
- * @param context 上下文
- * @return 结果
- * @since 0.0.4
- */
- private char getActualChar(final char c,
- final IWordContext context) {
- char resultChar = c;
-
- if(context.ignoreCase()) {
- resultChar = Character.toLowerCase(resultChar);
- }
- if(context.ignoreWidth()) {
- resultChar = CharUtil.toHalfWidth(resultChar);
- }
-
- return resultChar;
- }
-
/**
* 直接替换敏感词,返回替换后的结果
* @param target 文本信息
@@ -301,7 +223,7 @@ public class SensitiveWordMap implements IWordMap {
for (int i = 0; i < target.length(); i++) {
char currentChar = target.charAt(i);
// 内层直接从 i 开始往后遍历,这个算法的,获取第一个匹配的单词
- int wordLength = checkSensitiveWord(target, i, ValidModeEnum.FAIL_OVER, context);
+ int wordLength = checkSensitive(target, i, ValidModeEnum.FAIL_OVER, context);
// 敏感词
if(wordLength > 0) {
@@ -319,4 +241,14 @@ public class SensitiveWordMap implements IWordMap {
return resultBuilder.toString();
}
+ @Override
+ public int checkSensitive(String txt, int beginIndex, ValidModeEnum validModeEnum, IWordContext context) {
+ // Default to the sensitive-word check: store this instance's inner word map on the caller-supplied context (this mutates it)
+ context.sensitiveWordMap(innerWordMap);
+
+ // Chain-of-responsibility call: delegate to the singleton SensitiveCheckChain
+ return Instances.singleton(SensitiveCheckChain.class)
+ .checkSensitive(txt, beginIndex, validModeEnum, context);
+ }
+
}
diff --git a/src/main/java/com/github/houbb/sensitive/word/utils/NumUtils.java b/src/main/java/com/github/houbb/sensitive/word/utils/NumUtils.java
index daa525f..486bbe5 100644
--- a/src/main/java/com/github/houbb/sensitive/word/utils/NumUtils.java
+++ b/src/main/java/com/github/houbb/sensitive/word/utils/NumUtils.java
@@ -3,6 +3,8 @@ package com.github.houbb.sensitive.word.utils;
import com.github.houbb.heaven.util.guava.Guavas;
import com.github.houbb.heaven.util.lang.ObjectUtil;
import com.github.houbb.heaven.util.lang.StringUtil;
+import com.github.houbb.sensitive.word.api.IWordContext;
+import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum;
import java.util.Map;
@@ -51,11 +53,6 @@ public final class NumUtils {
"123456789" +
"123456789" +
"123456789" +
- "123456789" +
- "123456789" +
- "123456789" +
- "123456789" +
- "123456789" +
"123456789";
/**
@@ -104,4 +101,28 @@ public final class NumUtils {
return stringBuilder.toString();
}
+ /**
+ * Checks for a sensitive number starting at {@code beginIndex}.
+ * NOTE(review): currently a placeholder; the body always returns 0 and reads none of its arguments.
+ * (1) If nothing sensitive is hit, return 0.
+ * (2) If something sensitive is hit, return its length.
+ *
+ * ps: the result is meant to be refined to carry:
+ * 1. whether a sensitive word is contained.
+ * 2. the length of the sensitive word.
+ * 3. the length of the normal text walked (eases later replacement and avoids needless repeated loops).
+ *
+ * @param txt the text
+ * @param beginIndex the start index
+ * @param validModeEnum the validation mode
+ * @param context the execution context
+ * @return the length of the sensitive number (currently always 0)
+ * @since 0.0.5
+ */
+ private int getSensitiveNumber(final String txt, final int beginIndex,
+ final ValidModeEnum validModeEnum,
+ final IWordContext context) {
+ return 0;
+ }
+
}
diff --git a/src/main/resources/dict.txt b/src/main/resources/dict.txt
index 8130303..75c7377 100644
--- a/src/main/resources/dict.txt
+++ b/src/main/resources/dict.txt
@@ -32719,7 +32719,6 @@ z以留吧以其以武
成都美女上门qq100996803
成都锦天
成龙记
-我
我qiuqiu446巴636巴8
我qq前五位27279接后接61388共十位
我xx你
diff --git a/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsNumTest.java b/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsNumTest.java
new file mode 100644
index 0000000..552593f
--- /dev/null
+++ b/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsNumTest.java
@@ -0,0 +1,41 @@
+package com.github.houbb.sensitive.word.bs;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.util.List;
+
+/**
+ * project: sensitive-word-SensitiveWordBsTest create on 2020/1/7 23:43