diff --git a/release.bat b/release.bat
index 6209cd6..2f3a21d 100644
--- a/release.bat
+++ b/release.bat
@@ -10,9 +10,9 @@ ECHO "============================= RELEASE START..."
:: 版本号信息(需要手动指定)
:::: 旧版本名称
-SET version=0.0.6
+SET version=0.0.7
:::: 新版本名称
-SET newVersion=0.0.7
+SET newVersion=0.0.8
:::: 组织名称
SET groupName=com.github.houbb
:::: 项目名称
diff --git a/src/main/java/com/github/houbb/sensitive/word/api/IWordContext.java b/src/main/java/com/github/houbb/sensitive/word/api/IWordContext.java
index 7b7bd5c..29ff5ce 100644
--- a/src/main/java/com/github/houbb/sensitive/word/api/IWordContext.java
+++ b/src/main/java/com/github/houbb/sensitive/word/api/IWordContext.java
@@ -29,7 +29,6 @@ public interface IWordContext {
*/
boolean ignoreNumStyle();
-
/**
* 设置是否忽略大小写
* @param ignoreCase 是否忽略大小写
@@ -64,12 +63,11 @@ public interface IWordContext {
/**
* 设置是否忽略中文繁简体格式
* @param ignoreChineseStyle 是否忽略
- * @return 是否
+ * @return this
* @since 0.0.6
*/
IWordContext ignoreChineseStyle(final boolean ignoreChineseStyle);
-
/**
* 获取敏感词信息
* @return 敏感词
@@ -95,7 +93,7 @@ public interface IWordContext {
/**
* 设置敏感数字检测
* @param sensitiveNumCheck 数字格式检测
- * @return 数字检测
+ * @return this
* @since 0.0.5
*/
IWordContext sensitiveNumCheck(final boolean sensitiveNumCheck);
@@ -110,9 +108,24 @@ public interface IWordContext {
/**
* 设置忽略英文的写法
* @param ignoreEnglishStyle 是否忽略
- * @return 数字检测
+ * @return this
* @since 0.0.6
*/
IWordContext ignoreEnglishStyle(final boolean ignoreEnglishStyle);
+ /**
+ * Whether repeated words are ignored.
+ * @return whether they are ignored
+ * @since 0.0.7
+ */
+ boolean ignoreRepeat();
+
+ /**
+ * Sets whether repeated words are ignored.
+ * @param ignoreRepeat whether to ignore them
+ * @return this
+ * @since 0.0.7
+ */
+ IWordContext ignoreRepeat(final boolean ignoreRepeat);
+
}
diff --git a/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java b/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java
index 1669604..da2fa86 100644
--- a/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java
+++ b/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java
@@ -85,6 +85,7 @@ public class SensitiveWordBs {
wordContext.ignoreNumStyle(true);
wordContext.ignoreChineseStyle(true);
wordContext.ignoreEnglishStyle(true);
+ wordContext.ignoreRepeat(true);
// 开启校验
wordContext.sensitiveNumCheck(true);
diff --git a/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordContext.java b/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordContext.java
index 232709b..9cdf438 100644
--- a/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordContext.java
+++ b/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordContext.java
@@ -53,6 +53,12 @@ public class SensitiveWordContext implements IWordContext {
*/
private boolean ignoreEnglishStyle;
+ /**
+ * Whether repeated words are ignored.
+ * @since 0.0.7
+ */
+ private boolean ignoreRepeat;
+
/**
* 私有化构造器
* @since 0.0.4
@@ -147,16 +153,14 @@ public class SensitiveWordContext implements IWordContext {
}
@Override
- public String toString() {
- return "SensitiveWordContext{" +
- "ignoreCase=" + ignoreCase +
- ", ignoreWidth=" + ignoreWidth +
- ", ignoreNumStyle=" + ignoreNumStyle +
- ", sensitiveWordMap=" + sensitiveWordMap +
- ", sensitiveNumCheck=" + sensitiveNumCheck +
- ", ignoreChineseStyle=" + ignoreChineseStyle +
- ", ignoreEnglishStyle=" + ignoreEnglishStyle +
- '}';
+ public boolean ignoreRepeat() {
+ return ignoreRepeat;
+ }
+
+ @Override
+ public SensitiveWordContext ignoreRepeat(boolean ignoreRepeat) {
+ this.ignoreRepeat = ignoreRepeat;
+ return this;
}
}
diff --git a/src/main/java/com/github/houbb/sensitive/word/support/check/SensitiveWordCheck.java b/src/main/java/com/github/houbb/sensitive/word/support/check/SensitiveWordCheck.java
index 1a1d48c..b1f27d5 100644
--- a/src/main/java/com/github/houbb/sensitive/word/support/check/SensitiveWordCheck.java
+++ b/src/main/java/com/github/houbb/sensitive/word/support/check/SensitiveWordCheck.java
@@ -2,7 +2,6 @@ package com.github.houbb.sensitive.word.support.check;
import com.github.houbb.heaven.annotation.ThreadSafe;
import com.github.houbb.heaven.support.instance.impl.Instances;
-import com.github.houbb.heaven.util.lang.CharUtil;
import com.github.houbb.heaven.util.lang.ObjectUtil;
import com.github.houbb.sensitive.word.api.ISensitiveCheck;
import com.github.houbb.sensitive.word.api.IWordContext;
@@ -29,12 +28,9 @@ public class SensitiveWordCheck implements ISensitiveCheck {
int actualLength = 0;
for (int i = beginIndex; i < txt.length(); i++) {
- char c = txt.charAt(i);
- char charKey = Instances.singleton(CharFormatChain.class).format(c, context);
+ // 获取当前的 map 信息
+ nowMap = getNowMap(nowMap, context, txt, i);
- // 判断该字是否存在于敏感词库中
- // 并且将 nowMap 替换为新的 map,进入下一层的循环。
- nowMap = (Map) nowMap.get(charKey);
if (ObjectUtil.isNotNull(nowMap)) {
lengthCount++;
@@ -60,4 +56,38 @@ public class SensitiveWordCheck implements ISensitiveCheck {
return actualLength;
}
+ /**
+ * Resolves the map to continue matching from for the character at {@code index}.
+ * @param nowMap the map the caller is currently positioned on
+ * @param context word context; passed to the character formatter and queried for ignoreRepeat
+ * @param txt the text being scanned
+ * @param index position in {@code txt} of the character to look up
+ * @return the lookup result for the formatted character, or {@code nowMap} itself when a repeat is ignored
+ * @since 0.0.7
+ */
+ private Map getNowMap(Map nowMap,
+ final IWordContext context,
+ final String txt,
+ final int index) {
+ char c = txt.charAt(index);
+ char mappingChar = Instances.singleton(CharFormatChain.class).format(c, context);
+
+ // Look up the formatted character first; the repeat handling below may replace this result.
+ Map currentMap = (Map) nowMap.get(mappingChar);
+ // Only when ignoreRepeat is enabled and index is not the first position of txt. NOTE(review): index-1 may lie before the caller's beginIndex - confirm that is intended.
+ if(context.ignoreRepeat()
+ && index > 0) {
+ char preChar = txt.charAt(index-1);
+ char preMappingChar = Instances.singleton(CharFormatChain.class)
+ .format(preChar, context);
+
+ // Same formatted character as the previous one: stay on nowMap. NOTE(review): this discards currentMap even when the lookup succeeded - confirm for words that contain doubled characters.
+ if(preMappingChar == mappingChar) {
+ currentMap = nowMap;
+ }
+ }
+
+ return currentMap;
+ }
+
}
diff --git a/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsRepeatTest.java b/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsRepeatTest.java
new file mode 100644
index 0000000..16e20df
--- /dev/null
+++ b/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsRepeatTest.java
@@ -0,0 +1,29 @@
+package com.github.houbb.sensitive.word.bs;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.util.List;
+
+/**
+ * project: sensitive-word-SensitiveWordBsTest
+ * create on 2020/1/7 23:43
+ * Checks repeated-character handling through SensitiveWordBs.newInstance().findAll.
+ * @author Administrator
+ * @since 0.0.7
+ */
+public class SensitiveWordBsRepeatTest {
+
+ /**
+ * Ignore repeated words: the whole run of repeated, mixed-style characters is expected back as one word.
+ * @since 0.0.7
+ */
+ @Test
+ public void ignoreRepeatTest() {
+ final String text = "ⒻⒻⒻfⓤuⓤ⒰cⓒ⒦ the bad words";
+
+ List wordList = SensitiveWordBs.newInstance().findAll(text);
+ Assert.assertEquals("[ⒻⒻⒻfⓤuⓤ⒰cⓒ⒦]", wordList.toString());
+ }
+
+}