From b384198d5b261a602b8899ba7188ab446b7d6fed Mon Sep 17 00:00:00 2001
From: yudasen <2436348937@qq.com>
Date: Sun, 16 Feb 2025 18:42:31 +0800
Subject: [PATCH 1/6] =?UTF-8?q?feature:=20=E6=8F=90=E4=BE=9B=E8=87=AA?=
=?UTF-8?q?=E5=AE=9A=E4=B9=89=E6=9B=BF=E6=8D=A2=E7=AD=96=E7=95=A5=E7=9A=84?=
=?UTF-8?q?api,=E8=A7=A3=E5=86=B3issue#36=E7=9A=84=E9=97=AE=E9=A2=98?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
.../sensitive/word/api/ISensitiveWord.java | 8 +++++---
.../sensitive/word/bs/SensitiveWordBs.java | 5 ++++-
.../word/core/AbstractSensitiveWord.java | 7 +++----
.../word/bs/SensitiveWordBsReplaceTest.java | 20 +++++++++++++++++++
4 files changed, 32 insertions(+), 8 deletions(-)
create mode 100644 src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsReplaceTest.java
diff --git a/src/main/java/com/github/houbb/sensitive/word/api/ISensitiveWord.java b/src/main/java/com/github/houbb/sensitive/word/api/ISensitiveWord.java
index c3de23d..fe921a4 100644
--- a/src/main/java/com/github/houbb/sensitive/word/api/ISensitiveWord.java
+++ b/src/main/java/com/github/houbb/sensitive/word/api/ISensitiveWord.java
@@ -33,16 +33,18 @@ public interface ISensitiveWord {
/**
* 替换所有敏感词内容
- *
+ *
* ps: 这里可以添加优化。
*
- * @param target 目标字符串
+ * @param target 目标字符串
* @param context 上下文
+ * @param replace 替换策略
* @return 替换后结果
* @since 0.3.2
*/
String replace(final String target,
- final IWordContext context);
+ final IWordContext context,
+ final IWordReplace replace);
/**
* 包含
diff --git a/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java b/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java
index 31441f3..a9afa35 100644
--- a/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java
+++ b/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java
@@ -592,7 +592,10 @@ public class SensitiveWordBs implements ISensitiveWordDestroy {
* @since 0.2.0
*/
public String replace(final String target) {
- return sensitiveWord.replace(target, context);
+ return this.replace(target,context.wordReplace());
+ }
+ public String replace(final String target, IWordReplace replace) {
+ return sensitiveWord.replace(target, context, replace);
}
/**
diff --git a/src/main/java/com/github/houbb/sensitive/word/core/AbstractSensitiveWord.java b/src/main/java/com/github/houbb/sensitive/word/core/AbstractSensitiveWord.java
index 7648300..28c8866 100644
--- a/src/main/java/com/github/houbb/sensitive/word/core/AbstractSensitiveWord.java
+++ b/src/main/java/com/github/houbb/sensitive/word/core/AbstractSensitiveWord.java
@@ -38,10 +38,9 @@ public abstract class AbstractSensitiveWord implements ISensitiveWord {
* @return 结果
* @since 0.3.2
*/
- protected String doReplace(String target, List allList, IWordContext context) {
+ protected String doReplace(String target, List allList, IWordContext context, IWordReplace replace) {
// 根据 index 直接分割
- final IWordReplace replace = context.wordReplace();
// 是否需要对 allList 排序?
StringBuilder stringBuilder = new StringBuilder();
@@ -91,7 +90,7 @@ public abstract class AbstractSensitiveWord implements ISensitiveWord {
}
@Override
- public String replace(String target, IWordContext context) {
+ public String replace(String target, IWordContext context, IWordReplace replace) {
if(StringUtil.isEmpty(target)) {
return target;
}
@@ -101,7 +100,7 @@ public abstract class AbstractSensitiveWord implements ISensitiveWord {
return target;
}
- return doReplace(target, allList, context);
+ return doReplace(target, allList, context, replace);
}
@Override
diff --git a/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsReplaceTest.java b/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsReplaceTest.java
new file mode 100644
index 0000000..30ccdc9
--- /dev/null
+++ b/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsReplaceTest.java
@@ -0,0 +1,20 @@
+package com.github.houbb.sensitive.word.bs;
+
+import com.github.houbb.sensitive.word.api.IWordReplace;
+import com.github.houbb.sensitive.word.replace.MyWordReplace;
+import org.junit.Assert;
+import org.junit.Test;
+
+public class SensitiveWordBsReplaceTest {
+
+ @Test
+ public void defineReplaceTest() {
+ final String text = "五星红旗迎风飘扬,毛主席的画像屹立在天安门前。";
+ SensitiveWordBs sensitiveWordBs = SensitiveWordBs.newInstance().init();
+
+ IWordReplace replace = new MyWordReplace();
+ String result = sensitiveWordBs.replace(text, replace);
+
+ Assert.assertEquals("国家旗帜迎风飘扬,教员的画像屹立在***前。", result);
+ }
+}
From bfe3a606c9bb7439433c725c83f627cd8b2145e0 Mon Sep 17 00:00:00 2001
From: yds <11232266+yuds11@user.noreply.gitee.com>
Date: Fri, 2 May 2025 22:05:10 +0800
Subject: [PATCH 2/6] =?UTF-8?q?issue110,=E5=B9=B6=E5=BA=9F=E9=99=A4?=
=?UTF-8?q?=E4=B8=80=E4=BA=9B=E9=BB=91=E7=99=BD=E5=90=8D=E5=8D=95=E4=B8=80?=
=?UTF-8?q?=E6=AC=A1=E9=81=8D=E5=8E=86=E6=97=B6=E7=9A=84=E5=86=97=E4=BD=99?=
=?UTF-8?q?=E9=80=BB=E8=BE=91?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
.../sensitive/word/api/IWordContext.java | 11 +++
.../sensitive/word/bs/SensitiveWordBs.java | 9 +-
.../word/bs/SensitiveWordContext.java | 17 ++++
.../word/support/check/WordCheckWord.java | 41 ++++----
.../support/check/WordCheckWordMaxLen.java | 96 -------------------
.../word/bs/SensitiveWordFailFastTest.java | 47 +++++++++
.../word/bs/SensitiveWordMaxFirstTest.java | 28 ------
7 files changed, 103 insertions(+), 146 deletions(-)
delete mode 100644 src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckWordMaxLen.java
create mode 100644 src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordFailFastTest.java
delete mode 100644 src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordMaxFirstTest.java
diff --git a/src/main/java/com/github/houbb/sensitive/word/api/IWordContext.java b/src/main/java/com/github/houbb/sensitive/word/api/IWordContext.java
index 2b50f08..508899d 100644
--- a/src/main/java/com/github/houbb/sensitive/word/api/IWordContext.java
+++ b/src/main/java/com/github/houbb/sensitive/word/api/IWordContext.java
@@ -8,6 +8,17 @@ import com.github.houbb.sensitive.word.bs.SensitiveWordContext;
*/
public interface IWordContext {
+
+ /**
+ * 为true时,遇到第一个敏感词就返回
+ * 解决issue110
+ * @return 是否在匹配到第一个敏感词时立即返回
+ */
+ boolean failFastWordPattern();
+
+ IWordContext failFastWordPattern(boolean failFastWordPattern);
+
+
/**
* 是否忽略大小写
* @return 是否
diff --git a/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java b/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java
index 4f8e50d..86a48b9 100644
--- a/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java
+++ b/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java
@@ -2,7 +2,6 @@ package com.github.houbb.sensitive.word.bs;
import com.github.houbb.heaven.support.handler.IHandler;
import com.github.houbb.heaven.util.common.ArgUtil;
-import com.github.houbb.heaven.util.lang.StringUtil;
import com.github.houbb.heaven.util.util.CollectionUtil;
import com.github.houbb.sensitive.word.api.*;
import com.github.houbb.sensitive.word.api.combine.IWordAllowDenyCombine;
@@ -70,6 +69,9 @@ public class SensitiveWordBs implements ISensitiveWordDestroy {
*/
private boolean ignoreRepeat = false;
+ private boolean failFastWordPattern = true;
+
+
// 开启校验
/**
* 启用数字检测
@@ -278,6 +280,7 @@ public class SensitiveWordBs implements ISensitiveWordDestroy {
context.ignoreChineseStyle(ignoreChineseStyle);
context.ignoreEnglishStyle(ignoreEnglishStyle);
context.ignoreRepeat(ignoreRepeat);
+ context.failFastWordPattern(failFastWordPattern);
// 开启校验
context.enableNumCheck(enableNumCheck);
@@ -579,6 +582,10 @@ public class SensitiveWordBs implements ISensitiveWordDestroy {
this.ignoreRepeat = ignoreRepeat;
return this;
}
+ public SensitiveWordBs failFastWordPattern(boolean failFastWordPattern) {
+ this.failFastWordPattern = failFastWordPattern;
+ return this;
+ }
//------------------------------------------------------------------------------------ 公开方法 START
/**
diff --git a/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordContext.java b/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordContext.java
index bcd884c..e2a68c8 100644
--- a/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordContext.java
+++ b/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordContext.java
@@ -10,6 +10,12 @@ import com.github.houbb.sensitive.word.support.check.WordChecks;
*/
public class SensitiveWordContext implements IWordContext {
+
+ /**
+ * issue110
+ */
+ private boolean failFastWordPattern;
+
/**
* 忽略大小写
* @since 0.0.4
@@ -220,6 +226,17 @@ public class SensitiveWordContext implements IWordContext {
return new SensitiveWordContext();
}
+ @Override
+ public boolean failFastWordPattern() {
+ return failFastWordPattern;
+ }
+
+ public IWordContext failFastWordPattern(boolean failFastWordPattern){
+ this.failFastWordPattern=failFastWordPattern;
+ return this;
+ }
+
+
@Override
public boolean ignoreCase() {
return ignoreCase;
diff --git a/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckWord.java b/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckWord.java
index 4fb7488..c3113de 100644
--- a/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckWord.java
+++ b/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckWord.java
@@ -1,14 +1,12 @@
package com.github.houbb.sensitive.word.support.check;
import com.github.houbb.heaven.annotation.ThreadSafe;
-import com.github.houbb.heaven.util.lang.StringUtil;
import com.github.houbb.sensitive.word.api.ISensitiveWordCharIgnore;
import com.github.houbb.sensitive.word.api.IWordCheck;
import com.github.houbb.sensitive.word.api.IWordContext;
import com.github.houbb.sensitive.word.api.IWordData;
import com.github.houbb.sensitive.word.api.context.InnerSensitiveWordContext;
import com.github.houbb.sensitive.word.constant.enums.WordTypeEnum;
-import com.github.houbb.sensitive.word.constant.enums.WordValidModeEnum;
import com.github.houbb.sensitive.word.constant.enums.WordContainsTypeEnum;
import com.github.houbb.sensitive.word.support.result.WordLengthResult;
@@ -44,47 +42,48 @@ public class WordCheckWord extends AbstractWordCheck {
final IWordData wordData = context.wordData();
final IWordData wordDataAllow = context.wordDataAllow();
final ISensitiveWordCharIgnore wordCharIgnore = context.charIgnore();
+ final boolean failFast = context.failFastWordPattern();
- // 前一个条件
StringBuilder stringBuilder = new StringBuilder();
char[] rawChars = txt.toCharArray();
-
int tempLen = 0;
int maxWhite = 0;
int maxBlack = 0;
- boolean firstCheck = true;
-
- WordContainsTypeEnum wordContainsTypeEnumAllow = wordDataAllow.contains(stringBuilder, innerContext);
- WordContainsTypeEnum wordContainsTypeEnumDeny = wordData.contains(stringBuilder, innerContext);
for (int i = beginIndex; i < rawChars.length; i++) {
if (wordCharIgnore.ignore(i, rawChars, innerContext) && tempLen != 0) {
tempLen++;
continue;
}
-
char mappingChar = formatCharMapping.get(rawChars[i]);
stringBuilder.append(mappingChar);
tempLen++;
- if (firstCheck || !WordContainsTypeEnum.NOT_FOUND.equals(wordContainsTypeEnumAllow)) {
- wordContainsTypeEnumAllow = wordDataAllow.contains(stringBuilder, innerContext);
- if (WordContainsTypeEnum.CONTAINS_END.equals(wordContainsTypeEnumAllow)) {
- maxWhite += tempLen;
- wordContainsTypeEnumAllow = WordContainsTypeEnum.NOT_FOUND;
+ WordContainsTypeEnum wordContainsTypeEnumAllow = wordDataAllow.contains(stringBuilder, innerContext);
+ WordContainsTypeEnum wordContainsTypeEnumDeny = wordData.contains(stringBuilder, innerContext);
+
+ if (WordContainsTypeEnum.CONTAINS_END.equals(wordContainsTypeEnumAllow)) {
+ maxWhite += tempLen;
+ if (!failFast) {
+ //此处将tempLen设为0,为了防止重复累加
+ tempLen = 0;
+ }else{
+ //为falFast模式,主动设为notFound退出循环
+ wordContainsTypeEnumAllow=WordContainsTypeEnum.NOT_FOUND;
}
}
- if (firstCheck || !WordContainsTypeEnum.NOT_FOUND.equals(wordContainsTypeEnumDeny)) {
- wordContainsTypeEnumDeny = wordData.contains(stringBuilder, innerContext);
- if (WordContainsTypeEnum.CONTAINS_END.equals(wordContainsTypeEnumDeny)) {
- maxBlack += tempLen;
- wordContainsTypeEnumDeny = WordContainsTypeEnum.NOT_FOUND;
+ if (WordContainsTypeEnum.CONTAINS_END.equals(wordContainsTypeEnumDeny)) {
+ maxBlack += tempLen;
+ if (!failFast) {
+ //此处将tempLen设为0,为了防止重复累加
+ tempLen = 0;
+ }else{
+ //为falFast模式,主动设为notFound退出循环
+ wordContainsTypeEnumDeny=WordContainsTypeEnum.NOT_FOUND;
}
}
- firstCheck = false;
-
if (WordContainsTypeEnum.NOT_FOUND.equals(wordContainsTypeEnumAllow) &&
WordContainsTypeEnum.NOT_FOUND.equals(wordContainsTypeEnumDeny)) {
break;
diff --git a/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckWordMaxLen.java b/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckWordMaxLen.java
deleted file mode 100644
index b7c72c0..0000000
--- a/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckWordMaxLen.java
+++ /dev/null
@@ -1,96 +0,0 @@
-package com.github.houbb.sensitive.word.support.check;
-
-import com.github.houbb.heaven.annotation.ThreadSafe;
-import com.github.houbb.sensitive.word.api.ISensitiveWordCharIgnore;
-import com.github.houbb.sensitive.word.api.IWordCheck;
-import com.github.houbb.sensitive.word.api.IWordContext;
-import com.github.houbb.sensitive.word.api.IWordData;
-import com.github.houbb.sensitive.word.api.context.InnerSensitiveWordContext;
-import com.github.houbb.sensitive.word.constant.enums.WordContainsTypeEnum;
-import com.github.houbb.sensitive.word.constant.enums.WordTypeEnum;
-import com.github.houbb.sensitive.word.support.result.WordLengthResult;
-
-import java.util.Map;
-
-/**
- * 敏感词监测实现
- * @author binbin.hou
- * @since 0.26.0
- */
-@Deprecated
-public class WordCheckWordMaxLen extends AbstractWordCheck {
-
- @Override
- protected Class extends IWordCheck> getSensitiveCheckClass() {
- return WordCheckWordMaxLen.class;
- }
-
- @Override
- protected WordLengthResult getActualLength(int beginIndex, InnerSensitiveWordContext innerContext) {
- final String txt = innerContext.originalText();
- final Map formatCharMapping = innerContext.formatCharMapping();
- final IWordContext context = innerContext.wordContext();
- final IWordData wordData = context.wordData();
- final IWordData wordDataAllow = context.wordDataAllow();
- final ISensitiveWordCharIgnore wordCharIgnore = context.charIgnore();
-
- // 前一个条件
- StringBuilder stringBuilder = new StringBuilder();
- char[] rawChars = txt.toCharArray();
-
- int tempLen = 0;
- int maxWhite = 0;
- int maxBlack = 0;
- boolean firstCheck = true;
-
- WordContainsTypeEnum wordContainsTypeEnumAllow = wordDataAllow.contains(stringBuilder, innerContext);
- WordContainsTypeEnum wordContainsTypeEnumDeny = wordData.contains(stringBuilder, innerContext);
-
- for (int i = beginIndex; i < rawChars.length; i++) {
- if (wordCharIgnore.ignore(i, rawChars, innerContext) && tempLen != 0) {
- tempLen++;
- continue;
- }
-
- char mappingChar = formatCharMapping.get(rawChars[i]);
- stringBuilder.append(mappingChar);
- tempLen++;
-
- if (firstCheck || !WordContainsTypeEnum.NOT_FOUND.equals(wordContainsTypeEnumAllow)) {
- wordContainsTypeEnumAllow = wordDataAllow.contains(stringBuilder, innerContext);
- if (WordContainsTypeEnum.CONTAINS_END.equals(wordContainsTypeEnumAllow)) {
- maxWhite += tempLen;
- wordContainsTypeEnumAllow = WordContainsTypeEnum.NOT_FOUND;
- }
- }
-
- // 黑名单命中
- if (firstCheck || !WordContainsTypeEnum.NOT_FOUND.equals(wordContainsTypeEnumDeny)) {
- wordContainsTypeEnumDeny = wordData.contains(stringBuilder, innerContext);
- if (WordContainsTypeEnum.CONTAINS_END.equals(wordContainsTypeEnumDeny)) {
- maxBlack += tempLen;
- wordContainsTypeEnumDeny = WordContainsTypeEnum.NOT_FOUND;
- }
- }
-
- // 不再是第一次检测
- firstCheck = false;
-
- // 黑白名单都未匹配
- if (WordContainsTypeEnum.NOT_FOUND.equals(wordContainsTypeEnumAllow) &&
- WordContainsTypeEnum.NOT_FOUND.equals(wordContainsTypeEnumDeny)) {
- break;
- }
- }
-
- return WordLengthResult.newInstance()
- .wordAllowLen(maxWhite)
- .wordDenyLen(maxBlack);
- }
-
- @Override
- protected String getType() {
- return WordTypeEnum.WORD.getCode();
- }
-
-}
diff --git a/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordFailFastTest.java b/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordFailFastTest.java
new file mode 100644
index 0000000..6b4258c
--- /dev/null
+++ b/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordFailFastTest.java
@@ -0,0 +1,47 @@
+package com.github.houbb.sensitive.word.bs;
+
+import com.github.houbb.sensitive.word.api.IWordDeny;
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.util.Arrays;
+import java.util.List;
+
+public class SensitiveWordFailFastTest {
+
+ @Test
+ public void failFastTest() {
+ SensitiveWordBs bs = SensitiveWordBs.newInstance()
+ .failFastWordPattern(true)
+ .wordDeny(new IWordDeny() {
+ @Override
+ public List deny() {
+ return Arrays.asList("我的世界", "我的");
+ }
+ }).init();
+
+ String text = "我在我的家里玩我的世界";
+
+ List textList = bs.findAll(text);
+ Assert.assertEquals(Arrays.asList("我的", "我的"), textList);
+
+ }
+ @Test
+ public void fallOverTest() {
+ SensitiveWordBs bs = SensitiveWordBs.newInstance()
+ .failFastWordPattern(false)
+ .wordDeny(new IWordDeny() {
+ @Override
+ public List deny() {
+ return Arrays.asList("我的世界", "我的");
+ }
+ }).init();
+
+ String text = "我在我的家里玩我的世界";
+
+ List textList = bs.findAll(text);
+ Assert.assertEquals(Arrays.asList("我的", "我的世界"), textList);
+
+ }
+
+}
diff --git a/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordMaxFirstTest.java b/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordMaxFirstTest.java
deleted file mode 100644
index 2c0f819..0000000
--- a/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordMaxFirstTest.java
+++ /dev/null
@@ -1,28 +0,0 @@
-package com.github.houbb.sensitive.word.bs;
-
-import com.github.houbb.sensitive.word.api.IWordDeny;
-import org.junit.Assert;
-import org.junit.Test;
-
-import java.util.Arrays;
-import java.util.List;
-
-public class SensitiveWordMaxFirstTest {
-
- @Test
- public void maxFirstTest() {
- SensitiveWordBs bs = SensitiveWordBs.newInstance()
- .wordDeny(new IWordDeny() {
- @Override
- public List deny() {
- return Arrays.asList("我的世界", "我的");
- }
- }).init();
-
- String text = "我的世界我的好玩";
-
- List textList = bs.findAll(text);
-// Assert.assertEquals("", textList.toString());
- }
-
-}
From 849dd6438043c7cc96b92464685cc5f45bc72804 Mon Sep 17 00:00:00 2001
From: yds <11232266+yuds11@user.noreply.gitee.com>
Date: Fri, 2 May 2025 22:48:50 +0800
Subject: [PATCH 3/6] =?UTF-8?q?issue110,=E5=B9=B6=E5=BA=9F=E9=99=A4?=
=?UTF-8?q?=E4=B8=80=E4=BA=9B=E9=BB=91=E7=99=BD=E5=90=8D=E5=8D=95=E4=B8=80?=
=?UTF-8?q?=E6=AC=A1=E9=81=8D=E5=8E=86=E6=97=B6=E7=9A=84=E5=86=97=E4=BD=99?=
=?UTF-8?q?=E9=80=BB=E8=BE=91?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
.../sensitive/word/support/check/WordCheckWord.java | 12 +++++++++++-
1 file changed, 11 insertions(+), 1 deletion(-)
diff --git a/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckWord.java b/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckWord.java
index c3113de..a72abd6 100644
--- a/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckWord.java
+++ b/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckWord.java
@@ -9,6 +9,7 @@ import com.github.houbb.sensitive.word.api.context.InnerSensitiveWordContext;
import com.github.houbb.sensitive.word.constant.enums.WordTypeEnum;
import com.github.houbb.sensitive.word.constant.enums.WordContainsTypeEnum;
import com.github.houbb.sensitive.word.support.result.WordLengthResult;
+import com.github.houbb.sensitive.word.utils.InnerWordFormatUtils;
import java.util.Map;
@@ -90,9 +91,18 @@ public class WordCheckWord extends AbstractWordCheck {
}
}
+ String whiteWord = txt.substring(beginIndex, beginIndex + maxWhite);
+ String blackWord = txt.substring(beginIndex, beginIndex + maxBlack);
+
+ String formatWhiteWord= InnerWordFormatUtils.format(whiteWord,context);
+ String formatBlackWord= InnerWordFormatUtils.format(blackWord,context);
+
+
return WordLengthResult.newInstance()
.wordAllowLen(maxWhite)
- .wordDenyLen(maxBlack);
+ .wordDenyLen(maxBlack)
+ .wordAllow(formatWhiteWord)
+ .wordDeny(formatBlackWord);
}
@Override
From 91e811b360c780f90462c8f826ffc241bbfe06a5 Mon Sep 17 00:00:00 2001
From: yds <11232266+yuds11@user.noreply.gitee.com>
Date: Fri, 2 May 2025 22:49:43 +0800
Subject: [PATCH 4/6] =?UTF-8?q?issue110,=E5=B9=B6=E5=BA=9F=E9=99=A4?=
=?UTF-8?q?=E4=B8=80=E4=BA=9B=E9=BB=91=E7=99=BD=E5=90=8D=E5=8D=95=E4=B8=80?=
=?UTF-8?q?=E6=AC=A1=E9=81=8D=E5=8E=86=E6=97=B6=E7=9A=84=E5=86=97=E4=BD=99?=
=?UTF-8?q?=E9=80=BB=E8=BE=91,=E5=B9=B6=E8=A7=A3=E5=86=B3=E8=BF=94?=
=?UTF-8?q?=E5=9B=9E=E5=AE=9E=E9=99=85=E5=80=BC=E6=97=B6=E7=9A=84=E5=86=B2?=
=?UTF-8?q?=E7=AA=81?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
.../sensitive/word/bs/SensitiveWordContext.java | 2 ++
.../word/support/check/WordCheckWord.java | 16 +++++++++-------
2 files changed, 11 insertions(+), 7 deletions(-)
diff --git a/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordContext.java b/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordContext.java
index e2a68c8..6f142fd 100644
--- a/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordContext.java
+++ b/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordContext.java
@@ -231,6 +231,8 @@ public class SensitiveWordContext implements IWordContext {
return failFastWordPattern;
}
+
+ @Override
public IWordContext failFastWordPattern(boolean failFastWordPattern){
this.failFastWordPattern=failFastWordPattern;
return this;
diff --git a/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckWord.java b/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckWord.java
index a72abd6..f7779d1 100644
--- a/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckWord.java
+++ b/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckWord.java
@@ -5,6 +5,7 @@ import com.github.houbb.sensitive.word.api.ISensitiveWordCharIgnore;
import com.github.houbb.sensitive.word.api.IWordCheck;
import com.github.houbb.sensitive.word.api.IWordContext;
import com.github.houbb.sensitive.word.api.IWordData;
+import com.github.houbb.sensitive.word.api.IWordFormat;
import com.github.houbb.sensitive.word.api.context.InnerSensitiveWordContext;
import com.github.houbb.sensitive.word.constant.enums.WordTypeEnum;
import com.github.houbb.sensitive.word.constant.enums.WordContainsTypeEnum;
@@ -50,10 +51,12 @@ public class WordCheckWord extends AbstractWordCheck {
int tempLen = 0;
int maxWhite = 0;
int maxBlack = 0;
+ int skipLen=0;
for (int i = beginIndex; i < rawChars.length; i++) {
if (wordCharIgnore.ignore(i, rawChars, innerContext) && tempLen != 0) {
tempLen++;
+ skipLen++;
continue;
}
char mappingChar = formatCharMapping.get(rawChars[i]);
@@ -91,20 +94,19 @@ public class WordCheckWord extends AbstractWordCheck {
}
}
- String whiteWord = txt.substring(beginIndex, beginIndex + maxWhite);
- String blackWord = txt.substring(beginIndex, beginIndex + maxBlack);
-
- String formatWhiteWord= InnerWordFormatUtils.format(whiteWord,context);
- String formatBlackWord= InnerWordFormatUtils.format(blackWord,context);
+ String string = stringBuilder.toString();
+ String wordAllow = string.substring(0, Math.max(0,maxWhite - skipLen));
+ String wordDeny = string.substring(0, Math.max(0,maxBlack - skipLen));
return WordLengthResult.newInstance()
.wordAllowLen(maxWhite)
.wordDenyLen(maxBlack)
- .wordAllow(formatWhiteWord)
- .wordDeny(formatBlackWord);
+ .wordAllow(wordAllow)
+ .wordDeny(wordDeny);
}
+
@Override
protected String getType() {
return WordTypeEnum.WORD.getCode();
From c35772d70e3c1b65dce3ccf4c91e30849e206723 Mon Sep 17 00:00:00 2001
From: yds <11232266+yuds11@user.noreply.gitee.com>
Date: Fri, 2 May 2025 23:31:43 +0800
Subject: [PATCH 5/6] =?UTF-8?q?=E5=88=A0=E9=99=A4=E8=AF=AF=E5=8A=A0?=
=?UTF-8?q?=E7=9A=84replace?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
.../houbb/sensitive/word/api/ISensitiveWord.java | 14 ++++++--------
.../houbb/sensitive/word/bs/SensitiveWordBs.java | 6 ++----
.../sensitive/word/core/AbstractSensitiveWord.java | 8 ++++----
3 files changed, 12 insertions(+), 16 deletions(-)
diff --git a/src/main/java/com/github/houbb/sensitive/word/api/ISensitiveWord.java b/src/main/java/com/github/houbb/sensitive/word/api/ISensitiveWord.java
index fe921a4..17f246d 100644
--- a/src/main/java/com/github/houbb/sensitive/word/api/ISensitiveWord.java
+++ b/src/main/java/com/github/houbb/sensitive/word/api/ISensitiveWord.java
@@ -19,7 +19,7 @@ public interface ISensitiveWord {
* @see WordValidModeEnum#FAIL_OVER 建议使用全部检测返回模式
*/
List findAll(final String string,
- final IWordContext context);
+ final IWordContext context);
/**
* 返回第一个对应的敏感词
@@ -29,22 +29,20 @@ public interface ISensitiveWord {
* @since 0.3.2
*/
IWordResult findFirst(final String string,
- final IWordContext context);
+ final IWordContext context);
/**
* 替换所有敏感词内容
- *
+ *
* ps: 这里可以添加优化。
*
- * @param target 目标字符串
+ * @param target 目标字符串
* @param context 上下文
- * @param replace 替换策略
* @return 替换后结果
* @since 0.3.2
*/
String replace(final String target,
- final IWordContext context,
- final IWordReplace replace);
+ final IWordContext context);
/**
* 包含
@@ -54,6 +52,6 @@ public interface ISensitiveWord {
* @since 0.3.2
*/
boolean contains(final String string,
- final IWordContext context);
+ final IWordContext context);
}
diff --git a/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java b/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java
index 86a48b9..8d10b5b 100644
--- a/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java
+++ b/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java
@@ -672,12 +672,10 @@ public class SensitiveWordBs implements ISensitiveWordDestroy {
* @since 0.2.0
*/
public String replace(final String target) {
- return this.replace(target,context.wordReplace());
- }
- public String replace(final String target, IWordReplace replace) {
- return sensitiveWord.replace(target, context, replace);
+ return sensitiveWord.replace(target, context);
}
+
/**
* 获取敏感词的标签
*
diff --git a/src/main/java/com/github/houbb/sensitive/word/core/AbstractSensitiveWord.java b/src/main/java/com/github/houbb/sensitive/word/core/AbstractSensitiveWord.java
index 28c8866..c3ce135 100644
--- a/src/main/java/com/github/houbb/sensitive/word/core/AbstractSensitiveWord.java
+++ b/src/main/java/com/github/houbb/sensitive/word/core/AbstractSensitiveWord.java
@@ -38,8 +38,9 @@ public abstract class AbstractSensitiveWord implements ISensitiveWord {
* @return 结果
* @since 0.3.2
*/
- protected String doReplace(String target, List allList, IWordContext context, IWordReplace replace) {
+ protected String doReplace(String target, List allList, IWordContext context) {
// 根据 index 直接分割
+ final IWordReplace replace = context.wordReplace();
// 是否需要对 allList 排序?
StringBuilder stringBuilder = new StringBuilder();
@@ -90,7 +91,7 @@ public abstract class AbstractSensitiveWord implements ISensitiveWord {
}
@Override
- public String replace(String target, IWordContext context, IWordReplace replace) {
+ public String replace(String target, IWordContext context) {
if(StringUtil.isEmpty(target)) {
return target;
}
@@ -100,9 +101,8 @@ public abstract class AbstractSensitiveWord implements ISensitiveWord {
return target;
}
- return doReplace(target, allList, context, replace);
+ return doReplace(target, allList, context);
}
-
@Override
public boolean contains(String string, IWordContext context) {
//1. 第一个存在
From 6132261e77a7c381ce109d3386a847fa764b0f86 Mon Sep 17 00:00:00 2001
From: yds <11232266+yuds11@user.noreply.gitee.com>
Date: Fri, 2 May 2025 23:31:43 +0800
Subject: [PATCH 6/6] =?UTF-8?q?=E9=BB=91=E7=99=BD=E5=90=8D=E5=8D=95?=
=?UTF-8?q?=E5=85=B1=E5=90=8C=E6=A3=80=E6=B5=8B=E6=97=B6=E9=80=BB=E8=BE=91?=
=?UTF-8?q?=E7=9A=84=E4=BF=AE=E6=94=B9?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
.../sensitive/word/api/ISensitiveWord.java | 14 +-
.../sensitive/word/bs/SensitiveWordBs.java | 6 +-
.../word/core/AbstractSensitiveWord.java | 8 +-
.../sensitive/word/core/SensitiveWord.java | 34 ++-
.../word/support/check/WordCheckWord.java | 24 +--
.../word/bs/SensitiveWordBsReplaceTest.java | 20 --
.../word/bs/SensitiveWordFailFastTest.java | 200 +++++++++++++++++-
7 files changed, 233 insertions(+), 73 deletions(-)
delete mode 100644 src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsReplaceTest.java
diff --git a/src/main/java/com/github/houbb/sensitive/word/api/ISensitiveWord.java b/src/main/java/com/github/houbb/sensitive/word/api/ISensitiveWord.java
index fe921a4..17f246d 100644
--- a/src/main/java/com/github/houbb/sensitive/word/api/ISensitiveWord.java
+++ b/src/main/java/com/github/houbb/sensitive/word/api/ISensitiveWord.java
@@ -19,7 +19,7 @@ public interface ISensitiveWord {
* @see WordValidModeEnum#FAIL_OVER 建议使用全部检测返回模式
*/
List findAll(final String string,
- final IWordContext context);
+ final IWordContext context);
/**
* 返回第一个对应的敏感词
@@ -29,22 +29,20 @@ public interface ISensitiveWord {
* @since 0.3.2
*/
IWordResult findFirst(final String string,
- final IWordContext context);
+ final IWordContext context);
/**
* 替换所有敏感词内容
- *
+ *
* ps: 这里可以添加优化。
*
- * @param target 目标字符串
+ * @param target 目标字符串
* @param context 上下文
- * @param replace 替换策略
* @return 替换后结果
* @since 0.3.2
*/
String replace(final String target,
- final IWordContext context,
- final IWordReplace replace);
+ final IWordContext context);
/**
* 包含
@@ -54,6 +52,6 @@ public interface ISensitiveWord {
* @since 0.3.2
*/
boolean contains(final String string,
- final IWordContext context);
+ final IWordContext context);
}
diff --git a/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java b/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java
index 86a48b9..8d10b5b 100644
--- a/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java
+++ b/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java
@@ -672,12 +672,10 @@ public class SensitiveWordBs implements ISensitiveWordDestroy {
* @since 0.2.0
*/
public String replace(final String target) {
- return this.replace(target,context.wordReplace());
- }
- public String replace(final String target, IWordReplace replace) {
- return sensitiveWord.replace(target, context, replace);
+ return sensitiveWord.replace(target, context);
}
+
/**
* 获取敏感词的标签
*
diff --git a/src/main/java/com/github/houbb/sensitive/word/core/AbstractSensitiveWord.java b/src/main/java/com/github/houbb/sensitive/word/core/AbstractSensitiveWord.java
index 28c8866..c3ce135 100644
--- a/src/main/java/com/github/houbb/sensitive/word/core/AbstractSensitiveWord.java
+++ b/src/main/java/com/github/houbb/sensitive/word/core/AbstractSensitiveWord.java
@@ -38,8 +38,9 @@ public abstract class AbstractSensitiveWord implements ISensitiveWord {
* @return 结果
* @since 0.3.2
*/
- protected String doReplace(String target, List allList, IWordContext context, IWordReplace replace) {
+ protected String doReplace(String target, List allList, IWordContext context) {
// 根据 index 直接分割
+ final IWordReplace replace = context.wordReplace();
// 是否需要对 allList 排序?
StringBuilder stringBuilder = new StringBuilder();
@@ -90,7 +91,7 @@ public abstract class AbstractSensitiveWord implements ISensitiveWord {
}
@Override
- public String replace(String target, IWordContext context, IWordReplace replace) {
+ public String replace(String target, IWordContext context) {
if(StringUtil.isEmpty(target)) {
return target;
}
@@ -100,9 +101,8 @@ public abstract class AbstractSensitiveWord implements ISensitiveWord {
return target;
}
- return doReplace(target, allList, context, replace);
+ return doReplace(target, allList, context);
}
-
@Override
public boolean contains(String string, IWordContext context) {
//1. 第一个存在
diff --git a/src/main/java/com/github/houbb/sensitive/word/core/SensitiveWord.java b/src/main/java/com/github/houbb/sensitive/word/core/SensitiveWord.java
index 8147d34..7af8705 100644
--- a/src/main/java/com/github/houbb/sensitive/word/core/SensitiveWord.java
+++ b/src/main/java/com/github/houbb/sensitive/word/core/SensitiveWord.java
@@ -37,7 +37,7 @@ public class SensitiveWord extends AbstractSensitiveWord {
@Override
protected IWordResult doFindFirst(String string, IWordContext context) {
List wordResults = innerSensitiveWords(string, WordValidModeEnum.FAIL_FAST, context);
- if(!CollectionUtil.isEmpty(wordResults)){
+ if (!CollectionUtil.isEmpty(wordResults)) {
return wordResults.get(0);
}
return null;
@@ -47,14 +47,14 @@ public class SensitiveWord extends AbstractSensitiveWord {
/**
* 获取敏感词列表
*
- * @param text 文本
+ * @param text 文本
* @param modeEnum 模式
* @return 结果列表
* @since 0.0.1
*/
private List innerSensitiveWords(final String text,
- final WordValidModeEnum modeEnum,
- final IWordContext context) {
+ final WordValidModeEnum modeEnum,
+ final IWordContext context) {
//1. 是否存在敏感词,如果比存在,直接返回空列表
final IWordCheck sensitiveCheck = context.sensitiveCheck();
List resultList = Guavas.newArrayList();
@@ -74,38 +74,32 @@ public class SensitiveWord extends AbstractSensitiveWord {
// v0.21.0 白名单跳过
WordCheckResult checkResult = sensitiveCheck.sensitiveCheck(i, checkContext);
int wordLengthAllow = checkResult.wordLengthResult().wordAllowLen();
- if(wordLengthAllow > 0) {
- i += wordLengthAllow-1;
- continue;
- }
+ int wordLengthDeny = checkResult.wordLengthResult().wordDenyLen();
-
- // 命中
- final WordLengthResult wordLengthResult = checkResult.wordLengthResult();
- int wordLength = wordLengthResult.wordDenyLen();
- if (wordLength > 0) {
+ //如果命中的白名单长度小于黑名单,则直接对黑名单的敏感词进行保存
+ if (wordLengthAllow < wordLengthDeny) {
// 保存敏感词
WordResult wordResult = WordResult.newInstance()
.startIndex(i)
- .endIndex(i+wordLength)
+ .endIndex(i + wordLengthDeny)
.type(checkResult.type())
- .word(wordLengthResult.wordDeny());
+ .word(checkResult.wordLengthResult().wordDeny());
//v0.13.0 添加判断
- if(wordResultCondition.match(wordResult, text, modeEnum, context)) {
+ if (wordResultCondition.match(wordResult, text, modeEnum, context)) {
resultList.add(wordResult);
// 快速返回
if (WordValidModeEnum.FAIL_FAST.equals(modeEnum)) {
break;
}
}
-
-
-
// 增加 i 的步长
// 为什么要-1,因为默认就会自增1
// TODO: 这里可以根据字符串匹配算法优化。
- i += wordLength - 1;
+ i += wordLengthDeny - 1;
+ } else {
+ //否则(白名单命中长度大于等于黑名单命中长度,含均未命中的情况),跳过白名单命中长度个字符
+ i += Math.max(0, wordLengthAllow - 1);
}
}
diff --git a/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckWord.java b/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckWord.java
index f7779d1..dbd309a 100644
--- a/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckWord.java
+++ b/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckWord.java
@@ -51,7 +51,7 @@ public class WordCheckWord extends AbstractWordCheck {
int tempLen = 0;
int maxWhite = 0;
int maxBlack = 0;
- int skipLen=0;
+ int skipLen = 0;
for (int i = beginIndex; i < rawChars.length; i++) {
if (wordCharIgnore.ignore(i, rawChars, innerContext) && tempLen != 0) {
@@ -67,24 +67,18 @@ public class WordCheckWord extends AbstractWordCheck {
WordContainsTypeEnum wordContainsTypeEnumDeny = wordData.contains(stringBuilder, innerContext);
if (WordContainsTypeEnum.CONTAINS_END.equals(wordContainsTypeEnumAllow)) {
- maxWhite += tempLen;
- if (!failFast) {
- //此处将tempLen设为0,为了防止重复累加
- tempLen = 0;
- }else{
+ maxWhite = tempLen;
+ if (failFast) {
//为falFast模式,主动设为notFound退出循环
- wordContainsTypeEnumAllow=WordContainsTypeEnum.NOT_FOUND;
+ wordContainsTypeEnumAllow = WordContainsTypeEnum.NOT_FOUND;
}
}
if (WordContainsTypeEnum.CONTAINS_END.equals(wordContainsTypeEnumDeny)) {
- maxBlack += tempLen;
- if (!failFast) {
- //此处将tempLen设为0,为了防止重复累加
- tempLen = 0;
- }else{
+ maxBlack = tempLen;
+ if (failFast) {
//为falFast模式,主动设为notFound退出循环
- wordContainsTypeEnumDeny=WordContainsTypeEnum.NOT_FOUND;
+ wordContainsTypeEnumDeny = WordContainsTypeEnum.NOT_FOUND;
}
}
@@ -95,8 +89,8 @@ public class WordCheckWord extends AbstractWordCheck {
}
String string = stringBuilder.toString();
- String wordAllow = string.substring(0, Math.max(0,maxWhite - skipLen));
- String wordDeny = string.substring(0, Math.max(0,maxBlack - skipLen));
+ String wordAllow = string.substring(0, Math.max(0, maxWhite - skipLen));
+ String wordDeny = string.substring(0, Math.max(0, maxBlack - skipLen));
return WordLengthResult.newInstance()
diff --git a/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsReplaceTest.java b/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsReplaceTest.java
deleted file mode 100644
index 30ccdc9..0000000
--- a/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsReplaceTest.java
+++ /dev/null
@@ -1,20 +0,0 @@
-package com.github.houbb.sensitive.word.bs;
-
-import com.github.houbb.sensitive.word.api.IWordReplace;
-import com.github.houbb.sensitive.word.replace.MyWordReplace;
-import org.junit.Assert;
-import org.junit.Test;
-
-public class SensitiveWordBsReplaceTest {
-
- @Test
- public void defineReplaceTest() {
- final String text = "五星红旗迎风飘扬,毛主席的画像屹立在天安门前。";
- SensitiveWordBs sensitiveWordBs = SensitiveWordBs.newInstance().init();
-
- IWordReplace replace = new MyWordReplace();
- String result = sensitiveWordBs.replace(text, replace);
-
- Assert.assertEquals("国家旗帜迎风飘扬,教员的画像屹立在***前。", result);
- }
-}
diff --git a/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordFailFastTest.java b/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordFailFastTest.java
index 6b4258c..498a5aa 100644
--- a/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordFailFastTest.java
+++ b/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordFailFastTest.java
@@ -1,10 +1,12 @@
package com.github.houbb.sensitive.word.bs;
+import com.github.houbb.sensitive.word.api.IWordAllow;
import com.github.houbb.sensitive.word.api.IWordDeny;
import org.junit.Assert;
import org.junit.Test;
import java.util.Arrays;
+import java.util.Collections;
import java.util.List;
public class SensitiveWordFailFastTest {
@@ -20,12 +22,108 @@ public class SensitiveWordFailFastTest {
}
}).init();
- String text = "我在我的家里玩我的世界";
+ SensitiveWordBs bs1 = SensitiveWordBs.newInstance()
+ .failFastWordPattern(true)
+ .wordDeny(new IWordDeny() {
+ @Override
+ public List deny() {
+ return Collections.singletonList("操你妈");
+ }
+ })
+ .wordAllow(new IWordAllow() {
+ @Override
+ public List allow() {
+ return Collections.singletonList("你");
+ }
+ })
+ .init();
+
+ //黑长白短,且初始下标一致
+ SensitiveWordBs bs2 = SensitiveWordBs.newInstance()
+ .failFastWordPattern(true)
+ .wordDeny(new IWordDeny() {
+ @Override
+ public List deny() {
+ return Collections.singletonList("大傻逼");
+ }
+ })
+ .wordAllow(new IWordAllow() {
+ @Override
+ public List allow() {
+ return Collections.singletonList("大");
+ }
+ })
+ .init();
+
+
+
+ //白长黑短,且白和黑初始下标不在一起
+ SensitiveWordBs bs3 = SensitiveWordBs.newInstance()
+ .failFastWordPattern(true)
+ .wordDeny(new IWordDeny() {
+ @Override
+ public List deny() {
+ return Collections.singletonList("口交");
+ }
+ })
+ .wordAllow(new IWordAllow() {
+ @Override
+ public List allow() {
+ return Collections.singletonList("地铁口交易");
+ }
+ })
+ .init();
+
+
+ //白长黑短,且白和黑初始下标在一起
+ SensitiveWordBs bs4 = SensitiveWordBs.newInstance()
+ .failFastWordPattern(true)
+ .wordDeny(new IWordDeny() {
+ @Override
+ public List deny() {
+ return Collections.singletonList("龟孙");
+ }
+ })
+ .wordAllow(new IWordAllow() {
+ @Override
+ public List allow() {
+ return Collections.singletonList("龟孙可");
+ }
+ })
+ .init();
+
+
+
+
+
+
+ String text = "我在我的家里玩我的世界";
List textList = bs.findAll(text);
Assert.assertEquals(Arrays.asList("我的", "我的"), textList);
+
+ String text1 = "操你妈";
+ List textList1 = bs1.findAll(text1);
+ Assert.assertEquals(Collections.singletonList("操你妈"), textList1);
+
+ String text2 = "大傻逼";
+ List textList2 = bs2.findAll(text2);
+ Assert.assertEquals(Collections.singletonList("大傻逼"), textList2);
+
+
+ String text3 = "地铁口交易";
+ List textList3 = bs3.findAll(text3);
+ Assert.assertTrue("Expected empty list", textList3.isEmpty());
+
+ String text4 = "龟孙可";
+ List textList4 = bs4.findAll(text4);
+ Assert.assertTrue("Expected empty list", textList4.isEmpty());
+
+
}
+
+
@Test
public void fallOverTest() {
SensitiveWordBs bs = SensitiveWordBs.newInstance()
@@ -37,11 +135,109 @@ public class SensitiveWordFailFastTest {
}
}).init();
- String text = "我在我的家里玩我的世界";
+ //黑长白短,且初始下标不一致
+ SensitiveWordBs bs1 = SensitiveWordBs.newInstance()
+ .failFastWordPattern(false)
+ .wordDeny(new IWordDeny() {
+ @Override
+ public List deny() {
+ return Collections.singletonList("操你妈");
+ }
+ })
+ .wordAllow(new IWordAllow() {
+ @Override
+ public List allow() {
+ return Collections.singletonList("你");
+ }
+ })
+ .init();
+
+
+ //黑长白短,且初始下标一致
+ SensitiveWordBs bs2 = SensitiveWordBs.newInstance()
+ .failFastWordPattern(false)
+ .wordDeny(new IWordDeny() {
+ @Override
+ public List deny() {
+ return Collections.singletonList("大傻逼");
+ }
+ })
+ .wordAllow(new IWordAllow() {
+ @Override
+ public List allow() {
+ return Collections.singletonList("大");
+ }
+ })
+ .init();
+
+
+
+ //白长黑短,且白和黑初始下标不在一起
+ SensitiveWordBs bs3 = SensitiveWordBs.newInstance()
+ .failFastWordPattern(false)
+ .wordDeny(new IWordDeny() {
+ @Override
+ public List deny() {
+ return Collections.singletonList("口交");
+ }
+ })
+ .wordAllow(new IWordAllow() {
+ @Override
+ public List allow() {
+ return Collections.singletonList("地铁口交易");
+ }
+ })
+ .init();
+
+
+ //白长黑短,且白和黑初始下标在一起
+ SensitiveWordBs bs4 = SensitiveWordBs.newInstance()
+ .failFastWordPattern(false)
+ .wordDeny(new IWordDeny() {
+ @Override
+ public List deny() {
+ return Collections.singletonList("龟孙");
+ }
+ })
+ .wordAllow(new IWordAllow() {
+ @Override
+ public List allow() {
+ return Collections.singletonList("龟孙可");
+ }
+ })
+ .init();
+
+
+
+
+
+
+ String text = "我在我的家里玩我的世界";
List textList = bs.findAll(text);
Assert.assertEquals(Arrays.asList("我的", "我的世界"), textList);
+
+ String text1 = "操你妈";
+ List textList1 = bs1.findAll(text1);
+ Assert.assertEquals(Collections.singletonList("操你妈"), textList1);
+
+ String text2 = "大傻逼";
+ List textList2 = bs2.findAll(text2);
+ Assert.assertEquals(Collections.singletonList("大傻逼"), textList2);
+
+
+ String text3 = "地铁口交易";
+ List textList3 = bs3.findAll(text3);
+ Assert.assertTrue("Expected empty list", textList3.isEmpty());
+
+ String text4 = "龟孙可";
+ List textList4 = bs4.findAll(text4);
+ Assert.assertTrue("Expected empty list", textList4.isEmpty());
+
+
}
+
+
}