From b384198d5b261a602b8899ba7188ab446b7d6fed Mon Sep 17 00:00:00 2001 From: yudasen <2436348937@qq.com> Date: Sun, 16 Feb 2025 18:42:31 +0800 Subject: [PATCH 1/6] =?UTF-8?q?feature:=20=E6=8F=90=E4=BE=9B=E8=87=AA?= =?UTF-8?q?=E5=AE=9A=E4=B9=89=E6=9B=BF=E6=8D=A2=E7=AD=96=E7=95=A5=E7=9A=84?= =?UTF-8?q?api,=E8=A7=A3=E5=86=B3issue#36=E7=9A=84=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../sensitive/word/api/ISensitiveWord.java | 8 +++++--- .../sensitive/word/bs/SensitiveWordBs.java | 5 ++++- .../word/core/AbstractSensitiveWord.java | 7 +++---- .../word/bs/SensitiveWordBsReplaceTest.java | 20 +++++++++++++++++++ 4 files changed, 32 insertions(+), 8 deletions(-) create mode 100644 src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsReplaceTest.java diff --git a/src/main/java/com/github/houbb/sensitive/word/api/ISensitiveWord.java b/src/main/java/com/github/houbb/sensitive/word/api/ISensitiveWord.java index c3de23d..fe921a4 100644 --- a/src/main/java/com/github/houbb/sensitive/word/api/ISensitiveWord.java +++ b/src/main/java/com/github/houbb/sensitive/word/api/ISensitiveWord.java @@ -33,16 +33,18 @@ public interface ISensitiveWord { /** * 替换所有敏感词内容 - * + *

* ps: 这里可以添加优化。 * - * @param target 目标字符串 + * @param target 目标字符串 * @param context 上下文 + * @param replace 替换策略 * @return 替换后结果 * @since 0.3.2 */ String replace(final String target, - final IWordContext context); + final IWordContext context, + final IWordReplace replace); /** * 包含 diff --git a/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java b/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java index 31441f3..a9afa35 100644 --- a/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java +++ b/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java @@ -592,7 +592,10 @@ public class SensitiveWordBs implements ISensitiveWordDestroy { * @since 0.2.0 */ public String replace(final String target) { - return sensitiveWord.replace(target, context); + return this.replace(target,context.wordReplace()); + } + public String replace(final String target, IWordReplace replace) { + return sensitiveWord.replace(target, context, replace); } /** diff --git a/src/main/java/com/github/houbb/sensitive/word/core/AbstractSensitiveWord.java b/src/main/java/com/github/houbb/sensitive/word/core/AbstractSensitiveWord.java index 7648300..28c8866 100644 --- a/src/main/java/com/github/houbb/sensitive/word/core/AbstractSensitiveWord.java +++ b/src/main/java/com/github/houbb/sensitive/word/core/AbstractSensitiveWord.java @@ -38,10 +38,9 @@ public abstract class AbstractSensitiveWord implements ISensitiveWord { * @return 结果 * @since 0.3.2 */ - protected String doReplace(String target, List allList, IWordContext context) { + protected String doReplace(String target, List allList, IWordContext context, IWordReplace replace) { // 根据 index 直接分割 - final IWordReplace replace = context.wordReplace(); // 是否需要对 allList 排序? StringBuilder stringBuilder = new StringBuilder(); @@ -91,7 +90,7 @@ public abstract class AbstractSensitiveWord implements ISensitiveWord { } @Override - public String replace(String target, IWordContext context) { + public String replace(String target, IWordContext context, IWordReplace replace) { if(StringUtil.isEmpty(target)) { return target; } @@ -101,7 +100,7 @@ public abstract class AbstractSensitiveWord implements ISensitiveWord { return target; } - return doReplace(target, allList, context); + return doReplace(target, allList, context, replace); } @Override diff --git a/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsReplaceTest.java b/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsReplaceTest.java new file mode 100644 index 0000000..30ccdc9 --- /dev/null +++ b/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsReplaceTest.java @@ -0,0 +1,20 @@ +package com.github.houbb.sensitive.word.bs; + +import com.github.houbb.sensitive.word.api.IWordReplace; +import com.github.houbb.sensitive.word.replace.MyWordReplace; +import org.junit.Assert; +import org.junit.Test; + +public class SensitiveWordBsReplaceTest { + + @Test + public void defineReplaceTest() { + final String text = "五星红旗迎风飘扬,毛主席的画像屹立在天安门前。"; + SensitiveWordBs sensitiveWordBs = SensitiveWordBs.newInstance().init(); + + IWordReplace replace = new MyWordReplace(); + String result = sensitiveWordBs.replace(text, replace); + + Assert.assertEquals("国家旗帜迎风飘扬,教员的画像屹立在***前。", result); + } +} From bfe3a606c9bb7439433c725c83f627cd8b2145e0 Mon Sep 17 00:00:00 2001 From: yds <11232266+yuds11@user.noreply.gitee.com> Date: Fri, 2 May 2025 22:05:10 +0800 Subject: [PATCH 2/6] =?UTF-8?q?issue110,=E5=B9=B6=E5=BA=9F=E9=99=A4?= =?UTF-8?q?=E4=B8=80=E4=BA=9B=E9=BB=91=E7=99=BD=E5=90=8D=E5=8D=95=E4=B8=80?= =?UTF-8?q?=E6=AC=A1=E9=81=8D=E5=8E=86=E6=97=B6=E7=9A=84=E5=86=97=E4=BD=99?= =?UTF-8?q?=E9=80=BB=E8=BE=91?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../sensitive/word/api/IWordContext.java | 11 +++ .../sensitive/word/bs/SensitiveWordBs.java | 9 +- .../word/bs/SensitiveWordContext.java | 17 ++++ .../word/support/check/WordCheckWord.java | 41 ++++---- .../support/check/WordCheckWordMaxLen.java | 96 ------------------- .../word/bs/SensitiveWordFailFastTest.java | 47 +++++++++ .../word/bs/SensitiveWordMaxFirstTest.java | 28 ------ 7 files changed, 103 insertions(+), 146 deletions(-) delete mode 100644 src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckWordMaxLen.java create mode 100644 src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordFailFastTest.java delete mode 100644 src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordMaxFirstTest.java diff --git a/src/main/java/com/github/houbb/sensitive/word/api/IWordContext.java b/src/main/java/com/github/houbb/sensitive/word/api/IWordContext.java index 2b50f08..508899d 100644 --- a/src/main/java/com/github/houbb/sensitive/word/api/IWordContext.java +++ b/src/main/java/com/github/houbb/sensitive/word/api/IWordContext.java @@ -8,6 +8,17 @@ import com.github.houbb.sensitive.word.bs.SensitiveWordContext; */ public interface IWordContext { + + /** + * 为true时,遇到第一个敏感词词就返回 + * 解决issue110 + * @return + */ + boolean failFastWordPattern(); + + IWordContext failFastWordPattern(boolean failFastWordPattern); + + /** * 是否忽略大小写 * @return 是否 diff --git a/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java b/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java index 4f8e50d..86a48b9 100644 --- a/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java +++ b/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java @@ -2,7 +2,6 @@ package com.github.houbb.sensitive.word.bs; import com.github.houbb.heaven.support.handler.IHandler; import com.github.houbb.heaven.util.common.ArgUtil; -import com.github.houbb.heaven.util.lang.StringUtil; import com.github.houbb.heaven.util.util.CollectionUtil; import com.github.houbb.sensitive.word.api.*; import com.github.houbb.sensitive.word.api.combine.IWordAllowDenyCombine; @@ -70,6 +69,9 @@ public class SensitiveWordBs implements ISensitiveWordDestroy { */ private boolean ignoreRepeat = false; + private boolean failFastWordPattern = true; + + // 开启校验 /** * 启用数字检测 @@ -278,6 +280,7 @@ public class SensitiveWordBs implements ISensitiveWordDestroy { context.ignoreChineseStyle(ignoreChineseStyle); context.ignoreEnglishStyle(ignoreEnglishStyle); context.ignoreRepeat(ignoreRepeat); + context.failFastWordPattern(failFastWordPattern); // 开启校验 context.enableNumCheck(enableNumCheck); @@ -579,6 +582,10 @@ public class SensitiveWordBs implements ISensitiveWordDestroy { this.ignoreRepeat = ignoreRepeat; return this; } + public SensitiveWordBs failFastWordPattern(boolean failFastWordPattern) { + this.failFastWordPattern = failFastWordPattern; + return this; + } //------------------------------------------------------------------------------------ 公开方法 START /** diff --git a/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordContext.java b/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordContext.java index bcd884c..e2a68c8 100644 --- a/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordContext.java +++ b/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordContext.java @@ -10,6 +10,12 @@ import com.github.houbb.sensitive.word.support.check.WordChecks; */ public class SensitiveWordContext implements IWordContext { + + /** + * issue110 + */ + private boolean failFastWordPattern; + /** * 忽略大小写 * @since 0.0.4 @@ -220,6 +226,17 @@ public class SensitiveWordContext implements IWordContext { return new SensitiveWordContext(); } + @Override + public boolean failFastWordPattern() { + return failFastWordPattern; + } + + public IWordContext failFastWordPattern(boolean failFastWordPattern){ + this.failFastWordPattern=failFastWordPattern; + return this; + } + + @Override public boolean ignoreCase() { return ignoreCase; diff --git a/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckWord.java b/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckWord.java index 4fb7488..c3113de 100644 --- a/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckWord.java +++ b/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckWord.java @@ -1,14 +1,12 @@ package com.github.houbb.sensitive.word.support.check; import com.github.houbb.heaven.annotation.ThreadSafe; -import com.github.houbb.heaven.util.lang.StringUtil; import com.github.houbb.sensitive.word.api.ISensitiveWordCharIgnore; import com.github.houbb.sensitive.word.api.IWordCheck; import com.github.houbb.sensitive.word.api.IWordContext; import com.github.houbb.sensitive.word.api.IWordData; import com.github.houbb.sensitive.word.api.context.InnerSensitiveWordContext; import com.github.houbb.sensitive.word.constant.enums.WordTypeEnum; -import com.github.houbb.sensitive.word.constant.enums.WordValidModeEnum; import com.github.houbb.sensitive.word.constant.enums.WordContainsTypeEnum; import com.github.houbb.sensitive.word.support.result.WordLengthResult; @@ -44,47 +42,48 @@ public class WordCheckWord extends AbstractWordCheck { final IWordData wordData = context.wordData(); final IWordData wordDataAllow = context.wordDataAllow(); final ISensitiveWordCharIgnore wordCharIgnore = context.charIgnore(); + final boolean failFast = context.failFastWordPattern(); - // 前一个条件 StringBuilder stringBuilder = new StringBuilder(); char[] rawChars = txt.toCharArray(); - int tempLen = 0; int maxWhite = 0; int maxBlack = 0; - boolean firstCheck = true; - - WordContainsTypeEnum wordContainsTypeEnumAllow = wordDataAllow.contains(stringBuilder, innerContext); - WordContainsTypeEnum wordContainsTypeEnumDeny = wordData.contains(stringBuilder, innerContext); for (int i = beginIndex; i < rawChars.length; i++) { if (wordCharIgnore.ignore(i, rawChars, innerContext) && tempLen != 0) { tempLen++; continue; } - char mappingChar = formatCharMapping.get(rawChars[i]); stringBuilder.append(mappingChar); tempLen++; - if (firstCheck || !WordContainsTypeEnum.NOT_FOUND.equals(wordContainsTypeEnumAllow)) { - wordContainsTypeEnumAllow = wordDataAllow.contains(stringBuilder, innerContext); - if (WordContainsTypeEnum.CONTAINS_END.equals(wordContainsTypeEnumAllow)) { - maxWhite += tempLen; - wordContainsTypeEnumAllow = WordContainsTypeEnum.NOT_FOUND; + WordContainsTypeEnum wordContainsTypeEnumAllow = wordDataAllow.contains(stringBuilder, innerContext); + WordContainsTypeEnum wordContainsTypeEnumDeny = wordData.contains(stringBuilder, innerContext); + + if (WordContainsTypeEnum.CONTAINS_END.equals(wordContainsTypeEnumAllow)) { + maxWhite += tempLen; + if (!failFast) { + //此处将tempLen设为0,为了防止重复累加 + tempLen = 0; + }else{ + //为falFast模式,主动设为notFound退出循环 + wordContainsTypeEnumAllow=WordContainsTypeEnum.NOT_FOUND; } } - if (firstCheck || !WordContainsTypeEnum.NOT_FOUND.equals(wordContainsTypeEnumDeny)) { - wordContainsTypeEnumDeny = wordData.contains(stringBuilder, innerContext); - if (WordContainsTypeEnum.CONTAINS_END.equals(wordContainsTypeEnumDeny)) { - maxBlack += tempLen; - wordContainsTypeEnumDeny = WordContainsTypeEnum.NOT_FOUND; + if (WordContainsTypeEnum.CONTAINS_END.equals(wordContainsTypeEnumDeny)) { + maxBlack += tempLen; + if (!failFast) { + //此处将tempLen设为0,为了防止重复累加 + tempLen = 0; + }else{ + //为falFast模式,主动设为notFound退出循环 + wordContainsTypeEnumDeny=WordContainsTypeEnum.NOT_FOUND; } } - firstCheck = false; - if (WordContainsTypeEnum.NOT_FOUND.equals(wordContainsTypeEnumAllow) && WordContainsTypeEnum.NOT_FOUND.equals(wordContainsTypeEnumDeny)) { break; diff --git a/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckWordMaxLen.java b/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckWordMaxLen.java deleted file mode 100644 index b7c72c0..0000000 --- a/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckWordMaxLen.java +++ /dev/null @@ -1,96 +0,0 @@ -package com.github.houbb.sensitive.word.support.check; - -import com.github.houbb.heaven.annotation.ThreadSafe; -import com.github.houbb.sensitive.word.api.ISensitiveWordCharIgnore; -import com.github.houbb.sensitive.word.api.IWordCheck; -import com.github.houbb.sensitive.word.api.IWordContext; -import com.github.houbb.sensitive.word.api.IWordData; -import com.github.houbb.sensitive.word.api.context.InnerSensitiveWordContext; -import com.github.houbb.sensitive.word.constant.enums.WordContainsTypeEnum; -import com.github.houbb.sensitive.word.constant.enums.WordTypeEnum; -import com.github.houbb.sensitive.word.support.result.WordLengthResult; - -import java.util.Map; - -/** - * 敏感词监测实现 - * @author binbin.hou - * @since 0.26.0 - */ -@Deprecated -public class WordCheckWordMaxLen extends AbstractWordCheck { - - @Override - protected Class getSensitiveCheckClass() { - return WordCheckWordMaxLen.class; - } - - @Override - protected WordLengthResult getActualLength(int beginIndex, InnerSensitiveWordContext innerContext) { - final String txt = innerContext.originalText(); - final Map formatCharMapping = innerContext.formatCharMapping(); - final IWordContext context = innerContext.wordContext(); - final IWordData wordData = context.wordData(); - final IWordData wordDataAllow = context.wordDataAllow(); - final ISensitiveWordCharIgnore wordCharIgnore = context.charIgnore(); - - // 前一个条件 - StringBuilder stringBuilder = new StringBuilder(); - char[] rawChars = txt.toCharArray(); - - int tempLen = 0; - int maxWhite = 0; - int maxBlack = 0; - boolean firstCheck = true; - - WordContainsTypeEnum wordContainsTypeEnumAllow = wordDataAllow.contains(stringBuilder, innerContext); - WordContainsTypeEnum wordContainsTypeEnumDeny = wordData.contains(stringBuilder, innerContext); - - for (int i = beginIndex; i < rawChars.length; i++) { - if (wordCharIgnore.ignore(i, rawChars, innerContext) && tempLen != 0) { - tempLen++; - continue; - } - - char mappingChar = formatCharMapping.get(rawChars[i]); - stringBuilder.append(mappingChar); - tempLen++; - - if (firstCheck || !WordContainsTypeEnum.NOT_FOUND.equals(wordContainsTypeEnumAllow)) { - wordContainsTypeEnumAllow = wordDataAllow.contains(stringBuilder, innerContext); - if (WordContainsTypeEnum.CONTAINS_END.equals(wordContainsTypeEnumAllow)) { - maxWhite += tempLen; - wordContainsTypeEnumAllow = WordContainsTypeEnum.NOT_FOUND; - } - } - - // 黑名单命中 - if (firstCheck || !WordContainsTypeEnum.NOT_FOUND.equals(wordContainsTypeEnumDeny)) { - wordContainsTypeEnumDeny = wordData.contains(stringBuilder, innerContext); - if (WordContainsTypeEnum.CONTAINS_END.equals(wordContainsTypeEnumDeny)) { - maxBlack += tempLen; - wordContainsTypeEnumDeny = WordContainsTypeEnum.NOT_FOUND; - } - } - - // 不再是第一次检测 - firstCheck = false; - - // 黑白名单都未匹配 - if (WordContainsTypeEnum.NOT_FOUND.equals(wordContainsTypeEnumAllow) && - WordContainsTypeEnum.NOT_FOUND.equals(wordContainsTypeEnumDeny)) { - break; - } - } - - return WordLengthResult.newInstance() - .wordAllowLen(maxWhite) - .wordDenyLen(maxBlack); - } - - @Override - protected String getType() { - return WordTypeEnum.WORD.getCode(); - } - -} diff --git a/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordFailFastTest.java b/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordFailFastTest.java new file mode 100644 index 0000000..6b4258c --- /dev/null +++ b/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordFailFastTest.java @@ -0,0 +1,47 @@ +package com.github.houbb.sensitive.word.bs; + +import com.github.houbb.sensitive.word.api.IWordDeny; +import org.junit.Assert; +import org.junit.Test; + +import java.util.Arrays; +import java.util.List; + +public class SensitiveWordFailFastTest { + + @Test + public void failFastTest() { + SensitiveWordBs bs = SensitiveWordBs.newInstance() + .failFastWordPattern(true) + .wordDeny(new IWordDeny() { + @Override + public List deny() { + return Arrays.asList("我的世界", "我的"); + } + }).init(); + + String text = "我在我的家里玩我的世界"; + + List textList = bs.findAll(text); + Assert.assertEquals(Arrays.asList("我的", "我的"), textList); + + } + @Test + public void fallOverTest() { + SensitiveWordBs bs = SensitiveWordBs.newInstance() + .failFastWordPattern(false) + .wordDeny(new IWordDeny() { + @Override + public List deny() { + return Arrays.asList("我的世界", "我的"); + } + }).init(); + + String text = "我在我的家里玩我的世界"; + + List textList = bs.findAll(text); + Assert.assertEquals(Arrays.asList("我的", "我的世界"), textList); + + } + +} diff --git a/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordMaxFirstTest.java b/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordMaxFirstTest.java deleted file mode 100644 index 2c0f819..0000000 --- a/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordMaxFirstTest.java +++ /dev/null @@ -1,28 +0,0 @@ -package com.github.houbb.sensitive.word.bs; - -import com.github.houbb.sensitive.word.api.IWordDeny; -import org.junit.Assert; -import org.junit.Test; - -import java.util.Arrays; -import java.util.List; - -public class SensitiveWordMaxFirstTest { - - @Test - public void maxFirstTest() { - SensitiveWordBs bs = SensitiveWordBs.newInstance() - .wordDeny(new IWordDeny() { - @Override - public List deny() { - return Arrays.asList("我的世界", "我的"); - } - }).init(); - - String text = "我的世界我的好玩"; - - List textList = bs.findAll(text); -// Assert.assertEquals("", textList.toString()); - } - -} From 849dd6438043c7cc96b92464685cc5f45bc72804 Mon Sep 17 00:00:00 2001 From: yds <11232266+yuds11@user.noreply.gitee.com> Date: Fri, 2 May 2025 22:48:50 +0800 Subject: [PATCH 3/6] =?UTF-8?q?issue110,=E5=B9=B6=E5=BA=9F=E9=99=A4?= =?UTF-8?q?=E4=B8=80=E4=BA=9B=E9=BB=91=E7=99=BD=E5=90=8D=E5=8D=95=E4=B8=80?= =?UTF-8?q?=E6=AC=A1=E9=81=8D=E5=8E=86=E6=97=B6=E7=9A=84=E5=86=97=E4=BD=99?= =?UTF-8?q?=E9=80=BB=E8=BE=91?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../sensitive/word/support/check/WordCheckWord.java | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckWord.java b/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckWord.java index c3113de..a72abd6 100644 --- a/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckWord.java +++ b/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckWord.java @@ -9,6 +9,7 @@ import com.github.houbb.sensitive.word.api.context.InnerSensitiveWordContext; import com.github.houbb.sensitive.word.constant.enums.WordTypeEnum; import com.github.houbb.sensitive.word.constant.enums.WordContainsTypeEnum; import com.github.houbb.sensitive.word.support.result.WordLengthResult; +import com.github.houbb.sensitive.word.utils.InnerWordFormatUtils; import java.util.Map; @@ -90,9 +91,18 @@ public class WordCheckWord extends AbstractWordCheck { } } + String whiteWord = txt.substring(beginIndex, beginIndex + maxWhite); + String blackWord = txt.substring(beginIndex, beginIndex + maxBlack); + + String formatWhiteWord= InnerWordFormatUtils.format(whiteWord,context); + String formatBlackWord= InnerWordFormatUtils.format(blackWord,context); + + return WordLengthResult.newInstance() .wordAllowLen(maxWhite) - .wordDenyLen(maxBlack); + .wordDenyLen(maxBlack) + .wordAllow(formatWhiteWord) + .wordDeny(formatBlackWord); } @Override From 91e811b360c780f90462c8f826ffc241bbfe06a5 Mon Sep 17 00:00:00 2001 From: yds <11232266+yuds11@user.noreply.gitee.com> Date: Fri, 2 May 2025 22:49:43 +0800 Subject: [PATCH 4/6] =?UTF-8?q?issue110,=E5=B9=B6=E5=BA=9F=E9=99=A4?= =?UTF-8?q?=E4=B8=80=E4=BA=9B=E9=BB=91=E7=99=BD=E5=90=8D=E5=8D=95=E4=B8=80?= =?UTF-8?q?=E6=AC=A1=E9=81=8D=E5=8E=86=E6=97=B6=E7=9A=84=E5=86=97=E4=BD=99?= =?UTF-8?q?=E9=80=BB=E8=BE=91,=E5=B9=B6=E8=A7=A3=E5=86=B3=E8=BF=94?= =?UTF-8?q?=E5=9B=9E=E5=AE=9E=E9=99=85=E5=80=BC=E6=97=B6=E7=9A=84=E5=86=B2?= =?UTF-8?q?=E7=AA=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../sensitive/word/bs/SensitiveWordContext.java | 2 ++ .../word/support/check/WordCheckWord.java | 16 +++++++++------- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordContext.java b/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordContext.java index e2a68c8..6f142fd 100644 --- a/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordContext.java +++ b/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordContext.java @@ -231,6 +231,8 @@ public class SensitiveWordContext implements IWordContext { return failFastWordPattern; } + + @Override public IWordContext failFastWordPattern(boolean failFastWordPattern){ this.failFastWordPattern=failFastWordPattern; return this; diff --git a/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckWord.java b/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckWord.java index a72abd6..f7779d1 100644 --- a/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckWord.java +++ b/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckWord.java @@ -5,6 +5,7 @@ import com.github.houbb.sensitive.word.api.ISensitiveWordCharIgnore; import com.github.houbb.sensitive.word.api.IWordCheck; import com.github.houbb.sensitive.word.api.IWordContext; import com.github.houbb.sensitive.word.api.IWordData; +import com.github.houbb.sensitive.word.api.IWordFormat; import com.github.houbb.sensitive.word.api.context.InnerSensitiveWordContext; import com.github.houbb.sensitive.word.constant.enums.WordTypeEnum; import com.github.houbb.sensitive.word.constant.enums.WordContainsTypeEnum; @@ -50,10 +51,12 @@ public class WordCheckWord extends AbstractWordCheck { int tempLen = 0; int maxWhite = 0; int maxBlack = 0; + int skipLen=0; for (int i = beginIndex; i < rawChars.length; i++) { if (wordCharIgnore.ignore(i, rawChars, innerContext) && tempLen != 0) { tempLen++; + skipLen++; continue; } char mappingChar = formatCharMapping.get(rawChars[i]); @@ -91,20 +94,19 @@ public class WordCheckWord extends AbstractWordCheck { } } - String whiteWord = txt.substring(beginIndex, beginIndex + maxWhite); - String blackWord = txt.substring(beginIndex, beginIndex + maxBlack); - - String formatWhiteWord= InnerWordFormatUtils.format(whiteWord,context); - String formatBlackWord= InnerWordFormatUtils.format(blackWord,context); + String string = stringBuilder.toString(); + String wordAllow = string.substring(0, Math.max(0,maxWhite - skipLen)); + String wordDeny = string.substring(0, Math.max(0,maxBlack - skipLen)); return WordLengthResult.newInstance() .wordAllowLen(maxWhite) .wordDenyLen(maxBlack) - .wordAllow(formatWhiteWord) - .wordDeny(formatBlackWord); + .wordAllow(wordAllow) + .wordDeny(wordDeny); } + @Override protected String getType() { return WordTypeEnum.WORD.getCode(); From c35772d70e3c1b65dce3ccf4c91e30849e206723 Mon Sep 17 00:00:00 2001 From: yds <11232266+yuds11@user.noreply.gitee.com> Date: Fri, 2 May 2025 23:31:43 +0800 Subject: [PATCH 5/6] =?UTF-8?q?=E5=88=A0=E9=99=A4=E8=AF=AF=E5=8A=A0?= =?UTF-8?q?=E7=9A=84replace?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../houbb/sensitive/word/api/ISensitiveWord.java | 14 ++++++-------- .../houbb/sensitive/word/bs/SensitiveWordBs.java | 6 ++---- .../sensitive/word/core/AbstractSensitiveWord.java | 8 ++++---- 3 files changed, 12 insertions(+), 16 deletions(-) diff --git a/src/main/java/com/github/houbb/sensitive/word/api/ISensitiveWord.java b/src/main/java/com/github/houbb/sensitive/word/api/ISensitiveWord.java index fe921a4..17f246d 100644 --- a/src/main/java/com/github/houbb/sensitive/word/api/ISensitiveWord.java +++ b/src/main/java/com/github/houbb/sensitive/word/api/ISensitiveWord.java @@ -19,7 +19,7 @@ public interface ISensitiveWord { * @see WordValidModeEnum#FAIL_OVER 建议使用全部检测返回模式 */ List findAll(final String string, - final IWordContext context); + final IWordContext context); /** * 返回第一个对应的敏感词 @@ -29,22 +29,20 @@ public interface ISensitiveWord { * @since 0.3.2 */ IWordResult findFirst(final String string, - final IWordContext context); + final IWordContext context); /** * 替换所有敏感词内容 - *

+ * * ps: 这里可以添加优化。 * - * @param target 目标字符串 + * @param target 目标字符串 * @param context 上下文 - * @param replace 替换策略 * @return 替换后结果 * @since 0.3.2 */ String replace(final String target, - final IWordContext context, - final IWordReplace replace); + final IWordContext context); /** * 包含 @@ -54,6 +52,6 @@ public interface ISensitiveWord { * @since 0.3.2 */ boolean contains(final String string, - final IWordContext context); + final IWordContext context); } diff --git a/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java b/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java index 86a48b9..8d10b5b 100644 --- a/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java +++ b/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java @@ -672,12 +672,10 @@ public class SensitiveWordBs implements ISensitiveWordDestroy { * @since 0.2.0 */ public String replace(final String target) { - return this.replace(target,context.wordReplace()); - } - public String replace(final String target, IWordReplace replace) { - return sensitiveWord.replace(target, context, replace); + return sensitiveWord.replace(target, context); } + /** * 获取敏感词的标签 * diff --git a/src/main/java/com/github/houbb/sensitive/word/core/AbstractSensitiveWord.java b/src/main/java/com/github/houbb/sensitive/word/core/AbstractSensitiveWord.java index 28c8866..c3ce135 100644 --- a/src/main/java/com/github/houbb/sensitive/word/core/AbstractSensitiveWord.java +++ b/src/main/java/com/github/houbb/sensitive/word/core/AbstractSensitiveWord.java @@ -38,8 +38,9 @@ public abstract class AbstractSensitiveWord implements ISensitiveWord { * @return 结果 * @since 0.3.2 */ - protected String doReplace(String target, List allList, IWordContext context, IWordReplace replace) { + protected String doReplace(String target, List allList, IWordContext context) { // 根据 index 直接分割 + final IWordReplace replace = context.wordReplace(); // 是否需要对 allList 排序? StringBuilder stringBuilder = new StringBuilder(); @@ -90,7 +91,7 @@ public abstract class AbstractSensitiveWord implements ISensitiveWord { } @Override - public String replace(String target, IWordContext context, IWordReplace replace) { + public String replace(String target, IWordContext context) { if(StringUtil.isEmpty(target)) { return target; } @@ -100,9 +101,8 @@ public abstract class AbstractSensitiveWord implements ISensitiveWord { return target; } - return doReplace(target, allList, context, replace); + return doReplace(target, allList, context); } - @Override public boolean contains(String string, IWordContext context) { //1. 第一个存在 From 6132261e77a7c381ce109d3386a847fa764b0f86 Mon Sep 17 00:00:00 2001 From: yds <11232266+yuds11@user.noreply.gitee.com> Date: Fri, 2 May 2025 23:31:43 +0800 Subject: [PATCH 6/6] =?UTF-8?q?=E9=BB=91=E7=99=BD=E5=90=8D=E5=8D=95?= =?UTF-8?q?=E5=85=B1=E5=90=8C=E6=A3=80=E6=B5=8B=E6=97=B6=E9=80=BB=E8=BE=91?= =?UTF-8?q?=E7=9A=84=E4=BF=AE=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../sensitive/word/api/ISensitiveWord.java | 14 +- .../sensitive/word/bs/SensitiveWordBs.java | 6 +- .../word/core/AbstractSensitiveWord.java | 8 +- .../sensitive/word/core/SensitiveWord.java | 34 ++- .../word/support/check/WordCheckWord.java | 24 +-- .../word/bs/SensitiveWordBsReplaceTest.java | 20 -- .../word/bs/SensitiveWordFailFastTest.java | 200 +++++++++++++++++- 7 files changed, 233 insertions(+), 73 deletions(-) delete mode 100644 src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsReplaceTest.java diff --git a/src/main/java/com/github/houbb/sensitive/word/api/ISensitiveWord.java b/src/main/java/com/github/houbb/sensitive/word/api/ISensitiveWord.java index fe921a4..17f246d 100644 --- a/src/main/java/com/github/houbb/sensitive/word/api/ISensitiveWord.java +++ b/src/main/java/com/github/houbb/sensitive/word/api/ISensitiveWord.java @@ -19,7 +19,7 @@ public interface ISensitiveWord { * @see WordValidModeEnum#FAIL_OVER 建议使用全部检测返回模式 */ List findAll(final String string, - final IWordContext context); + final IWordContext context); /** * 返回第一个对应的敏感词 @@ -29,22 +29,20 @@ public interface ISensitiveWord { * @since 0.3.2 */ IWordResult findFirst(final String string, - final IWordContext context); + final IWordContext context); /** * 替换所有敏感词内容 - *

+ * * ps: 这里可以添加优化。 * - * @param target 目标字符串 + * @param target 目标字符串 * @param context 上下文 - * @param replace 替换策略 * @return 替换后结果 * @since 0.3.2 */ String replace(final String target, - final IWordContext context, - final IWordReplace replace); + final IWordContext context); /** * 包含 @@ -54,6 +52,6 @@ public interface ISensitiveWord { * @since 0.3.2 */ boolean contains(final String string, - final IWordContext context); + final IWordContext context); } diff --git a/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java b/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java index 86a48b9..8d10b5b 100644 --- a/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java +++ b/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java @@ -672,12 +672,10 @@ public class SensitiveWordBs implements ISensitiveWordDestroy { * @since 0.2.0 */ public String replace(final String target) { - return this.replace(target,context.wordReplace()); - } - public String replace(final String target, IWordReplace replace) { - return sensitiveWord.replace(target, context, replace); + return sensitiveWord.replace(target, context); } + /** * 获取敏感词的标签 * diff --git a/src/main/java/com/github/houbb/sensitive/word/core/AbstractSensitiveWord.java b/src/main/java/com/github/houbb/sensitive/word/core/AbstractSensitiveWord.java index 28c8866..c3ce135 100644 --- a/src/main/java/com/github/houbb/sensitive/word/core/AbstractSensitiveWord.java +++ b/src/main/java/com/github/houbb/sensitive/word/core/AbstractSensitiveWord.java @@ -38,8 +38,9 @@ public abstract class AbstractSensitiveWord implements ISensitiveWord { * @return 结果 * @since 0.3.2 */ - protected String doReplace(String target, List allList, IWordContext context, IWordReplace replace) { + protected String doReplace(String target, List allList, IWordContext context) { // 根据 index 直接分割 + final IWordReplace replace = context.wordReplace(); // 是否需要对 allList 排序? StringBuilder stringBuilder = new StringBuilder(); @@ -90,7 +91,7 @@ public abstract class AbstractSensitiveWord implements ISensitiveWord { } @Override - public String replace(String target, IWordContext context, IWordReplace replace) { + public String replace(String target, IWordContext context) { if(StringUtil.isEmpty(target)) { return target; } @@ -100,9 +101,8 @@ public abstract class AbstractSensitiveWord implements ISensitiveWord { return target; } - return doReplace(target, allList, context, replace); + return doReplace(target, allList, context); } - @Override public boolean contains(String string, IWordContext context) { //1. 第一个存在 diff --git a/src/main/java/com/github/houbb/sensitive/word/core/SensitiveWord.java b/src/main/java/com/github/houbb/sensitive/word/core/SensitiveWord.java index 8147d34..7af8705 100644 --- a/src/main/java/com/github/houbb/sensitive/word/core/SensitiveWord.java +++ b/src/main/java/com/github/houbb/sensitive/word/core/SensitiveWord.java @@ -37,7 +37,7 @@ public class SensitiveWord extends AbstractSensitiveWord { @Override protected IWordResult doFindFirst(String string, IWordContext context) { List wordResults = innerSensitiveWords(string, WordValidModeEnum.FAIL_FAST, context); - if(!CollectionUtil.isEmpty(wordResults)){ + if (!CollectionUtil.isEmpty(wordResults)) { return wordResults.get(0); } return null; @@ -47,14 +47,14 @@ public class SensitiveWord extends AbstractSensitiveWord { /** * 获取敏感词列表 * - * @param text 文本 + * @param text 文本 * @param modeEnum 模式 * @return 结果列表 * @since 0.0.1 */ private List innerSensitiveWords(final String text, - final WordValidModeEnum modeEnum, - final IWordContext context) { + final WordValidModeEnum modeEnum, + final IWordContext context) { //1. 是否存在敏感词,如果比存在,直接返回空列表 final IWordCheck sensitiveCheck = context.sensitiveCheck(); List resultList = Guavas.newArrayList(); @@ -74,38 +74,32 @@ public class SensitiveWord extends AbstractSensitiveWord { // v0.21.0 白名单跳过 WordCheckResult checkResult = sensitiveCheck.sensitiveCheck(i, checkContext); int wordLengthAllow = checkResult.wordLengthResult().wordAllowLen(); - if(wordLengthAllow > 0) { - i += wordLengthAllow-1; - continue; - } + int wordLengthDeny = checkResult.wordLengthResult().wordDenyLen(); - - // 命中 - final WordLengthResult wordLengthResult = checkResult.wordLengthResult(); - int wordLength = wordLengthResult.wordDenyLen(); - if (wordLength > 0) { + //如果命中的白名单长度小于黑名单,则直接对黑名单的敏感词进行保存 + if (wordLengthAllow < wordLengthDeny) { // 保存敏感词 WordResult wordResult = WordResult.newInstance() .startIndex(i) - .endIndex(i+wordLength) + .endIndex(i + wordLengthDeny) .type(checkResult.type()) - .word(wordLengthResult.wordDeny()); + .word(checkResult.wordLengthResult().wordDeny()); //v0.13.0 添加判断 - if(wordResultCondition.match(wordResult, text, modeEnum, context)) { + if (wordResultCondition.match(wordResult, text, modeEnum, context)) { resultList.add(wordResult); // 快速返回 if (WordValidModeEnum.FAIL_FAST.equals(modeEnum)) { break; } } - - - // 增加 i 的步长 // 为什么要-1,因为默认就会自增1 // TODO: 这里可以根据字符串匹配算法优化。 - i += wordLength - 1; + i += wordLengthDeny - 1; + } else { + //如果命中的白名单长度大于黑名单长度,则跳过白名单个字符 + i += Math.max(0, wordLengthAllow - 1); } } diff --git a/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckWord.java b/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckWord.java index f7779d1..dbd309a 100644 --- a/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckWord.java +++ b/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckWord.java @@ -51,7 +51,7 @@ public class WordCheckWord extends AbstractWordCheck { int tempLen = 0; int maxWhite = 0; int maxBlack = 0; - int skipLen=0; + int skipLen = 0; for (int i = beginIndex; i < rawChars.length; i++) { if (wordCharIgnore.ignore(i, rawChars, innerContext) && tempLen != 0) { @@ -67,24 +67,18 @@ public class WordCheckWord extends AbstractWordCheck { WordContainsTypeEnum wordContainsTypeEnumDeny = wordData.contains(stringBuilder, innerContext); if (WordContainsTypeEnum.CONTAINS_END.equals(wordContainsTypeEnumAllow)) { - maxWhite += tempLen; - if (!failFast) { - //此处将tempLen设为0,为了防止重复累加 - tempLen = 0; - }else{ + maxWhite = tempLen; + if (failFast) { //为falFast模式,主动设为notFound退出循环 - wordContainsTypeEnumAllow=WordContainsTypeEnum.NOT_FOUND; + wordContainsTypeEnumAllow = WordContainsTypeEnum.NOT_FOUND; } } if (WordContainsTypeEnum.CONTAINS_END.equals(wordContainsTypeEnumDeny)) { - maxBlack += tempLen; - if (!failFast) { - //此处将tempLen设为0,为了防止重复累加 - tempLen = 0; - }else{ + maxBlack = tempLen; + if (failFast) { //为falFast模式,主动设为notFound退出循环 - wordContainsTypeEnumDeny=WordContainsTypeEnum.NOT_FOUND; + wordContainsTypeEnumDeny = WordContainsTypeEnum.NOT_FOUND; } } @@ -95,8 +89,8 @@ public class WordCheckWord extends AbstractWordCheck { } String string = stringBuilder.toString(); - String wordAllow = string.substring(0, Math.max(0,maxWhite - skipLen)); - String wordDeny = string.substring(0, Math.max(0,maxBlack - skipLen)); + String wordAllow = string.substring(0, Math.max(0, maxWhite - skipLen)); + String wordDeny = string.substring(0, Math.max(0, maxBlack - skipLen)); return WordLengthResult.newInstance() diff --git a/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsReplaceTest.java b/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsReplaceTest.java deleted file mode 100644 index 30ccdc9..0000000 --- a/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsReplaceTest.java +++ /dev/null @@ -1,20 +0,0 @@ -package com.github.houbb.sensitive.word.bs; - -import com.github.houbb.sensitive.word.api.IWordReplace; -import com.github.houbb.sensitive.word.replace.MyWordReplace; -import org.junit.Assert; -import org.junit.Test; - -public class SensitiveWordBsReplaceTest { - - @Test - public void defineReplaceTest() { - final String text = "五星红旗迎风飘扬,毛主席的画像屹立在天安门前。"; - SensitiveWordBs sensitiveWordBs = SensitiveWordBs.newInstance().init(); - - IWordReplace replace = new MyWordReplace(); - String result = sensitiveWordBs.replace(text, replace); - - Assert.assertEquals("国家旗帜迎风飘扬,教员的画像屹立在***前。", result); - } -} diff --git a/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordFailFastTest.java b/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordFailFastTest.java index 6b4258c..498a5aa 100644 --- a/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordFailFastTest.java +++ b/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordFailFastTest.java @@ -1,10 +1,12 @@ package com.github.houbb.sensitive.word.bs; +import com.github.houbb.sensitive.word.api.IWordAllow; import com.github.houbb.sensitive.word.api.IWordDeny; import org.junit.Assert; import org.junit.Test; import java.util.Arrays; +import java.util.Collections; import java.util.List; public class SensitiveWordFailFastTest { @@ -20,12 +22,108 @@ public class SensitiveWordFailFastTest { } }).init(); - String text = "我在我的家里玩我的世界"; + SensitiveWordBs bs1 = SensitiveWordBs.newInstance() + .failFastWordPattern(true) + .wordDeny(new IWordDeny() { + @Override + public List deny() { + return Collections.singletonList("操你妈"); + } + }) + .wordAllow(new IWordAllow() { + @Override + public List allow() { + return Collections.singletonList("你"); + } + }) + .init(); + + //黑长白短,且初始下标一致 + SensitiveWordBs bs2 = SensitiveWordBs.newInstance() + .failFastWordPattern(true) + .wordDeny(new IWordDeny() { + @Override + public List deny() { + return Collections.singletonList("大傻逼"); + } + }) + .wordAllow(new IWordAllow() { + @Override + public List allow() { + return Collections.singletonList("大"); + } + }) + .init(); + + + + //白长黑短,且白和黑初始下标不再一起 + SensitiveWordBs bs3 = SensitiveWordBs.newInstance() + .failFastWordPattern(true) + .wordDeny(new IWordDeny() { + @Override + public List deny() { + return Collections.singletonList("口交"); + } + }) + .wordAllow(new IWordAllow() { + @Override + public List allow() { + return Collections.singletonList("地铁口交易"); + } + }) + .init(); + + + //白长黑短,且白和黑初始下标在一起 + SensitiveWordBs bs4 = SensitiveWordBs.newInstance() + .failFastWordPattern(true) + .wordDeny(new IWordDeny() { + @Override + public List deny() { + return Collections.singletonList("龟孙"); + } + }) + .wordAllow(new IWordAllow() { + @Override + public List allow() { + return Collections.singletonList("龟孙可"); + } + }) + .init(); + + + + + + + String text = "我在我的家里玩我的世界"; List textList = bs.findAll(text); Assert.assertEquals(Arrays.asList("我的", "我的"), textList); + + String text1 = "操你妈"; + List textList1 = bs1.findAll(text1); + Assert.assertEquals(Collections.singletonList("操你妈"), textList1); + + String text2 = "大傻逼"; + List textList2 = bs2.findAll(text2); + Assert.assertEquals(Collections.singletonList("大傻逼"), textList2); + + + String text3 = "地铁口交易"; + List textList3 = bs3.findAll(text3); + Assert.assertTrue("Expected empty list", textList3.isEmpty()); + + String text4 = "龟孙可"; + List textList4 = bs4.findAll(text4); + Assert.assertTrue("Expected empty list", textList4.isEmpty()); + + } + + @Test public void fallOverTest() { SensitiveWordBs bs = SensitiveWordBs.newInstance() @@ -37,11 +135,109 @@ public class SensitiveWordFailFastTest { } }).init(); - String text = "我在我的家里玩我的世界"; + //黑长白短,且初始下标不一致 + SensitiveWordBs bs1 = SensitiveWordBs.newInstance() + .failFastWordPattern(false) + .wordDeny(new IWordDeny() { + @Override + public List deny() { + return Collections.singletonList("操你妈"); + } + }) + .wordAllow(new IWordAllow() { + @Override + public List allow() { + return Collections.singletonList("你"); + } + }) + .init(); + + + //黑长白短,且初始下标一致 + SensitiveWordBs bs2 = SensitiveWordBs.newInstance() + .failFastWordPattern(false) + .wordDeny(new IWordDeny() { + @Override + public List deny() { + return Collections.singletonList("大傻逼"); + } + }) + .wordAllow(new IWordAllow() { + @Override + public List allow() { + return Collections.singletonList("大"); + } + }) + .init(); + + + + //白长黑短,且白和黑初始下标不再一起 + SensitiveWordBs bs3 = SensitiveWordBs.newInstance() + .failFastWordPattern(false) + .wordDeny(new IWordDeny() { + @Override + public List deny() { + return Collections.singletonList("口交"); + } + }) + .wordAllow(new IWordAllow() { + @Override + public List allow() { + return Collections.singletonList("地铁口交易"); + } + }) + .init(); + + + //白长黑短,且白和黑初始下标在一起 + SensitiveWordBs bs4 = SensitiveWordBs.newInstance() + .failFastWordPattern(false) + .wordDeny(new IWordDeny() { + @Override + public List deny() { + return Collections.singletonList("龟孙"); + } + }) + .wordAllow(new IWordAllow() { + @Override + public List allow() { + return Collections.singletonList("龟孙可"); + } + }) + .init(); + + + + + + + String text = "我在我的家里玩我的世界"; List textList = bs.findAll(text); Assert.assertEquals(Arrays.asList("我的", "我的世界"), textList); + + String text1 = "操你妈"; + List textList1 = bs1.findAll(text1); + Assert.assertEquals(Collections.singletonList("操你妈"), textList1); + + String text2 = "大傻逼"; + List textList2 = bs2.findAll(text2); + Assert.assertEquals(Collections.singletonList("大傻逼"), textList2); + + + String text3 = "地铁口交易"; + List textList3 = bs3.findAll(text3); + Assert.assertTrue("Expected empty list", textList3.isEmpty()); + + String text4 = "龟孙可"; + List textList4 = bs4.findAll(text4); + Assert.assertTrue("Expected empty list", textList4.isEmpty()); + + } + + }