From bfe3a606c9bb7439433c725c83f627cd8b2145e0 Mon Sep 17 00:00:00 2001 From: yds <11232266+yuds11@user.noreply.gitee.com> Date: Fri, 2 May 2025 22:05:10 +0800 Subject: [PATCH] =?UTF-8?q?issue110,=E5=B9=B6=E5=BA=9F=E9=99=A4=E4=B8=80?= =?UTF-8?q?=E4=BA=9B=E9=BB=91=E7=99=BD=E5=90=8D=E5=8D=95=E4=B8=80=E6=AC=A1?= =?UTF-8?q?=E9=81=8D=E5=8E=86=E6=97=B6=E7=9A=84=E5=86=97=E4=BD=99=E9=80=BB?= =?UTF-8?q?=E8=BE=91?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../sensitive/word/api/IWordContext.java | 11 +++ .../sensitive/word/bs/SensitiveWordBs.java | 9 +- .../word/bs/SensitiveWordContext.java | 17 ++++ .../word/support/check/WordCheckWord.java | 41 ++++---- .../support/check/WordCheckWordMaxLen.java | 96 ------------------- .../word/bs/SensitiveWordFailFastTest.java | 47 +++++++++ .../word/bs/SensitiveWordMaxFirstTest.java | 28 ------ 7 files changed, 103 insertions(+), 146 deletions(-) delete mode 100644 src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckWordMaxLen.java create mode 100644 src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordFailFastTest.java delete mode 100644 src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordMaxFirstTest.java diff --git a/src/main/java/com/github/houbb/sensitive/word/api/IWordContext.java b/src/main/java/com/github/houbb/sensitive/word/api/IWordContext.java index 2b50f08..508899d 100644 --- a/src/main/java/com/github/houbb/sensitive/word/api/IWordContext.java +++ b/src/main/java/com/github/houbb/sensitive/word/api/IWordContext.java @@ -8,6 +8,17 @@ import com.github.houbb.sensitive.word.bs.SensitiveWordContext; */ public interface IWordContext { + + /** + * 为true时,遇到第一个敏感词词就返回 + * 解决issue110 + * @return + */ + boolean failFastWordPattern(); + + IWordContext failFastWordPattern(boolean failFastWordPattern); + + /** * 是否忽略大小写 * @return 是否 diff --git a/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java b/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java index 4f8e50d..86a48b9 100644 --- a/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java +++ b/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java @@ -2,7 +2,6 @@ package com.github.houbb.sensitive.word.bs; import com.github.houbb.heaven.support.handler.IHandler; import com.github.houbb.heaven.util.common.ArgUtil; -import com.github.houbb.heaven.util.lang.StringUtil; import com.github.houbb.heaven.util.util.CollectionUtil; import com.github.houbb.sensitive.word.api.*; import com.github.houbb.sensitive.word.api.combine.IWordAllowDenyCombine; @@ -70,6 +69,9 @@ public class SensitiveWordBs implements ISensitiveWordDestroy { */ private boolean ignoreRepeat = false; + private boolean failFastWordPattern = true; + + // 开启校验 /** * 启用数字检测 @@ -278,6 +280,7 @@ public class SensitiveWordBs implements ISensitiveWordDestroy { context.ignoreChineseStyle(ignoreChineseStyle); context.ignoreEnglishStyle(ignoreEnglishStyle); context.ignoreRepeat(ignoreRepeat); + context.failFastWordPattern(failFastWordPattern); // 开启校验 context.enableNumCheck(enableNumCheck); @@ -579,6 +582,10 @@ public class SensitiveWordBs implements ISensitiveWordDestroy { this.ignoreRepeat = ignoreRepeat; return this; } + public SensitiveWordBs failFastWordPattern(boolean failFastWordPattern) { + this.failFastWordPattern = failFastWordPattern; + return this; + } //------------------------------------------------------------------------------------ 公开方法 START /** diff --git a/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordContext.java b/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordContext.java index bcd884c..e2a68c8 100644 --- a/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordContext.java +++ b/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordContext.java @@ -10,6 +10,12 @@ import com.github.houbb.sensitive.word.support.check.WordChecks; */ public class SensitiveWordContext implements IWordContext { + + /** + * issue110 + */ + private boolean failFastWordPattern; + /** * 忽略大小写 * @since 0.0.4 @@ -220,6 +226,17 @@ public class SensitiveWordContext implements IWordContext { return new SensitiveWordContext(); } + @Override + public boolean failFastWordPattern() { + return failFastWordPattern; + } + + public IWordContext failFastWordPattern(boolean failFastWordPattern){ + this.failFastWordPattern=failFastWordPattern; + return this; + } + + @Override public boolean ignoreCase() { return ignoreCase; diff --git a/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckWord.java b/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckWord.java index 4fb7488..c3113de 100644 --- a/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckWord.java +++ b/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckWord.java @@ -1,14 +1,12 @@ package com.github.houbb.sensitive.word.support.check; import com.github.houbb.heaven.annotation.ThreadSafe; -import com.github.houbb.heaven.util.lang.StringUtil; import com.github.houbb.sensitive.word.api.ISensitiveWordCharIgnore; import com.github.houbb.sensitive.word.api.IWordCheck; import com.github.houbb.sensitive.word.api.IWordContext; import com.github.houbb.sensitive.word.api.IWordData; import com.github.houbb.sensitive.word.api.context.InnerSensitiveWordContext; import com.github.houbb.sensitive.word.constant.enums.WordTypeEnum; -import com.github.houbb.sensitive.word.constant.enums.WordValidModeEnum; import com.github.houbb.sensitive.word.constant.enums.WordContainsTypeEnum; import com.github.houbb.sensitive.word.support.result.WordLengthResult; @@ -44,47 +42,48 @@ public class WordCheckWord extends AbstractWordCheck { final IWordData wordData = context.wordData(); final IWordData wordDataAllow = context.wordDataAllow(); final ISensitiveWordCharIgnore wordCharIgnore = context.charIgnore(); + final boolean failFast = context.failFastWordPattern(); - // 前一个条件 StringBuilder stringBuilder = new StringBuilder(); char[] rawChars = txt.toCharArray(); - int tempLen = 0; int maxWhite = 0; int maxBlack = 0; - boolean firstCheck = true; - - WordContainsTypeEnum wordContainsTypeEnumAllow = wordDataAllow.contains(stringBuilder, innerContext); - WordContainsTypeEnum wordContainsTypeEnumDeny = wordData.contains(stringBuilder, innerContext); for (int i = beginIndex; i < rawChars.length; i++) { if (wordCharIgnore.ignore(i, rawChars, innerContext) && tempLen != 0) { tempLen++; continue; } - char mappingChar = formatCharMapping.get(rawChars[i]); stringBuilder.append(mappingChar); tempLen++; - if (firstCheck || !WordContainsTypeEnum.NOT_FOUND.equals(wordContainsTypeEnumAllow)) { - wordContainsTypeEnumAllow = wordDataAllow.contains(stringBuilder, innerContext); - if (WordContainsTypeEnum.CONTAINS_END.equals(wordContainsTypeEnumAllow)) { - maxWhite += tempLen; - wordContainsTypeEnumAllow = WordContainsTypeEnum.NOT_FOUND; + WordContainsTypeEnum wordContainsTypeEnumAllow = wordDataAllow.contains(stringBuilder, innerContext); + WordContainsTypeEnum wordContainsTypeEnumDeny = wordData.contains(stringBuilder, innerContext); + + if (WordContainsTypeEnum.CONTAINS_END.equals(wordContainsTypeEnumAllow)) { + maxWhite += tempLen; + if (!failFast) { + //此处将tempLen设为0,为了防止重复累加 + tempLen = 0; + }else{ + //为falFast模式,主动设为notFound退出循环 + wordContainsTypeEnumAllow=WordContainsTypeEnum.NOT_FOUND; } } - if (firstCheck || !WordContainsTypeEnum.NOT_FOUND.equals(wordContainsTypeEnumDeny)) { - wordContainsTypeEnumDeny = wordData.contains(stringBuilder, innerContext); - if (WordContainsTypeEnum.CONTAINS_END.equals(wordContainsTypeEnumDeny)) { - maxBlack += tempLen; - wordContainsTypeEnumDeny = WordContainsTypeEnum.NOT_FOUND; + if (WordContainsTypeEnum.CONTAINS_END.equals(wordContainsTypeEnumDeny)) { + maxBlack += tempLen; + if (!failFast) { + //此处将tempLen设为0,为了防止重复累加 + tempLen = 0; + }else{ + //为falFast模式,主动设为notFound退出循环 + wordContainsTypeEnumDeny=WordContainsTypeEnum.NOT_FOUND; } } - firstCheck = false; - if (WordContainsTypeEnum.NOT_FOUND.equals(wordContainsTypeEnumAllow) && WordContainsTypeEnum.NOT_FOUND.equals(wordContainsTypeEnumDeny)) { break; diff --git a/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckWordMaxLen.java b/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckWordMaxLen.java deleted file mode 100644 index b7c72c0..0000000 --- a/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckWordMaxLen.java +++ /dev/null @@ -1,96 +0,0 @@ -package com.github.houbb.sensitive.word.support.check; - -import com.github.houbb.heaven.annotation.ThreadSafe; -import com.github.houbb.sensitive.word.api.ISensitiveWordCharIgnore; -import com.github.houbb.sensitive.word.api.IWordCheck; -import com.github.houbb.sensitive.word.api.IWordContext; -import com.github.houbb.sensitive.word.api.IWordData; -import com.github.houbb.sensitive.word.api.context.InnerSensitiveWordContext; -import com.github.houbb.sensitive.word.constant.enums.WordContainsTypeEnum; -import com.github.houbb.sensitive.word.constant.enums.WordTypeEnum; -import com.github.houbb.sensitive.word.support.result.WordLengthResult; - -import java.util.Map; - -/** - * 敏感词监测实现 - * @author binbin.hou - * @since 0.26.0 - */ -@Deprecated -public class WordCheckWordMaxLen extends AbstractWordCheck { - - @Override - protected Class getSensitiveCheckClass() { - return WordCheckWordMaxLen.class; - } - - @Override - protected WordLengthResult getActualLength(int beginIndex, InnerSensitiveWordContext innerContext) { - final String txt = innerContext.originalText(); - final Map formatCharMapping = innerContext.formatCharMapping(); - final IWordContext context = innerContext.wordContext(); - final IWordData wordData = context.wordData(); - final IWordData wordDataAllow = context.wordDataAllow(); - final ISensitiveWordCharIgnore wordCharIgnore = context.charIgnore(); - - // 前一个条件 - StringBuilder stringBuilder = new StringBuilder(); - char[] rawChars = txt.toCharArray(); - - int tempLen = 0; - int maxWhite = 0; - int maxBlack = 0; - boolean firstCheck = true; - - WordContainsTypeEnum wordContainsTypeEnumAllow = wordDataAllow.contains(stringBuilder, innerContext); - WordContainsTypeEnum wordContainsTypeEnumDeny = wordData.contains(stringBuilder, innerContext); - - for (int i = beginIndex; i < rawChars.length; i++) { - if (wordCharIgnore.ignore(i, rawChars, innerContext) && tempLen != 0) { - tempLen++; - continue; - } - - char mappingChar = formatCharMapping.get(rawChars[i]); - stringBuilder.append(mappingChar); - tempLen++; - - if (firstCheck || !WordContainsTypeEnum.NOT_FOUND.equals(wordContainsTypeEnumAllow)) { - wordContainsTypeEnumAllow = wordDataAllow.contains(stringBuilder, innerContext); - if (WordContainsTypeEnum.CONTAINS_END.equals(wordContainsTypeEnumAllow)) { - maxWhite += tempLen; - wordContainsTypeEnumAllow = WordContainsTypeEnum.NOT_FOUND; - } - } - - // 黑名单命中 - if (firstCheck || !WordContainsTypeEnum.NOT_FOUND.equals(wordContainsTypeEnumDeny)) { - wordContainsTypeEnumDeny = wordData.contains(stringBuilder, innerContext); - if (WordContainsTypeEnum.CONTAINS_END.equals(wordContainsTypeEnumDeny)) { - maxBlack += tempLen; - wordContainsTypeEnumDeny = WordContainsTypeEnum.NOT_FOUND; - } - } - - // 不再是第一次检测 - firstCheck = false; - - // 黑白名单都未匹配 - if (WordContainsTypeEnum.NOT_FOUND.equals(wordContainsTypeEnumAllow) && - WordContainsTypeEnum.NOT_FOUND.equals(wordContainsTypeEnumDeny)) { - break; - } - } - - return WordLengthResult.newInstance() - .wordAllowLen(maxWhite) - .wordDenyLen(maxBlack); - } - - @Override - protected String getType() { - return WordTypeEnum.WORD.getCode(); - } - -} diff --git a/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordFailFastTest.java b/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordFailFastTest.java new file mode 100644 index 0000000..6b4258c --- /dev/null +++ b/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordFailFastTest.java @@ -0,0 +1,47 @@ +package com.github.houbb.sensitive.word.bs; + +import com.github.houbb.sensitive.word.api.IWordDeny; +import org.junit.Assert; +import org.junit.Test; + +import java.util.Arrays; +import java.util.List; + +public class SensitiveWordFailFastTest { + + @Test + public void failFastTest() { + SensitiveWordBs bs = SensitiveWordBs.newInstance() + .failFastWordPattern(true) + .wordDeny(new IWordDeny() { + @Override + public List deny() { + return Arrays.asList("我的世界", "我的"); + } + }).init(); + + String text = "我在我的家里玩我的世界"; + + List textList = bs.findAll(text); + Assert.assertEquals(Arrays.asList("我的", "我的"), textList); + + } + @Test + public void fallOverTest() { + SensitiveWordBs bs = SensitiveWordBs.newInstance() + .failFastWordPattern(false) + .wordDeny(new IWordDeny() { + @Override + public List deny() { + return Arrays.asList("我的世界", "我的"); + } + }).init(); + + String text = "我在我的家里玩我的世界"; + + List textList = bs.findAll(text); + Assert.assertEquals(Arrays.asList("我的", "我的世界"), textList); + + } + +} diff --git a/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordMaxFirstTest.java b/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordMaxFirstTest.java deleted file mode 100644 index 2c0f819..0000000 --- a/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordMaxFirstTest.java +++ /dev/null @@ -1,28 +0,0 @@ -package com.github.houbb.sensitive.word.bs; - -import com.github.houbb.sensitive.word.api.IWordDeny; -import org.junit.Assert; -import org.junit.Test; - -import java.util.Arrays; -import java.util.List; - -public class SensitiveWordMaxFirstTest { - - @Test - public void maxFirstTest() { - SensitiveWordBs bs = SensitiveWordBs.newInstance() - .wordDeny(new IWordDeny() { - @Override - public List deny() { - return Arrays.asList("我的世界", "我的"); - } - }).init(); - - String text = "我的世界我的好玩"; - - List textList = bs.findAll(text); -// Assert.assertEquals("", textList.toString()); - } - -}