diff --git a/src/main/java/com/github/houbb/sensitive/word/core/SensitiveWord.java b/src/main/java/com/github/houbb/sensitive/word/core/SensitiveWord.java index bfaa83c..20f90b3 100644 --- a/src/main/java/com/github/houbb/sensitive/word/core/SensitiveWord.java +++ b/src/main/java/com/github/houbb/sensitive/word/core/SensitiveWord.java @@ -4,10 +4,8 @@ import com.github.houbb.heaven.util.guava.Guavas; import com.github.houbb.heaven.util.util.CollectionUtil; import com.github.houbb.sensitive.word.api.*; import com.github.houbb.sensitive.word.api.context.InnerSensitiveWordContext; -import com.github.houbb.sensitive.word.constant.enums.WordTypeEnum; import com.github.houbb.sensitive.word.constant.enums.WordValidModeEnum; import com.github.houbb.sensitive.word.support.check.WordCheckResult; -import com.github.houbb.sensitive.word.support.check.WordCheckWordAllow; import com.github.houbb.sensitive.word.support.result.WordResult; import com.github.houbb.sensitive.word.utils.InnerWordFormatUtils; @@ -71,21 +69,18 @@ public class SensitiveWord extends AbstractSensitiveWord { .formatCharMapping(characterCharacterMap); final IWordResultCondition wordResultCondition = context.wordResultCondition(); - final IWordCheck wordCheckAllow = new WordCheckWordAllow(); - for (int i = 0; i < text.length(); i++) { - // v0.21.0 白名单跳过 TODO: 感觉这种实现性能一般,考虑后续优化。 - WordCheckResult wordCheckAllowResult = wordCheckAllow.sensitiveCheck(i, checkContext); - int wordLengthAllow = wordCheckAllowResult.index(); + // v0.21.0 白名单跳过 + WordCheckResult checkResult = sensitiveCheck.sensitiveCheck(i, checkContext); + int wordLengthAllow = checkResult.wordLengthResult().wordAllowLen(); if(wordLengthAllow > 0) { i += wordLengthAllow-1; continue; } - WordCheckResult checkResult = sensitiveCheck.sensitiveCheck(i, checkContext); // 命中 - int wordLength = checkResult.index(); + int wordLength = checkResult.wordLengthResult().wordDenyLen(); if (wordLength > 0) { // 保存敏感词 WordResult wordResult = WordResult.newInstance() diff --git a/src/main/java/com/github/houbb/sensitive/word/support/check/AbstractConditionWordCheck.java b/src/main/java/com/github/houbb/sensitive/word/support/check/AbstractConditionWordCheck.java index f793326..ffd43df 100644 --- a/src/main/java/com/github/houbb/sensitive/word/support/check/AbstractConditionWordCheck.java +++ b/src/main/java/com/github/houbb/sensitive/word/support/check/AbstractConditionWordCheck.java @@ -3,6 +3,7 @@ package com.github.houbb.sensitive.word.support.check; import com.github.houbb.heaven.annotation.ThreadSafe; import com.github.houbb.sensitive.word.api.IWordContext; import com.github.houbb.sensitive.word.api.context.InnerSensitiveWordContext; +import com.github.houbb.sensitive.word.support.result.WordLengthResult; import java.util.Map; @@ -37,7 +38,7 @@ public abstract class AbstractConditionWordCheck extends AbstractWordCheck { final StringBuilder stringBuilder, InnerSensitiveWordContext checkContext); @Override - protected int getActualLength(int beginIndex, InnerSensitiveWordContext checkContext) { + protected WordLengthResult getActualLength(int beginIndex, InnerSensitiveWordContext checkContext) { final String txt = checkContext.originalText(); final IWordContext context = checkContext.wordContext(); final Map formatCharMapping = checkContext.formatCharMapping(); @@ -69,7 +70,9 @@ public abstract class AbstractConditionWordCheck extends AbstractWordCheck { actualLength = stringBuilder.length(); } - return actualLength; + return WordLengthResult.newInstance() + .wordDenyLen(actualLength) + .wordAllowLen(0); } } diff --git a/src/main/java/com/github/houbb/sensitive/word/support/check/AbstractWordCheck.java b/src/main/java/com/github/houbb/sensitive/word/support/check/AbstractWordCheck.java index 5b36b05..32f6ea6 100644 --- a/src/main/java/com/github/houbb/sensitive/word/support/check/AbstractWordCheck.java +++ b/src/main/java/com/github/houbb/sensitive/word/support/check/AbstractWordCheck.java @@ -4,6 +4,7 @@ import com.github.houbb.heaven.annotation.ThreadSafe; import com.github.houbb.heaven.util.lang.StringUtil; import com.github.houbb.sensitive.word.api.IWordCheck; import com.github.houbb.sensitive.word.api.context.InnerSensitiveWordContext; +import com.github.houbb.sensitive.word.support.result.WordLengthResult; /** * 抽象实现策略 @@ -28,7 +29,7 @@ public abstract class AbstractWordCheck implements IWordCheck { * @return 长度 * @since 0.4.0 */ - protected abstract int getActualLength(int beginIndex, final InnerSensitiveWordContext checkContext); + protected abstract WordLengthResult getActualLength(int beginIndex, final InnerSensitiveWordContext checkContext); /** * 获取类别 @@ -42,17 +43,21 @@ public abstract class AbstractWordCheck implements IWordCheck { final InnerSensitiveWordContext checkContext) { Class clazz = getSensitiveCheckClass(); final String txt = checkContext.originalText(); + WordLengthResult wordLengthResult = WordLengthResult.newInstance() + .wordAllowLen(0) + .wordDenyLen(0); + if(StringUtil.isEmpty(txt)) { return WordCheckResult.newInstance() - .index(0) + .wordLengthResult(wordLengthResult) .type(getType()) .checkClass(clazz); } - int actualLength = getActualLength(beginIndex, checkContext); + wordLengthResult = getActualLength(beginIndex, checkContext); return WordCheckResult.newInstance() - .index(actualLength) + .wordLengthResult(wordLengthResult) .type(getType()) .checkClass(clazz); } diff --git a/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckInit.java b/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckInit.java index 8298fba..d7f3c2b 100644 --- a/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckInit.java +++ b/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckInit.java @@ -4,6 +4,7 @@ import com.github.houbb.heaven.support.pipeline.Pipeline; import com.github.houbb.heaven.support.pipeline.impl.DefaultPipeline; import com.github.houbb.sensitive.word.api.IWordCheck; import com.github.houbb.sensitive.word.api.context.InnerSensitiveWordContext; +import com.github.houbb.sensitive.word.support.result.WordLengthResult; import java.util.List; @@ -34,7 +35,8 @@ public abstract class WordCheckInit implements IWordCheck { for(IWordCheck sensitiveCheck : sensitiveChecks) { WordCheckResult result = sensitiveCheck.sensitiveCheck(beginIndex, checkContext); - if(result.index() > 0) { + WordLengthResult wordLengthResult = result.wordLengthResult(); + if(wordLengthResult.wordAllowLen() > 0 || wordLengthResult.wordDenyLen()> 0) { return result; } } diff --git a/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckNone.java b/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckNone.java index c9fba58..6464592 100644 --- a/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckNone.java +++ b/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckNone.java @@ -4,6 +4,7 @@ import com.github.houbb.heaven.annotation.ThreadSafe; import com.github.houbb.sensitive.word.api.IWordCheck; import com.github.houbb.sensitive.word.api.context.InnerSensitiveWordContext; import com.github.houbb.sensitive.word.constant.enums.WordTypeEnum; +import com.github.houbb.sensitive.word.support.result.WordLengthResult; /** * 未匹配 @@ -28,7 +29,7 @@ public class WordCheckNone implements IWordCheck { */ private static final WordCheckResult NONE_RESULT = WordCheckResult.newInstance() .type(WordTypeEnum.DEFAULTS.getCode()) - .index(0) + .wordLengthResult(WordLengthResult.newInstance()) .checkClass(WordCheckNone.class); public static WordCheckResult getNoneResult() { diff --git a/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckResult.java b/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckResult.java index 1fdba99..e2d7e65 100644 --- a/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckResult.java +++ b/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckResult.java @@ -1,6 +1,7 @@ package com.github.houbb.sensitive.word.support.check; import com.github.houbb.sensitive.word.api.IWordCheck; +import com.github.houbb.sensitive.word.support.result.WordLengthResult; /** * 敏感信息监测接口结果 @@ -12,10 +13,9 @@ import com.github.houbb.sensitive.word.api.IWordCheck; public class WordCheckResult { /** - * 下标 - * @since 0.0.12 + * 命中的黑白名单的长度对象 */ - private int index; + private WordLengthResult wordLengthResult; /** * 检测类 @@ -35,12 +35,12 @@ public class WordCheckResult { return new WordCheckResult(); } - public int index() { - return index; + public WordLengthResult wordLengthResult() { + return wordLengthResult; } - public WordCheckResult index(int index) { - this.index = index; + public WordCheckResult wordLengthResult(WordLengthResult wordLengthResult) { + this.wordLengthResult = wordLengthResult; return this; } @@ -65,7 +65,7 @@ public class WordCheckResult { @Override public String toString() { return "WordCheckResult{" + - "index=" + index + + "wordLengthResult=" + wordLengthResult + ", checkClass=" + checkClass + ", type='" + type + '\'' + '}'; diff --git a/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckWord.java b/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckWord.java index a582b83..4fb7488 100644 --- a/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckWord.java +++ b/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckWord.java @@ -1,6 +1,7 @@ package com.github.houbb.sensitive.word.support.check; import com.github.houbb.heaven.annotation.ThreadSafe; +import com.github.houbb.heaven.util.lang.StringUtil; import com.github.houbb.sensitive.word.api.ISensitiveWordCharIgnore; import com.github.houbb.sensitive.word.api.IWordCheck; import com.github.houbb.sensitive.word.api.IWordContext; @@ -9,6 +10,7 @@ import com.github.houbb.sensitive.word.api.context.InnerSensitiveWordContext; import com.github.houbb.sensitive.word.constant.enums.WordTypeEnum; import com.github.houbb.sensitive.word.constant.enums.WordValidModeEnum; import com.github.houbb.sensitive.word.constant.enums.WordContainsTypeEnum; +import com.github.houbb.sensitive.word.support.result.WordLengthResult; import java.util.Map; @@ -35,54 +37,63 @@ public class WordCheckWord extends AbstractWordCheck { } @Override - protected int getActualLength(int beginIndex, InnerSensitiveWordContext innerContext) { + protected WordLengthResult getActualLength(int beginIndex, InnerSensitiveWordContext innerContext) { final String txt = innerContext.originalText(); final Map formatCharMapping = innerContext.formatCharMapping(); - final WordValidModeEnum wordValidModeEnum = innerContext.modeEnum(); final IWordContext context = innerContext.wordContext(); - - // 采用 ThreadLocal 应该可以提升性能,减少对象的创建。 - int actualLength = 0; final IWordData wordData = context.wordData(); + final IWordData wordDataAllow = context.wordDataAllow(); + final ISensitiveWordCharIgnore wordCharIgnore = context.charIgnore(); // 前一个条件 StringBuilder stringBuilder = new StringBuilder(); char[] rawChars = txt.toCharArray(); - final ISensitiveWordCharIgnore wordCharIgnore = context.charIgnore(); int tempLen = 0; - for(int i = beginIndex; i < rawChars.length; i++) { - // 判断是否跳过? - // 避免开始的时候命中 https://github.com/houbb/sensitive-word/issues/68 - if(wordCharIgnore.ignore(i, rawChars, innerContext) && tempLen != 0) { + int maxWhite = 0; + int maxBlack = 0; + boolean firstCheck = true; + + WordContainsTypeEnum wordContainsTypeEnumAllow = wordDataAllow.contains(stringBuilder, innerContext); + WordContainsTypeEnum wordContainsTypeEnumDeny = wordData.contains(stringBuilder, innerContext); + + for (int i = beginIndex; i < rawChars.length; i++) { + if (wordCharIgnore.ignore(i, rawChars, innerContext) && tempLen != 0) { tempLen++; continue; } - // 映射处理 - final char currentChar = rawChars[i]; - char mappingChar = formatCharMapping.get(currentChar); + char mappingChar = formatCharMapping.get(rawChars[i]); stringBuilder.append(mappingChar); tempLen++; - // 判断是否存在 - WordContainsTypeEnum wordContainsTypeEnum = wordData.contains(stringBuilder, innerContext); - if(WordContainsTypeEnum.CONTAINS_END.equals(wordContainsTypeEnum)) { - actualLength = tempLen; - - // 是否遍历全部匹配的模式 - if(WordValidModeEnum.FAIL_FAST.equals(wordValidModeEnum)) { - break; + if (firstCheck || !WordContainsTypeEnum.NOT_FOUND.equals(wordContainsTypeEnumAllow)) { + wordContainsTypeEnumAllow = wordDataAllow.contains(stringBuilder, innerContext); + if (WordContainsTypeEnum.CONTAINS_END.equals(wordContainsTypeEnumAllow)) { + maxWhite += tempLen; + wordContainsTypeEnumAllow = WordContainsTypeEnum.NOT_FOUND; } } - // 如果不包含,则直接返回。后续遍历无意义 - if(WordContainsTypeEnum.NOT_FOUND.equals(wordContainsTypeEnum)) { + if (firstCheck || !WordContainsTypeEnum.NOT_FOUND.equals(wordContainsTypeEnumDeny)) { + wordContainsTypeEnumDeny = wordData.contains(stringBuilder, innerContext); + if (WordContainsTypeEnum.CONTAINS_END.equals(wordContainsTypeEnumDeny)) { + maxBlack += tempLen; + wordContainsTypeEnumDeny = WordContainsTypeEnum.NOT_FOUND; + } + } + + firstCheck = false; + + if (WordContainsTypeEnum.NOT_FOUND.equals(wordContainsTypeEnumAllow) && + WordContainsTypeEnum.NOT_FOUND.equals(wordContainsTypeEnumDeny)) { break; } } - return actualLength; + return WordLengthResult.newInstance() + .wordAllowLen(maxWhite) + .wordDenyLen(maxBlack); } @Override diff --git a/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckWordAllow.java b/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckWordAllow.java deleted file mode 100644 index 61f1d0a..0000000 --- a/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckWordAllow.java +++ /dev/null @@ -1,90 +0,0 @@ -package com.github.houbb.sensitive.word.support.check; - -import com.github.houbb.heaven.annotation.ThreadSafe; -import com.github.houbb.sensitive.word.api.ISensitiveWordCharIgnore; -import com.github.houbb.sensitive.word.api.IWordCheck; -import com.github.houbb.sensitive.word.api.IWordContext; -import com.github.houbb.sensitive.word.api.IWordData; -import com.github.houbb.sensitive.word.api.context.InnerSensitiveWordContext; -import com.github.houbb.sensitive.word.constant.enums.WordContainsTypeEnum; -import com.github.houbb.sensitive.word.constant.enums.WordTypeEnum; -import com.github.houbb.sensitive.word.constant.enums.WordValidModeEnum; - -import java.util.Map; - -/** - * 敏感词监测实现(白名单) - * @author binbin.hou - * @since 0.21.0 - */ -@ThreadSafe -public class WordCheckWordAllow extends AbstractWordCheck { - - private static final IWordCheck INSTANCE = new WordCheckWordAllow(); - - public static IWordCheck getInstance() { - return INSTANCE; - } - - @Override - protected Class getSensitiveCheckClass() { - return WordCheckWordAllow.class; - } - - @Override - protected int getActualLength(int beginIndex, InnerSensitiveWordContext innerContext) { - final String txt = innerContext.originalText(); - final Map formatCharMapping = innerContext.formatCharMapping(); - final WordValidModeEnum wordValidModeEnum = innerContext.modeEnum(); - final IWordContext context = innerContext.wordContext(); - - // 采用 ThreadLocal 应该可以提升性能,减少对象的创建。 - int actualLength = 0; - final IWordData wordDataAllow = context.wordDataAllow(); - - // 前一个条件 - StringBuilder stringBuilder = new StringBuilder(); - char[] rawChars = txt.toCharArray(); - - final ISensitiveWordCharIgnore wordCharIgnore = context.charIgnore(); - int tempLen = 0; - for(int i = beginIndex; i < rawChars.length; i++) { - // 判断是否跳过? - // 避免开始的时候命中 https://github.com/houbb/sensitive-word/issues/68 - if(wordCharIgnore.ignore(i, rawChars, innerContext) && tempLen != 0) { - tempLen++; - continue; - } - - // 映射处理 - final char currentChar = rawChars[i]; - char mappingChar = formatCharMapping.get(currentChar); - stringBuilder.append(mappingChar); - tempLen++; - - // 判断是否存在 - WordContainsTypeEnum wordContainsTypeEnum = wordDataAllow.contains(stringBuilder, innerContext); - if(WordContainsTypeEnum.CONTAINS_END.equals(wordContainsTypeEnum)) { - actualLength = tempLen; - - // 是否遍历全部匹配的模式 - if(WordValidModeEnum.FAIL_FAST.equals(wordValidModeEnum)) { - break; - } - } - - // 如果不包含,则直接返回。后续遍历无意义 - if(WordContainsTypeEnum.NOT_FOUND.equals(wordContainsTypeEnum)) { - break; - } - } - - return actualLength; - } - - @Override - protected String getType() { - return WordTypeEnum.WORD.getCode(); - } - -} diff --git a/src/main/java/com/github/houbb/sensitive/word/support/result/WordLengthResult.java b/src/main/java/com/github/houbb/sensitive/word/support/result/WordLengthResult.java new file mode 100644 index 0000000..aac3136 --- /dev/null +++ b/src/main/java/com/github/houbb/sensitive/word/support/result/WordLengthResult.java @@ -0,0 +1,32 @@ +package com.github.houbb.sensitive.word.support.result; + +public class WordLengthResult { + private int wordAllowLen; + private int wordDenyLen; + + + private WordLengthResult(){} + + public static WordLengthResult newInstance(){ + return new WordLengthResult(); + } + + + public int wordAllowLen(){ + return this.wordAllowLen; + } + public WordLengthResult wordAllowLen(int wordAllowLen){ + this.wordAllowLen=wordAllowLen; + return this; + } + + public int wordDenyLen(){ + return this.wordDenyLen; + } + public WordLengthResult wordDenyLen(int wordDenyLen){ + this.wordDenyLen=wordDenyLen; + return this; + } + + +} diff --git a/src/test/java/com/github/houbb/sensitive/word/benchmark/BenchmarkBasicTest.java b/src/test/java/com/github/houbb/sensitive/word/benchmark/BenchmarkBasicTest.java index aa2d4b0..2ca4389 100644 --- a/src/test/java/com/github/houbb/sensitive/word/benchmark/BenchmarkBasicTest.java +++ b/src/test/java/com/github/houbb/sensitive/word/benchmark/BenchmarkBasicTest.java @@ -1,11 +1,16 @@ package com.github.houbb.sensitive.word.benchmark; import com.github.houbb.heaven.util.util.RandomUtil; +import com.github.houbb.sensitive.word.api.IWordAllow; +import com.github.houbb.sensitive.word.api.IWordDeny; import com.github.houbb.sensitive.word.bs.SensitiveWordBs; import com.github.houbb.sensitive.word.core.SensitiveWordHelper; import org.junit.Ignore; import org.junit.Test; +import java.util.Collections; +import java.util.List; + @Ignore public class BenchmarkBasicTest { @@ -66,6 +71,45 @@ public class BenchmarkBasicTest { System.out.println("------------------ COST: " + (end-start)); } + /** + * 黑白名单一次遍历 + */ + @Test + public void costTimeOneTraceTest() { + StringBuilder sb=new StringBuilder(); + for(int i=0;i<100;i++){ + sb.append("地铁口交易").append(i); + } + String text = sb.toString(); + + // 1W 次 + long start = System.currentTimeMillis(); + SensitiveWordBs sensitiveWordBs = SensitiveWordBs.newInstance() + .wordDeny(new IWordDeny() { + @Override + public List deny() { + return Collections.singletonList("口交"); + } + }) + .wordAllow(new IWordAllow() { + @Override + public List allow() { + return Collections.singletonList("地铁口交易"); + } + }) + .enableWordCheck(true) + .enableNumCheck(false) + .enableUrlCheck(false) + .enableEmailCheck(false) + .init(); + + for(int i = 0; i < 10000; i++) { + sensitiveWordBs.findAll(text); + } + long end = System.currentTimeMillis(); + System.out.println("------------------ COST: " + (end-start)); + } + /** * * COST: 1540-pc