From 5a2b9da9dfcdc9d59d54d66c7e50c5827aa46034 Mon Sep 17 00:00:00 2001 From: "binbin.hou" Date: Fri, 9 Jun 2023 14:29:33 +0800 Subject: [PATCH] [Feature] add for new --- CHANGE_LOG.md | 6 ++ README.md | 22 +++++- pom.xml | 2 +- release.bat | 4 +- .../api/ISensitiveWordReplaceContext.java | 25 ------ .../houbb/sensitive/word/api/IWordMap.java | 5 +- .../api/context/InnerSensitiveContext.java | 72 ++++++++++++++++++ .../word/core/AbstractSensitiveWord.java | 5 +- .../sensitive/word/core/SensitiveWord.java | 16 ++-- .../word/support/check/ISensitiveCheck.java | 11 +-- .../impl/AbstractConditionSensitiveCheck.java | 36 ++++----- .../check/impl/AbstractSensitiveCheck.java | 39 ++-------- .../check/impl/SensitiveCheckEmail.java | 16 ++-- .../check/impl/SensitiveCheckInit.java | 11 +-- .../check/impl/SensitiveCheckNone.java | 12 ++- .../support/check/impl/SensitiveCheckNum.java | 25 +++--- .../support/check/impl/SensitiveCheckUrl.java | 16 ++-- .../check/impl/SensitiveCheckWord.java | 20 +++-- .../sensitive/word/support/map/WordMap.java | 28 ++++--- .../replace/SensitiveWordReplaceChar.java | 2 - .../replace/SensitiveWordReplaceContext.java | 57 -------------- .../word/utils/InnerFormatUtils.java | 36 ++++++++- .../word/benchmark/BenchmarkBasicTest.java | 4 +- .../word/benchmark/BenchmarkTimesTest.java | 76 +++++++++++++++++++ 24 files changed, 326 insertions(+), 220 deletions(-) delete mode 100644 src/main/java/com/github/houbb/sensitive/word/api/ISensitiveWordReplaceContext.java create mode 100644 src/main/java/com/github/houbb/sensitive/word/api/context/InnerSensitiveContext.java delete mode 100644 src/main/java/com/github/houbb/sensitive/word/support/replace/SensitiveWordReplaceContext.java create mode 100644 src/test/java/com/github/houbb/sensitive/word/benchmark/BenchmarkTimesTest.java diff --git a/CHANGE_LOG.md b/CHANGE_LOG.md index 508d158..0e6a462 100644 --- a/CHANGE_LOG.md +++ b/CHANGE_LOG.md @@ -178,3 +178,9 @@ | 1 | A | 优化单词结果,减少 String 创建 | 2023-06-08 23:51:58 | | | 2 | A | 优化 contains 判断,减少 String 创建 | 2023-06-08 23:51:58 | | +# release_0.6.0 + +| 序号 | 变更类型 | 说明 | 时间 | 备注 | +|:---|:-----|--------------------------|:--------------------|:-------| +| 1 | O | 性能优化:字符映射统一处理一遍,而不是每次都处理 | 2023-06-09 23:51:58 | | +| 2 | D | 移除废弃的 replaceContext | 2023-06-09 23:51:58 | | diff --git a/README.md b/README.md index d66d2b2..31d0492 100644 --- a/README.md +++ b/README.md @@ -42,6 +42,8 @@ - 支持数据的数据动态更新,实时生效 +- 只做敏感词过滤时,性能为 7W+ QPS,应用无感 + ## 变更日志 [CHANGE_LOG.md](https://github.com/houbb/sensitive-word/blob/master/CHANGE_LOG.md) @@ -60,7 +62,7 @@ com.github.houbb sensitive-word - 0.5.0 + 0.6.0 ``` @@ -609,6 +611,24 @@ public class SensitiveWordService { 其他使用保持不变,无需重启应用。 +# Benchmark + +V0.6.0 以后,添加对应的 benchmark 测试。 + +> []() + +## 环境 + +测试环境为普通的笔记本: + +``` +处理器 12th Gen Intel(R) Core(TM) i7-1260P 2.10 GHz +机带 RAM 16.0 GB (15.7 GB 可用) +系统类型 64 位操作系统, 基于 x64 的处理器 +``` + +## 测试效果记录 + ## STAR diff --git a/pom.xml b/pom.xml index baf22df..5501edd 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ com.github.houbb sensitive-word - 0.5.0 + 0.6.0-SNAPSHOT diff --git a/release.bat b/release.bat index 690d4a5..880cef9 100644 --- a/release.bat +++ b/release.bat @@ -10,9 +10,9 @@ ECHO "============================= RELEASE START..." :: 版本号信息(需要手动指定) :::: 旧版本名称 -SET version=0.5.0 +SET version=0.6.0 :::: 新版本名称 -SET newVersion=0.6.0 +SET newVersion=0.7.0 :::: 组织名称 SET groupName=com.github.houbb :::: 项目名称 diff --git a/src/main/java/com/github/houbb/sensitive/word/api/ISensitiveWordReplaceContext.java b/src/main/java/com/github/houbb/sensitive/word/api/ISensitiveWordReplaceContext.java deleted file mode 100644 index fce6da0..0000000 --- a/src/main/java/com/github/houbb/sensitive/word/api/ISensitiveWordReplaceContext.java +++ /dev/null @@ -1,25 +0,0 @@ -package com.github.houbb.sensitive.word.api; - -/** - * 敏感词替换策略上下文 - * - * @author binbin.hou - * @since 0.2.0 - */ -public interface ISensitiveWordReplaceContext { - - /** - * 敏感词 - * @return 敏感词 - * @since 0.2.0 - */ - String sensitiveWord(); - - /** - * 单词长度 - * @return 单词长度 - * @since 0.2.0 - */ - int wordLength(); - -} diff --git a/src/main/java/com/github/houbb/sensitive/word/api/IWordMap.java b/src/main/java/com/github/houbb/sensitive/word/api/IWordMap.java index 5cd5d77..548cf57 100644 --- a/src/main/java/com/github/houbb/sensitive/word/api/IWordMap.java +++ b/src/main/java/com/github/houbb/sensitive/word/api/IWordMap.java @@ -1,5 +1,6 @@ package com.github.houbb.sensitive.word.api; +import com.github.houbb.sensitive.word.api.context.InnerSensitiveContext; import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum; import com.github.houbb.sensitive.word.constant.enums.WordContainsTypeEnum; @@ -23,12 +24,12 @@ public interface IWordMap { /** * 是否包含敏感词 * @param stringBuilder 缓冲 - * @param context 上下文 + * @param innerContext 上下文 * @return 是否包含 * @since 0.5.0 * @see ValidModeEnum#FAIL_FAST 建议使用快速返回模式 */ WordContainsTypeEnum contains(final StringBuilder stringBuilder, - final IWordContext context); + final InnerSensitiveContext innerContext); } diff --git a/src/main/java/com/github/houbb/sensitive/word/api/context/InnerSensitiveContext.java b/src/main/java/com/github/houbb/sensitive/word/api/context/InnerSensitiveContext.java new file mode 100644 index 0000000..44553f8 --- /dev/null +++ b/src/main/java/com/github/houbb/sensitive/word/api/context/InnerSensitiveContext.java @@ -0,0 +1,72 @@ +package com.github.houbb.sensitive.word.api.context; + +import com.github.houbb.sensitive.word.api.IWordContext; +import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum; + +import java.util.Map; + +/** + * 内部信息上下文 + * + * @author binbin.hou + * @since 0.6.0 + */ +public class InnerSensitiveContext { + + /** + * 原始文本 + */ + private String originalText; + /** + * 格式化后的字符 + */ + private Map formatCharMapping; + /** + * 校验模式 + */ + private ValidModeEnum modeEnum; + /** + * 原始上下文 + */ + private IWordContext wordContext; + + public static InnerSensitiveContext newInstance() { + return new InnerSensitiveContext(); + } + + public String originalText() { + return originalText; + } + + public InnerSensitiveContext originalText(String text) { + this.originalText = text; + return this; + } + + public Map formatCharMapping() { + return formatCharMapping; + } + + public InnerSensitiveContext formatCharMapping(Map formatCharMapping) { + this.formatCharMapping = formatCharMapping; + return this; + } + + public ValidModeEnum modeEnum() { + return modeEnum; + } + + public InnerSensitiveContext modeEnum(ValidModeEnum modeEnum) { + this.modeEnum = modeEnum; + return this; + } + + public IWordContext wordContext() { + return wordContext; + } + + public InnerSensitiveContext wordContext(IWordContext context) { + this.wordContext = context; + return this; + } +} diff --git a/src/main/java/com/github/houbb/sensitive/word/core/AbstractSensitiveWord.java b/src/main/java/com/github/houbb/sensitive/word/core/AbstractSensitiveWord.java index 3843b82..a6a2c5d 100644 --- a/src/main/java/com/github/houbb/sensitive/word/core/AbstractSensitiveWord.java +++ b/src/main/java/com/github/houbb/sensitive/word/core/AbstractSensitiveWord.java @@ -2,7 +2,10 @@ package com.github.houbb.sensitive.word.core; import com.github.houbb.heaven.util.lang.StringUtil; import com.github.houbb.heaven.util.util.CollectionUtil; -import com.github.houbb.sensitive.word.api.*; +import com.github.houbb.sensitive.word.api.ISensitiveWord; +import com.github.houbb.sensitive.word.api.ISensitiveWordReplace; +import com.github.houbb.sensitive.word.api.IWordContext; +import com.github.houbb.sensitive.word.api.IWordResult; import java.util.Collections; import java.util.List; diff --git a/src/main/java/com/github/houbb/sensitive/word/core/SensitiveWord.java b/src/main/java/com/github/houbb/sensitive/word/core/SensitiveWord.java index f4cd8f1..a9e95b7 100644 --- a/src/main/java/com/github/houbb/sensitive/word/core/SensitiveWord.java +++ b/src/main/java/com/github/houbb/sensitive/word/core/SensitiveWord.java @@ -1,19 +1,18 @@ package com.github.houbb.sensitive.word.core; import com.github.houbb.heaven.util.guava.Guavas; -import com.github.houbb.heaven.util.io.FileUtil; import com.github.houbb.sensitive.word.api.ISensitiveWord; -import com.github.houbb.sensitive.word.api.ISensitiveWordReplaceContext; import com.github.houbb.sensitive.word.api.IWordContext; import com.github.houbb.sensitive.word.api.IWordResult; +import com.github.houbb.sensitive.word.api.context.InnerSensitiveContext; import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum; import com.github.houbb.sensitive.word.support.check.ISensitiveCheck; import com.github.houbb.sensitive.word.support.check.SensitiveCheckResult; -import com.github.houbb.sensitive.word.support.check.impl.SensitiveCheckUrl; -import com.github.houbb.sensitive.word.support.replace.SensitiveWordReplaceContext; import com.github.houbb.sensitive.word.support.result.WordResult; +import com.github.houbb.sensitive.word.utils.InnerFormatUtils; import java.util.List; +import java.util.Map; /** * 默认实现 @@ -54,8 +53,15 @@ public class SensitiveWord extends AbstractSensitiveWord { //TODO: 这里拆分为2个部分,从而保障性能。但是要注意处理下标的问题。 //1. 原始的敏感词部分 //2. email/url/num 的单独一次遍历处理。 + final Map characterCharacterMap = InnerFormatUtils.formatCharsMapping(text, context); + final InnerSensitiveContext checkContext = InnerSensitiveContext.newInstance() + .originalText(text) + .wordContext(context) + .modeEnum(ValidModeEnum.FAIL_OVER) + .formatCharMapping(characterCharacterMap); + for (int i = 0; i < text.length(); i++) { - SensitiveCheckResult checkResult = sensitiveCheck.sensitiveCheck(text, i, ValidModeEnum.FAIL_OVER, context); + SensitiveCheckResult checkResult = sensitiveCheck.sensitiveCheck(i, checkContext); // 命中 int wordLength = checkResult.index(); diff --git a/src/main/java/com/github/houbb/sensitive/word/support/check/ISensitiveCheck.java b/src/main/java/com/github/houbb/sensitive/word/support/check/ISensitiveCheck.java index a07578c..ad94de9 100644 --- a/src/main/java/com/github/houbb/sensitive/word/support/check/ISensitiveCheck.java +++ b/src/main/java/com/github/houbb/sensitive/word/support/check/ISensitiveCheck.java @@ -1,7 +1,6 @@ package com.github.houbb.sensitive.word.support.check; -import com.github.houbb.sensitive.word.api.IWordContext; -import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum; +import com.github.houbb.sensitive.word.api.context.InnerSensitiveContext; /** * 敏感信息监测接口 @@ -27,16 +26,12 @@ public interface ISensitiveCheck { * 2. 敏感词的长度 * 3. 正常走过字段的长度(便于后期替换优化,避免不必要的循环重复) * - * @param txt 文本信息 * @param beginIndex 开始下标 - * @param validModeEnum 验证模式 * @param context 执行上下文 * @return 敏感信息对应的长度 * @since 0.0.5 */ - SensitiveCheckResult sensitiveCheck(final String txt, - final int beginIndex, - final ValidModeEnum validModeEnum, - final IWordContext context); + SensitiveCheckResult sensitiveCheck(final int beginIndex, + final InnerSensitiveContext context); } diff --git a/src/main/java/com/github/houbb/sensitive/word/support/check/impl/AbstractConditionSensitiveCheck.java b/src/main/java/com/github/houbb/sensitive/word/support/check/impl/AbstractConditionSensitiveCheck.java index ad00b3d..1bb82c7 100644 --- a/src/main/java/com/github/houbb/sensitive/word/support/check/impl/AbstractConditionSensitiveCheck.java +++ b/src/main/java/com/github/houbb/sensitive/word/support/check/impl/AbstractConditionSensitiveCheck.java @@ -2,9 +2,9 @@ package com.github.houbb.sensitive.word.support.check.impl; import com.github.houbb.heaven.annotation.ThreadSafe; import com.github.houbb.sensitive.word.api.IWordContext; -import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum; -import com.github.houbb.sensitive.word.support.check.ISensitiveCheck; -import com.github.houbb.sensitive.word.support.check.SensitiveCheckResult; +import com.github.houbb.sensitive.word.api.context.InnerSensitiveContext; + +import java.util.Map; /** * 抽象实现策略 @@ -19,34 +19,29 @@ public abstract class AbstractConditionSensitiveCheck extends AbstractSensitiveC * 当前字符串是否符合规范 * @param mappingChar 当前字符 * @param index 下标 - * @param rawText 原始文本 - * @param context 上下文 + * @param checkContext 校验文本 * @return 结果 * @since 0.3.2 */ - protected abstract boolean isCharCondition(char mappingChar, - int index, - String rawText, - final IWordContext context); + protected abstract boolean isCharCondition(char mappingChar, int index, InnerSensitiveContext checkContext); /** * 这里指定一个阈值条件 * @param index 当前下标 - * @param rawText 原始文本 * @param stringBuilder 缓存 - * @param context 上下文 + * @param checkContext 上下文 * @return 是否满足条件 * @since 0.3.2 */ protected abstract boolean isStringCondition(int index, - String rawText, - final StringBuilder stringBuilder, - final IWordContext context); + final StringBuilder stringBuilder, InnerSensitiveContext checkContext); @Override - protected int doGetActualLength(String txt, int beginIndex, - ValidModeEnum validModeEnum, - IWordContext context) { + protected int getActualLength(int beginIndex, InnerSensitiveContext checkContext) { + final String txt = checkContext.originalText(); + final IWordContext context = checkContext.wordContext(); + final Map formatCharMapping = checkContext.formatCharMapping(); + int actualLength = 0; // 采用 ThreadLocal 应该可以提升性能,减少对象的创建。 @@ -55,12 +50,11 @@ public abstract class AbstractConditionSensitiveCheck extends AbstractSensitiveC for(int i = beginIndex; i < txt.length(); i++) { currentIx = i; char currentChar = txt.charAt(i); - // 映射处理 - char mappingChar = context.charFormat().format(currentChar, context); + char mappingChar = formatCharMapping.get(currentChar); // 符合条件 - boolean currentCondition = isCharCondition(mappingChar, i, txt, context); + boolean currentCondition = isCharCondition(mappingChar, i, checkContext); //4 个场景 if(currentCondition) { @@ -71,7 +65,7 @@ public abstract class AbstractConditionSensitiveCheck extends AbstractSensitiveC } // 匹配 - if(isStringCondition(currentIx, txt, stringBuilder, context)) { + if(isStringCondition(currentIx, stringBuilder, checkContext)) { actualLength = stringBuilder.length(); } diff --git a/src/main/java/com/github/houbb/sensitive/word/support/check/impl/AbstractSensitiveCheck.java b/src/main/java/com/github/houbb/sensitive/word/support/check/impl/AbstractSensitiveCheck.java index 802efc5..270ca53 100644 --- a/src/main/java/com/github/houbb/sensitive/word/support/check/impl/AbstractSensitiveCheck.java +++ b/src/main/java/com/github/houbb/sensitive/word/support/check/impl/AbstractSensitiveCheck.java @@ -2,8 +2,7 @@ package com.github.houbb.sensitive.word.support.check.impl; import com.github.houbb.heaven.annotation.ThreadSafe; import com.github.houbb.heaven.util.lang.StringUtil; -import com.github.houbb.sensitive.word.api.IWordContext; -import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum; +import com.github.houbb.sensitive.word.api.context.InnerSensitiveContext; import com.github.houbb.sensitive.word.support.check.ISensitiveCheck; import com.github.houbb.sensitive.word.support.check.SensitiveCheckResult; @@ -25,45 +24,23 @@ public abstract class AbstractSensitiveCheck implements ISensitiveCheck { /** * 获取确切的长度 - * @param txt 文本 * @param beginIndex 开始 - * @param validModeEnum 校验枚举 - * @param context 上下文 + * @param checkContext 上下文 * @return 长度 * @since 0.4.0 */ - protected abstract int doGetActualLength(String txt, int beginIndex, - ValidModeEnum validModeEnum, - IWordContext context); - - /** - * 获取确切的长度 - * @param txt 文本 - * @param beginIndex 开始 - * @param validModeEnum 校验枚举 - * @param context 上下文 - * @return 长度 - * @since 0.4.0 - */ - protected int getActualLength(String txt, int beginIndex, - ValidModeEnum validModeEnum, - IWordContext context) { - if(StringUtil.isEmpty(txt)) { - return 0; - } - - return doGetActualLength(txt, beginIndex, validModeEnum, context); - } + protected abstract int getActualLength(int beginIndex, final InnerSensitiveContext checkContext); @Override - public SensitiveCheckResult sensitiveCheck(String txt, int beginIndex, - ValidModeEnum validModeEnum, - IWordContext context) { + public SensitiveCheckResult sensitiveCheck(int beginIndex, + final InnerSensitiveContext checkContext) { Class clazz = getSensitiveCheckClass(); + final String txt = checkContext.originalText(); if(StringUtil.isEmpty(txt)) { return SensitiveCheckResult.of(0, clazz); } - int actualLength = getActualLength(txt, beginIndex, validModeEnum, context); + + int actualLength = getActualLength(beginIndex, checkContext); return SensitiveCheckResult.of(actualLength, clazz); } diff --git a/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckEmail.java b/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckEmail.java index 03ea436..6a9a631 100644 --- a/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckEmail.java +++ b/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckEmail.java @@ -3,7 +3,7 @@ package com.github.houbb.sensitive.word.support.check.impl; import com.github.houbb.heaven.annotation.ThreadSafe; import com.github.houbb.heaven.util.lang.CharUtil; import com.github.houbb.heaven.util.util.regex.RegexUtil; -import com.github.houbb.sensitive.word.api.IWordContext; +import com.github.houbb.sensitive.word.api.context.InnerSensitiveContext; import com.github.houbb.sensitive.word.constant.AppConst; import com.github.houbb.sensitive.word.support.check.ISensitiveCheck; @@ -35,12 +35,17 @@ public class SensitiveCheckEmail extends AbstractConditionSensitiveCheck { } @Override - protected boolean isCharCondition(char mappingChar, int index, String rawText, IWordContext context) { + protected Class getSensitiveCheckClass() { + return SensitiveCheckEmail.class; + } + + @Override + protected boolean isCharCondition(char mappingChar, int index, InnerSensitiveContext checkContext) { return CharUtil.isEmilChar(mappingChar); } @Override - protected boolean isStringCondition(int index, String rawText, StringBuilder stringBuilder, IWordContext context) { + protected boolean isStringCondition(int index, StringBuilder stringBuilder, InnerSensitiveContext checkContext) { int bufferLen = stringBuilder.length(); //x@a.cn @@ -55,9 +60,4 @@ public class SensitiveCheckEmail extends AbstractConditionSensitiveCheck { return RegexUtil.isEmail(string); } - @Override - protected Class getSensitiveCheckClass() { - return SensitiveCheckEmail.class; - } - } diff --git a/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckInit.java b/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckInit.java index 4572c12..db2e9ab 100644 --- a/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckInit.java +++ b/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckInit.java @@ -2,8 +2,7 @@ package com.github.houbb.sensitive.word.support.check.impl; import com.github.houbb.heaven.support.pipeline.Pipeline; import com.github.houbb.heaven.support.pipeline.impl.DefaultPipeline; -import com.github.houbb.sensitive.word.api.IWordContext; -import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum; +import com.github.houbb.sensitive.word.api.context.InnerSensitiveContext; import com.github.houbb.sensitive.word.support.check.ISensitiveCheck; import com.github.houbb.sensitive.word.support.check.SensitiveCheckResult; @@ -25,10 +24,8 @@ public abstract class SensitiveCheckInit implements ISensitiveCheck { @Override - public SensitiveCheckResult sensitiveCheck(String txt, - int beginIndex, - ValidModeEnum validModeEnum, - IWordContext context) { + public SensitiveCheckResult sensitiveCheck(final int beginIndex, + final InnerSensitiveContext checkContext) { Pipeline pipeline = new DefaultPipeline<>(); this.init(pipeline); @@ -36,7 +33,7 @@ public abstract class SensitiveCheckInit implements ISensitiveCheck { // 循环调用 for(ISensitiveCheck sensitiveCheck : sensitiveChecks) { - SensitiveCheckResult result = sensitiveCheck.sensitiveCheck(txt, beginIndex, validModeEnum, context); + SensitiveCheckResult result = sensitiveCheck.sensitiveCheck(beginIndex, checkContext); if(result.index() > 0) { return result; diff --git a/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckNone.java b/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckNone.java index 3376d7b..a640877 100644 --- a/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckNone.java +++ b/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckNone.java @@ -1,8 +1,7 @@ package com.github.houbb.sensitive.word.support.check.impl; import com.github.houbb.heaven.annotation.ThreadSafe; -import com.github.houbb.sensitive.word.api.IWordContext; -import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum; +import com.github.houbb.sensitive.word.api.context.InnerSensitiveContext; import com.github.houbb.sensitive.word.support.check.ISensitiveCheck; import com.github.houbb.sensitive.word.support.check.SensitiveCheckResult; @@ -29,13 +28,12 @@ public class SensitiveCheckNone implements ISensitiveCheck { */ private static final SensitiveCheckResult NONE_RESULT = SensitiveCheckResult.of(0, SensitiveCheckNone.class); - @Override - public SensitiveCheckResult sensitiveCheck(String txt, int beginIndex, ValidModeEnum validModeEnum, IWordContext context) { - return NONE_RESULT; - } - public static SensitiveCheckResult getNoneResult() { return NONE_RESULT; } + @Override + public SensitiveCheckResult sensitiveCheck(int beginIndex, InnerSensitiveContext context) { + return NONE_RESULT; + } } diff --git a/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckNum.java b/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckNum.java index ca34f8e..6f086b8 100644 --- a/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckNum.java +++ b/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckNum.java @@ -1,7 +1,7 @@ package com.github.houbb.sensitive.word.support.check.impl; import com.github.houbb.heaven.annotation.ThreadSafe; -import com.github.houbb.sensitive.word.api.IWordContext; +import com.github.houbb.sensitive.word.api.context.InnerSensitiveContext; import com.github.houbb.sensitive.word.support.check.ISensitiveCheck; /** @@ -23,21 +23,20 @@ public class SensitiveCheckNum extends AbstractConditionSensitiveCheck { return INSTANCE; } - @Override - protected boolean isCharCondition(char mappingChar, int index, String rawText, IWordContext context) { - return Character.isDigit(mappingChar); - } - - @Override - protected boolean isStringCondition(int index, String rawText, StringBuilder stringBuilder, IWordContext context) { - int bufferLen = stringBuilder.length(); - - return bufferLen >= context.sensitiveCheckNumLen(); - } - @Override protected Class getSensitiveCheckClass() { return SensitiveCheckNum.class; } + @Override + protected boolean isCharCondition(char mappingChar, int index, InnerSensitiveContext checkContext) { + return Character.isDigit(mappingChar); + } + + @Override + protected boolean isStringCondition(int index, StringBuilder stringBuilder, InnerSensitiveContext checkContext) { + int bufferLen = stringBuilder.length(); + return bufferLen >= checkContext.wordContext().sensitiveCheckNumLen(); + } + } diff --git a/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckUrl.java b/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckUrl.java index 528a20a..94c1519 100644 --- a/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckUrl.java +++ b/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckUrl.java @@ -3,7 +3,7 @@ package com.github.houbb.sensitive.word.support.check.impl; import com.github.houbb.heaven.annotation.ThreadSafe; import com.github.houbb.heaven.util.lang.CharUtil; import com.github.houbb.heaven.util.util.regex.RegexUtil; -import com.github.houbb.sensitive.word.api.IWordContext; +import com.github.houbb.sensitive.word.api.context.InnerSensitiveContext; import com.github.houbb.sensitive.word.constant.AppConst; import com.github.houbb.sensitive.word.support.check.ISensitiveCheck; @@ -32,12 +32,17 @@ public class SensitiveCheckUrl extends AbstractConditionSensitiveCheck { } @Override - protected boolean isCharCondition(char mappingChar, int index, String rawText, IWordContext context) { + protected Class getSensitiveCheckClass() { + return SensitiveCheckUrl.class; + } + + @Override + protected boolean isCharCondition(char mappingChar, int index, InnerSensitiveContext checkContext) { return CharUtil.isWebSiteChar(mappingChar); } @Override - protected boolean isStringCondition(int index, String rawText, StringBuilder stringBuilder, IWordContext context) { + protected boolean isStringCondition(int index, StringBuilder stringBuilder, InnerSensitiveContext checkContext) { int bufferLen = stringBuilder.length(); //a.cn if(bufferLen < 4) { @@ -51,9 +56,4 @@ public class SensitiveCheckUrl extends AbstractConditionSensitiveCheck { return RegexUtil.isWebSite(string); } - @Override - protected Class getSensitiveCheckClass() { - return SensitiveCheckUrl.class; - } - } diff --git a/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckWord.java b/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckWord.java index 7dda748..a60b802 100644 --- a/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckWord.java +++ b/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckWord.java @@ -3,10 +3,13 @@ package com.github.houbb.sensitive.word.support.check.impl; import com.github.houbb.heaven.annotation.ThreadSafe; import com.github.houbb.sensitive.word.api.IWordContext; import com.github.houbb.sensitive.word.api.IWordMap; +import com.github.houbb.sensitive.word.api.context.InnerSensitiveContext; import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum; import com.github.houbb.sensitive.word.constant.enums.WordContainsTypeEnum; import com.github.houbb.sensitive.word.support.check.ISensitiveCheck; +import java.util.Map; + /** * 敏感词监测实现 * @author binbin.hou @@ -30,22 +33,27 @@ public class SensitiveCheckWord extends AbstractSensitiveCheck { } @Override - protected int doGetActualLength(String txt, int beginIndex, ValidModeEnum validModeEnum, IWordContext context) { + protected int getActualLength(int beginIndex, InnerSensitiveContext innerContext) { + final String txt = innerContext.originalText(); + final Map formatCharMapping = innerContext.formatCharMapping(); + final ValidModeEnum validModeEnum = innerContext.modeEnum(); + final IWordContext context = innerContext.wordContext(); + // 采用 ThreadLocal 应该可以提升性能,减少对象的创建。 int actualLength = 0; final IWordMap wordMap = context.wordMap(); // 前一个条件 StringBuilder stringBuilder = new StringBuilder(); - for(int i = beginIndex; i < txt.length(); i++) { - char currentChar = txt.charAt(i); - + char[] rawChars = txt.toCharArray(); + for(int i = beginIndex; i < rawChars.length; i++) { // 映射处理 - char mappingChar = context.charFormat().format(currentChar, context); + final char currentChar = rawChars[i]; + char mappingChar = formatCharMapping.get(currentChar); stringBuilder.append(mappingChar); // 判断是否存在 - WordContainsTypeEnum wordContainsTypeEnum = wordMap.contains(stringBuilder, context); + WordContainsTypeEnum wordContainsTypeEnum = wordMap.contains(stringBuilder, innerContext); if(WordContainsTypeEnum.CONTAINS_END.equals(wordContainsTypeEnum)) { actualLength = stringBuilder.length(); diff --git a/src/main/java/com/github/houbb/sensitive/word/support/map/WordMap.java b/src/main/java/com/github/houbb/sensitive/word/support/map/WordMap.java index bb4b422..fd4db3d 100644 --- a/src/main/java/com/github/houbb/sensitive/word/support/map/WordMap.java +++ b/src/main/java/com/github/houbb/sensitive/word/support/map/WordMap.java @@ -5,6 +5,7 @@ import com.github.houbb.heaven.util.lang.ObjectUtil; import com.github.houbb.heaven.util.lang.StringUtil; import com.github.houbb.sensitive.word.api.IWordContext; import com.github.houbb.sensitive.word.api.IWordMap; +import com.github.houbb.sensitive.word.api.context.InnerSensitiveContext; import com.github.houbb.sensitive.word.constant.AppConst; import com.github.houbb.sensitive.word.constant.enums.WordContainsTypeEnum; @@ -95,21 +96,23 @@ public class WordMap implements IWordMap { * (2)如果遇到,则直接返回 true * * @param stringBuilder 字符串 + * @param innerContext 内部上下文 * @return 是否包含 * @since 0.0.1 */ @Override - public WordContainsTypeEnum contains(StringBuilder stringBuilder, final IWordContext context) { + public WordContainsTypeEnum contains(final StringBuilder stringBuilder, + final InnerSensitiveContext innerContext) { if (stringBuilder == null || stringBuilder.length() <= 0) { return WordContainsTypeEnum.NOT_FOUND; } - return innerContainsSensitive(stringBuilder, context); + return innerContainsSensitive(stringBuilder, innerContext); } private WordContainsTypeEnum innerContainsSensitive(StringBuilder stringBuilder, - IWordContext context) { + final InnerSensitiveContext innerContext) { // 初始化为当前的 map Map nowMap = this.innerWordMap; @@ -117,7 +120,7 @@ public class WordMap implements IWordMap { final int len = stringBuilder.length(); for (int i = 0; i < len; i++) { // 获取当前的 map 信息 - nowMap = getNowMap(nowMap, context, stringBuilder, i); + nowMap = getNowMap(nowMap, i, stringBuilder, innerContext); // 如果不为空,则判断是否为结尾。 if (ObjectUtil.isNull(nowMap)) { @@ -156,18 +159,20 @@ public class WordMap implements IWordMap { /** * 获取当前的 Map * @param nowMap 原始的当前 map - * @param context 上下文 - * @param stringBuilder 文本缓存 * @param index 下标 + * @param stringBuilder 文本缓存 + * @param sensitiveContext 上下文 * @return 实际的当前 map * @since 0.0.7 */ private Map getNowMap(Map nowMap, - final IWordContext context, + final int index, final StringBuilder stringBuilder, - final int index) { - char c = stringBuilder.charAt(index); - char mappingChar = context.charFormat().format(c, context); + final InnerSensitiveContext sensitiveContext) { + final IWordContext context = sensitiveContext.wordContext(); + + // 这里的 char 已经是统一格式化之后的,所以可以不用再次格式化。 + char mappingChar = stringBuilder.charAt(index); // 这里做一次重复词的处理 //TODO: 这里可以优化,是否获取一次。 @@ -175,8 +180,7 @@ public class WordMap implements IWordMap { // 启用忽略重复&当前下标不是第一个 if(context.ignoreRepeat() && index > 0) { - char preChar = stringBuilder.charAt(index-1); - char preMappingChar = context.charFormat().format(preChar, context); + char preMappingChar = stringBuilder.charAt(index-1); // 直接赋值为上一个 map if(preMappingChar == mappingChar) { diff --git a/src/main/java/com/github/houbb/sensitive/word/support/replace/SensitiveWordReplaceChar.java b/src/main/java/com/github/houbb/sensitive/word/support/replace/SensitiveWordReplaceChar.java index 6b5cc17..878c4da 100644 --- a/src/main/java/com/github/houbb/sensitive/word/support/replace/SensitiveWordReplaceChar.java +++ b/src/main/java/com/github/houbb/sensitive/word/support/replace/SensitiveWordReplaceChar.java @@ -2,9 +2,7 @@ package com.github.houbb.sensitive.word.support.replace; import com.github.houbb.heaven.annotation.ThreadSafe; import com.github.houbb.heaven.constant.CharConst; -import com.github.houbb.heaven.util.lang.CharUtil; import com.github.houbb.sensitive.word.api.ISensitiveWordReplace; -import com.github.houbb.sensitive.word.api.ISensitiveWordReplaceContext; import com.github.houbb.sensitive.word.api.IWordContext; import com.github.houbb.sensitive.word.api.IWordResult; diff --git a/src/main/java/com/github/houbb/sensitive/word/support/replace/SensitiveWordReplaceContext.java b/src/main/java/com/github/houbb/sensitive/word/support/replace/SensitiveWordReplaceContext.java deleted file mode 100644 index 31c67b1..0000000 --- a/src/main/java/com/github/houbb/sensitive/word/support/replace/SensitiveWordReplaceContext.java +++ /dev/null @@ -1,57 +0,0 @@ -package com.github.houbb.sensitive.word.support.replace; - -import com.github.houbb.sensitive.word.api.ISensitiveWordReplaceContext; - -/** - * 敏感词替换上下文 - * - * @author binbin.hou - * @since 0.2.0 - */ -public class SensitiveWordReplaceContext implements ISensitiveWordReplaceContext { - - public static SensitiveWordReplaceContext newInstance() { - return new SensitiveWordReplaceContext(); - } - - /** - * 敏感词 - * @since 0.2.0 - */ - private String sensitiveWord; - - /** - * 单词长度 - * @since 0.2.0 - */ - private int wordLength; - - @Override - public String sensitiveWord() { - return sensitiveWord; - } - - public SensitiveWordReplaceContext sensitiveWord(String sensitiveWord) { - this.sensitiveWord = sensitiveWord; - return this; - } - - @Override - public int wordLength() { - return wordLength; - } - - public SensitiveWordReplaceContext wordLength(int wordLength) { - this.wordLength = wordLength; - return this; - } - - @Override - public String toString() { - return "SensitiveWordReplaceContext{" + - "sensitiveWord='" + sensitiveWord + '\'' + - ", wordLength=" + wordLength + - '}'; - } - -} diff --git a/src/main/java/com/github/houbb/sensitive/word/utils/InnerFormatUtils.java b/src/main/java/com/github/houbb/sensitive/word/utils/InnerFormatUtils.java index d289e5a..fe96fe2 100644 --- a/src/main/java/com/github/houbb/sensitive/word/utils/InnerFormatUtils.java +++ b/src/main/java/com/github/houbb/sensitive/word/utils/InnerFormatUtils.java @@ -5,8 +5,7 @@ import com.github.houbb.heaven.util.util.CollectionUtil; import com.github.houbb.sensitive.word.api.ICharFormat; import com.github.houbb.sensitive.word.api.IWordContext; -import java.util.ArrayList; -import java.util.List; +import java.util.*; /** * 内部格式化工具类 @@ -16,6 +15,12 @@ public final class InnerFormatUtils { private InnerFormatUtils(){} + /** + * 空字符数组 + * @since 0.6.0 + */ + private static final char[] EMPTY_CHARS = new char[0]; + /** * 格式化 * @param original 原始 @@ -39,6 +44,33 @@ public final class InnerFormatUtils { return stringBuilder.toString(); } + /** + * 字符串统一的格式化处理 + * @param original 原始文本 + * @param context 上下文 + * @return 结果 + * @since 0.6.0 + */ + public static Map formatCharsMapping(final String original, final IWordContext context) { + if(StringUtil.isEmpty(original)) { + return Collections.emptyMap(); + } + + final int len = original.length(); + + char[] rawChars = original.toCharArray(); + Map map = new HashMap<>(rawChars.length); + + ICharFormat charFormat = context.charFormat(); + for(int i = 0; i < len; i++) { + final char currentChar = rawChars[i]; + char formatChar = charFormat.format(currentChar, context); + map.put(currentChar, formatChar); + } + + return map; + } + /** * 格式化列表 * @param list 列表 diff --git a/src/test/java/com/github/houbb/sensitive/word/benchmark/BenchmarkBasicTest.java b/src/test/java/com/github/houbb/sensitive/word/benchmark/BenchmarkBasicTest.java index 9b3f651..aa2d4b0 100644 --- a/src/test/java/com/github/houbb/sensitive/word/benchmark/BenchmarkBasicTest.java +++ b/src/test/java/com/github/houbb/sensitive/word/benchmark/BenchmarkBasicTest.java @@ -6,7 +6,7 @@ import com.github.houbb.sensitive.word.core.SensitiveWordHelper; import org.junit.Ignore; import org.junit.Test; -//@Ignore +@Ignore public class BenchmarkBasicTest { /** @@ -42,6 +42,8 @@ public class BenchmarkBasicTest { * 12111 第一次优化 * * 1133 只有单词校验 + * + * V0.6.0 优化 replace mapping 之后:621ms,性能接近翻倍。 */ @Test public void costTimeOnlyWordTest() { diff --git a/src/test/java/com/github/houbb/sensitive/word/benchmark/BenchmarkTimesTest.java b/src/test/java/com/github/houbb/sensitive/word/benchmark/BenchmarkTimesTest.java new file mode 100644 index 0000000..fd389f7 --- /dev/null +++ b/src/test/java/com/github/houbb/sensitive/word/benchmark/BenchmarkTimesTest.java @@ -0,0 +1,76 @@ +package com.github.houbb.sensitive.word.benchmark; + +import com.github.houbb.heaven.util.util.RandomUtil; +import com.github.houbb.sensitive.word.bs.SensitiveWordBs; +import com.github.houbb.sensitive.word.core.SensitiveWordHelper; +import org.junit.Ignore; +import org.junit.Test; + +@Ignore +public class BenchmarkTimesTest { + + /** + * 测试基准:100+字符串 * 10W次 + * + * V0.6.0: 1470ms,接近 7.2W QPS + */ + @Test + public void onlyWordAndNoReplaceTest() { + // 1W 次 + SensitiveWordBs sensitiveWordBs = SensitiveWordBs.newInstance() + .enableWordCheck(true) + .enableNumCheck(false) + .enableUrlCheck(false) + .enableEmailCheck(false) + .ignoreRepeat(false) + .ignoreCase(false) + .ignoreNumStyle(false) + .ignoreChineseStyle(false) + .ignoreEnglishStyle(false) + .ignoreWidth(false) + .init(); + + String randomText = "你他妈的不要说脏话"+ RandomUtil.randomString("1234567890bcdefghiJKLMNOPQRSTUVWXYZ", 100) + + "我们他妈的从来不说脏说"; + + long start = System.currentTimeMillis(); + for(int i = 0; i < 100_000; i++) { + sensitiveWordBs.findAll(randomText); + } + long end = System.currentTimeMillis(); + System.out.println("------------------ COST: " + (end-start)); + } + + /** + * 测试基准:100+字符串 * 10W次 + * + * V0.6.0: 2744ms + */ + @Test + public void onlyWordAndWithReplaceTest() { + // 1W 次 + SensitiveWordBs sensitiveWordBs = SensitiveWordBs.newInstance() + .enableWordCheck(true) + .enableNumCheck(false) + .enableUrlCheck(false) + .enableEmailCheck(false) + .ignoreRepeat(true) + .ignoreCase(true) + .ignoreNumStyle(true) + .ignoreChineseStyle(true) + .ignoreEnglishStyle(true) + .ignoreWidth(true) + .init(); + + String randomText = "你他妈的不要说脏话"+ RandomUtil.randomString("1234567890bcdefghiJKLMNOPQRSTUVWXYZ", 100) + + "我们他妈的从来不说脏说"; + + long start = System.currentTimeMillis(); + for(int i = 0; i < 100_000; i++) { + sensitiveWordBs.findAll(randomText); + } + long end = System.currentTimeMillis(); + System.out.println("------------------ COST: " + (end-start)); + } + +}