diff --git a/CHANGE_LOG.md b/CHANGE_LOG.md
index 508d158..0e6a462 100644
--- a/CHANGE_LOG.md
+++ b/CHANGE_LOG.md
@@ -178,3 +178,9 @@
| 1 | A | 优化单词结果,减少 String 创建 | 2023-06-08 23:51:58 | |
| 2 | A | 优化 contains 判断,减少 String 创建 | 2023-06-08 23:51:58 | |
+# release_0.6.0
+
+| 序号 | 变更类型 | 说明 | 时间 | 备注 |
+|:---|:-----|--------------------------|:--------------------|:-------|
+| 1 | O | 性能优化:字符映射统一处理一遍,而不是每次都处理 | 2023-06-09 23:51:58 | |
+| 2 | D | 移除废弃的 replaceContext | 2023-06-09 23:51:58 | |
diff --git a/README.md b/README.md
index d66d2b2..31d0492 100644
--- a/README.md
+++ b/README.md
@@ -42,6 +42,8 @@
- 支持数据的数据动态更新,实时生效
+- 只做敏感词过滤时,性能为 7W+ QPS,应用无感
+
## 变更日志
[CHANGE_LOG.md](https://github.com/houbb/sensitive-word/blob/master/CHANGE_LOG.md)
@@ -60,7 +62,7 @@
com.github.houbb
sensitive-word
- 0.5.0
+ 0.6.0
```
@@ -609,6 +611,24 @@ public class SensitiveWordService {
其他使用保持不变,无需重启应用。
+# Benchmark
+
+V0.6.0 以后,添加对应的 benchmark 测试。
+
+> []()
+
+## 环境
+
+测试环境为普通的笔记本:
+
+```
+处理器 12th Gen Intel(R) Core(TM) i7-1260P 2.10 GHz
+机带 RAM 16.0 GB (15.7 GB 可用)
+系统类型 64 位操作系统, 基于 x64 的处理器
+```
+
+## 测试效果记录
+
## STAR
diff --git a/pom.xml b/pom.xml
index baf22df..5501edd 100644
--- a/pom.xml
+++ b/pom.xml
@@ -6,7 +6,7 @@
com.github.houbb
sensitive-word
- 0.5.0
+ 0.6.0-SNAPSHOT
diff --git a/release.bat b/release.bat
index 690d4a5..880cef9 100644
--- a/release.bat
+++ b/release.bat
@@ -10,9 +10,9 @@ ECHO "============================= RELEASE START..."
:: 版本号信息(需要手动指定)
:::: 旧版本名称
-SET version=0.5.0
+SET version=0.6.0
:::: 新版本名称
-SET newVersion=0.6.0
+SET newVersion=0.7.0
:::: 组织名称
SET groupName=com.github.houbb
:::: 项目名称
diff --git a/src/main/java/com/github/houbb/sensitive/word/api/ISensitiveWordReplaceContext.java b/src/main/java/com/github/houbb/sensitive/word/api/ISensitiveWordReplaceContext.java
deleted file mode 100644
index fce6da0..0000000
--- a/src/main/java/com/github/houbb/sensitive/word/api/ISensitiveWordReplaceContext.java
+++ /dev/null
@@ -1,25 +0,0 @@
-package com.github.houbb.sensitive.word.api;
-
-/**
- * 敏感词替换策略上下文
- *
- * @author binbin.hou
- * @since 0.2.0
- */
-public interface ISensitiveWordReplaceContext {
-
- /**
- * 敏感词
- * @return 敏感词
- * @since 0.2.0
- */
- String sensitiveWord();
-
- /**
- * 单词长度
- * @return 单词长度
- * @since 0.2.0
- */
- int wordLength();
-
-}
diff --git a/src/main/java/com/github/houbb/sensitive/word/api/IWordMap.java b/src/main/java/com/github/houbb/sensitive/word/api/IWordMap.java
index 5cd5d77..548cf57 100644
--- a/src/main/java/com/github/houbb/sensitive/word/api/IWordMap.java
+++ b/src/main/java/com/github/houbb/sensitive/word/api/IWordMap.java
@@ -1,5 +1,6 @@
package com.github.houbb.sensitive.word.api;
+import com.github.houbb.sensitive.word.api.context.InnerSensitiveContext;
import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum;
import com.github.houbb.sensitive.word.constant.enums.WordContainsTypeEnum;
@@ -23,12 +24,12 @@ public interface IWordMap {
/**
* 是否包含敏感词
* @param stringBuilder 缓冲
- * @param context 上下文
+ * @param innerContext 上下文
* @return 是否包含
* @since 0.5.0
* @see ValidModeEnum#FAIL_FAST 建议使用快速返回模式
*/
WordContainsTypeEnum contains(final StringBuilder stringBuilder,
- final IWordContext context);
+ final InnerSensitiveContext innerContext);
}
diff --git a/src/main/java/com/github/houbb/sensitive/word/api/context/InnerSensitiveContext.java b/src/main/java/com/github/houbb/sensitive/word/api/context/InnerSensitiveContext.java
new file mode 100644
index 0000000..44553f8
--- /dev/null
+++ b/src/main/java/com/github/houbb/sensitive/word/api/context/InnerSensitiveContext.java
@@ -0,0 +1,89 @@
+package com.github.houbb.sensitive.word.api.context;
+
+import com.github.houbb.sensitive.word.api.IWordContext;
+import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum;
+
+import java.util.Map;
+
+/**
+ * Internal context shared by the sensitive checks while one text is scanned.
+ *
+ * <p>Bundles the original text, the raw-to-formatted character mapping, the validation
+ * mode and the caller's word context. All setters are fluent and return {@code this}.
+ * Instances are plain mutable holders without synchronization.</p>
+ *
+ * @author binbin.hou
+ * @since 0.6.0
+ */
+public class InnerSensitiveContext {
+
+    /** Original text being checked. */
+    private String originalText;
+    /** Raw character to formatted character mapping. */
+    private Map<Character, Character> formatCharMapping;
+    /** Validation mode. */
+    private ValidModeEnum modeEnum;
+    /** Original word context. */
+    private IWordContext wordContext;
+
+    /** @return a new, empty context to be filled through the fluent setters */
+    public static InnerSensitiveContext newInstance() {
+        return new InnerSensitiveContext();
+    }
+
+    /** @return the original text, or {@code null} if it has not been set */
+    public String originalText() {
+        return originalText;
+    }
+
+    /**
+     * @param text original text to check
+     * @return this context, for chaining
+     */
+    public InnerSensitiveContext originalText(String text) {
+        this.originalText = text;
+        return this;
+    }
+
+    /** @return the raw-to-formatted character mapping, or {@code null} if it has not been set */
+    public Map<Character, Character> formatCharMapping() {
+        return formatCharMapping;
+    }
+
+    /**
+     * @param formatCharMapping mapping from each raw character to its formatted character
+     * @return this context, for chaining
+     */
+    public InnerSensitiveContext formatCharMapping(Map<Character, Character> formatCharMapping) {
+        this.formatCharMapping = formatCharMapping;
+        return this;
+    }
+
+    /** @return the validation mode, or {@code null} if it has not been set */
+    public ValidModeEnum modeEnum() {
+        return modeEnum;
+    }
+
+    /**
+     * @param modeEnum validation mode to apply
+     * @return this context, for chaining
+     */
+    public InnerSensitiveContext modeEnum(ValidModeEnum modeEnum) {
+        this.modeEnum = modeEnum;
+        return this;
+    }
+
+    /** @return the original word context, or {@code null} if it has not been set */
+    public IWordContext wordContext() {
+        return wordContext;
+    }
+
+    /**
+     * @param context original word context
+     * @return this context, for chaining
+     */
+    public InnerSensitiveContext wordContext(IWordContext context) {
+        this.wordContext = context;
+        return this;
+    }
+}
diff --git a/src/main/java/com/github/houbb/sensitive/word/core/AbstractSensitiveWord.java b/src/main/java/com/github/houbb/sensitive/word/core/AbstractSensitiveWord.java
index 3843b82..a6a2c5d 100644
--- a/src/main/java/com/github/houbb/sensitive/word/core/AbstractSensitiveWord.java
+++ b/src/main/java/com/github/houbb/sensitive/word/core/AbstractSensitiveWord.java
@@ -2,7 +2,10 @@ package com.github.houbb.sensitive.word.core;
import com.github.houbb.heaven.util.lang.StringUtil;
import com.github.houbb.heaven.util.util.CollectionUtil;
-import com.github.houbb.sensitive.word.api.*;
+import com.github.houbb.sensitive.word.api.ISensitiveWord;
+import com.github.houbb.sensitive.word.api.ISensitiveWordReplace;
+import com.github.houbb.sensitive.word.api.IWordContext;
+import com.github.houbb.sensitive.word.api.IWordResult;
import java.util.Collections;
import java.util.List;
diff --git a/src/main/java/com/github/houbb/sensitive/word/core/SensitiveWord.java b/src/main/java/com/github/houbb/sensitive/word/core/SensitiveWord.java
index f4cd8f1..a9e95b7 100644
--- a/src/main/java/com/github/houbb/sensitive/word/core/SensitiveWord.java
+++ b/src/main/java/com/github/houbb/sensitive/word/core/SensitiveWord.java
@@ -1,19 +1,18 @@
package com.github.houbb.sensitive.word.core;
import com.github.houbb.heaven.util.guava.Guavas;
-import com.github.houbb.heaven.util.io.FileUtil;
import com.github.houbb.sensitive.word.api.ISensitiveWord;
-import com.github.houbb.sensitive.word.api.ISensitiveWordReplaceContext;
import com.github.houbb.sensitive.word.api.IWordContext;
import com.github.houbb.sensitive.word.api.IWordResult;
+import com.github.houbb.sensitive.word.api.context.InnerSensitiveContext;
import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum;
import com.github.houbb.sensitive.word.support.check.ISensitiveCheck;
import com.github.houbb.sensitive.word.support.check.SensitiveCheckResult;
-import com.github.houbb.sensitive.word.support.check.impl.SensitiveCheckUrl;
-import com.github.houbb.sensitive.word.support.replace.SensitiveWordReplaceContext;
import com.github.houbb.sensitive.word.support.result.WordResult;
+import com.github.houbb.sensitive.word.utils.InnerFormatUtils;
import java.util.List;
+import java.util.Map;
/**
* 默认实现
@@ -54,8 +53,15 @@ public class SensitiveWord extends AbstractSensitiveWord {
//TODO: 这里拆分为2个部分,从而保障性能。但是要注意处理下标的问题。
//1. 原始的敏感词部分
//2. email/url/num 的单独一次遍历处理。
+ final Map characterCharacterMap = InnerFormatUtils.formatCharsMapping(text, context);
+ final InnerSensitiveContext checkContext = InnerSensitiveContext.newInstance()
+ .originalText(text)
+ .wordContext(context)
+ .modeEnum(ValidModeEnum.FAIL_OVER)
+ .formatCharMapping(characterCharacterMap);
+
for (int i = 0; i < text.length(); i++) {
- SensitiveCheckResult checkResult = sensitiveCheck.sensitiveCheck(text, i, ValidModeEnum.FAIL_OVER, context);
+ SensitiveCheckResult checkResult = sensitiveCheck.sensitiveCheck(i, checkContext);
// 命中
int wordLength = checkResult.index();
diff --git a/src/main/java/com/github/houbb/sensitive/word/support/check/ISensitiveCheck.java b/src/main/java/com/github/houbb/sensitive/word/support/check/ISensitiveCheck.java
index a07578c..ad94de9 100644
--- a/src/main/java/com/github/houbb/sensitive/word/support/check/ISensitiveCheck.java
+++ b/src/main/java/com/github/houbb/sensitive/word/support/check/ISensitiveCheck.java
@@ -1,7 +1,6 @@
package com.github.houbb.sensitive.word.support.check;
-import com.github.houbb.sensitive.word.api.IWordContext;
-import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum;
+import com.github.houbb.sensitive.word.api.context.InnerSensitiveContext;
/**
* 敏感信息监测接口
@@ -27,16 +26,12 @@ public interface ISensitiveCheck {
* 2. 敏感词的长度
* 3. 正常走过字段的长度(便于后期替换优化,避免不必要的循环重复)
*
- * @param txt 文本信息
* @param beginIndex 开始下标
- * @param validModeEnum 验证模式
* @param context 执行上下文
* @return 敏感信息对应的长度
* @since 0.0.5
*/
- SensitiveCheckResult sensitiveCheck(final String txt,
- final int beginIndex,
- final ValidModeEnum validModeEnum,
- final IWordContext context);
+ SensitiveCheckResult sensitiveCheck(final int beginIndex,
+ final InnerSensitiveContext context);
}
diff --git a/src/main/java/com/github/houbb/sensitive/word/support/check/impl/AbstractConditionSensitiveCheck.java b/src/main/java/com/github/houbb/sensitive/word/support/check/impl/AbstractConditionSensitiveCheck.java
index ad00b3d..1bb82c7 100644
--- a/src/main/java/com/github/houbb/sensitive/word/support/check/impl/AbstractConditionSensitiveCheck.java
+++ b/src/main/java/com/github/houbb/sensitive/word/support/check/impl/AbstractConditionSensitiveCheck.java
@@ -2,9 +2,9 @@ package com.github.houbb.sensitive.word.support.check.impl;
import com.github.houbb.heaven.annotation.ThreadSafe;
import com.github.houbb.sensitive.word.api.IWordContext;
-import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum;
-import com.github.houbb.sensitive.word.support.check.ISensitiveCheck;
-import com.github.houbb.sensitive.word.support.check.SensitiveCheckResult;
+import com.github.houbb.sensitive.word.api.context.InnerSensitiveContext;
+
+import java.util.Map;
/**
* 抽象实现策略
@@ -19,34 +19,29 @@ public abstract class AbstractConditionSensitiveCheck extends AbstractSensitiveC
* 当前字符串是否符合规范
* @param mappingChar 当前字符
* @param index 下标
- * @param rawText 原始文本
- * @param context 上下文
+ * @param checkContext inner check context (original text, char mapping, word context)
* @return 结果
* @since 0.3.2
*/
- protected abstract boolean isCharCondition(char mappingChar,
- int index,
- String rawText,
- final IWordContext context);
+ protected abstract boolean isCharCondition(char mappingChar, int index, InnerSensitiveContext checkContext);
/**
* 这里指定一个阈值条件
* @param index 当前下标
- * @param rawText 原始文本
* @param stringBuilder 缓存
- * @param context 上下文
+ * @param checkContext 上下文
* @return 是否满足条件
* @since 0.3.2
*/
protected abstract boolean isStringCondition(int index,
- String rawText,
- final StringBuilder stringBuilder,
- final IWordContext context);
+ final StringBuilder stringBuilder, InnerSensitiveContext checkContext);
@Override
- protected int doGetActualLength(String txt, int beginIndex,
- ValidModeEnum validModeEnum,
- IWordContext context) {
+ protected int getActualLength(int beginIndex, InnerSensitiveContext checkContext) {
+ final String txt = checkContext.originalText();
+ final IWordContext context = checkContext.wordContext();
+ final Map formatCharMapping = checkContext.formatCharMapping();
+
int actualLength = 0;
// 采用 ThreadLocal 应该可以提升性能,减少对象的创建。
@@ -55,12 +50,11 @@ public abstract class AbstractConditionSensitiveCheck extends AbstractSensitiveC
for(int i = beginIndex; i < txt.length(); i++) {
currentIx = i;
char currentChar = txt.charAt(i);
-
// 映射处理
- char mappingChar = context.charFormat().format(currentChar, context);
+ char mappingChar = formatCharMapping.get(currentChar);
// 符合条件
- boolean currentCondition = isCharCondition(mappingChar, i, txt, context);
+ boolean currentCondition = isCharCondition(mappingChar, i, checkContext);
//4 个场景
if(currentCondition) {
@@ -71,7 +65,7 @@ public abstract class AbstractConditionSensitiveCheck extends AbstractSensitiveC
}
// 匹配
- if(isStringCondition(currentIx, txt, stringBuilder, context)) {
+ if(isStringCondition(currentIx, stringBuilder, checkContext)) {
actualLength = stringBuilder.length();
}
diff --git a/src/main/java/com/github/houbb/sensitive/word/support/check/impl/AbstractSensitiveCheck.java b/src/main/java/com/github/houbb/sensitive/word/support/check/impl/AbstractSensitiveCheck.java
index 802efc5..270ca53 100644
--- a/src/main/java/com/github/houbb/sensitive/word/support/check/impl/AbstractSensitiveCheck.java
+++ b/src/main/java/com/github/houbb/sensitive/word/support/check/impl/AbstractSensitiveCheck.java
@@ -2,8 +2,7 @@ package com.github.houbb.sensitive.word.support.check.impl;
import com.github.houbb.heaven.annotation.ThreadSafe;
import com.github.houbb.heaven.util.lang.StringUtil;
-import com.github.houbb.sensitive.word.api.IWordContext;
-import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum;
+import com.github.houbb.sensitive.word.api.context.InnerSensitiveContext;
import com.github.houbb.sensitive.word.support.check.ISensitiveCheck;
import com.github.houbb.sensitive.word.support.check.SensitiveCheckResult;
@@ -25,45 +24,23 @@ public abstract class AbstractSensitiveCheck implements ISensitiveCheck {
/**
* 获取确切的长度
- * @param txt 文本
* @param beginIndex 开始
- * @param validModeEnum 校验枚举
- * @param context 上下文
+ * @param checkContext 上下文
* @return 长度
* @since 0.4.0
*/
- protected abstract int doGetActualLength(String txt, int beginIndex,
- ValidModeEnum validModeEnum,
- IWordContext context);
-
- /**
- * 获取确切的长度
- * @param txt 文本
- * @param beginIndex 开始
- * @param validModeEnum 校验枚举
- * @param context 上下文
- * @return 长度
- * @since 0.4.0
- */
- protected int getActualLength(String txt, int beginIndex,
- ValidModeEnum validModeEnum,
- IWordContext context) {
- if(StringUtil.isEmpty(txt)) {
- return 0;
- }
-
- return doGetActualLength(txt, beginIndex, validModeEnum, context);
- }
+ protected abstract int getActualLength(int beginIndex, final InnerSensitiveContext checkContext);
@Override
- public SensitiveCheckResult sensitiveCheck(String txt, int beginIndex,
- ValidModeEnum validModeEnum,
- IWordContext context) {
+ public SensitiveCheckResult sensitiveCheck(int beginIndex,
+ final InnerSensitiveContext checkContext) {
Class extends ISensitiveCheck> clazz = getSensitiveCheckClass();
+ final String txt = checkContext.originalText();
if(StringUtil.isEmpty(txt)) {
return SensitiveCheckResult.of(0, clazz);
}
- int actualLength = getActualLength(txt, beginIndex, validModeEnum, context);
+
+ int actualLength = getActualLength(beginIndex, checkContext);
return SensitiveCheckResult.of(actualLength, clazz);
}
diff --git a/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckEmail.java b/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckEmail.java
index 03ea436..6a9a631 100644
--- a/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckEmail.java
+++ b/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckEmail.java
@@ -3,7 +3,7 @@ package com.github.houbb.sensitive.word.support.check.impl;
import com.github.houbb.heaven.annotation.ThreadSafe;
import com.github.houbb.heaven.util.lang.CharUtil;
import com.github.houbb.heaven.util.util.regex.RegexUtil;
-import com.github.houbb.sensitive.word.api.IWordContext;
+import com.github.houbb.sensitive.word.api.context.InnerSensitiveContext;
import com.github.houbb.sensitive.word.constant.AppConst;
import com.github.houbb.sensitive.word.support.check.ISensitiveCheck;
@@ -35,12 +35,17 @@ public class SensitiveCheckEmail extends AbstractConditionSensitiveCheck {
}
@Override
- protected boolean isCharCondition(char mappingChar, int index, String rawText, IWordContext context) {
+ protected Class extends ISensitiveCheck> getSensitiveCheckClass() {
+ return SensitiveCheckEmail.class;
+ }
+
+ @Override
+ protected boolean isCharCondition(char mappingChar, int index, InnerSensitiveContext checkContext) {
return CharUtil.isEmilChar(mappingChar);
}
@Override
- protected boolean isStringCondition(int index, String rawText, StringBuilder stringBuilder, IWordContext context) {
+ protected boolean isStringCondition(int index, StringBuilder stringBuilder, InnerSensitiveContext checkContext) {
int bufferLen = stringBuilder.length();
//x@a.cn
@@ -55,9 +60,4 @@ public class SensitiveCheckEmail extends AbstractConditionSensitiveCheck {
return RegexUtil.isEmail(string);
}
- @Override
- protected Class extends ISensitiveCheck> getSensitiveCheckClass() {
- return SensitiveCheckEmail.class;
- }
-
}
diff --git a/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckInit.java b/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckInit.java
index 4572c12..db2e9ab 100644
--- a/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckInit.java
+++ b/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckInit.java
@@ -2,8 +2,7 @@ package com.github.houbb.sensitive.word.support.check.impl;
import com.github.houbb.heaven.support.pipeline.Pipeline;
import com.github.houbb.heaven.support.pipeline.impl.DefaultPipeline;
-import com.github.houbb.sensitive.word.api.IWordContext;
-import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum;
+import com.github.houbb.sensitive.word.api.context.InnerSensitiveContext;
import com.github.houbb.sensitive.word.support.check.ISensitiveCheck;
import com.github.houbb.sensitive.word.support.check.SensitiveCheckResult;
@@ -25,10 +24,8 @@ public abstract class SensitiveCheckInit implements ISensitiveCheck {
@Override
- public SensitiveCheckResult sensitiveCheck(String txt,
- int beginIndex,
- ValidModeEnum validModeEnum,
- IWordContext context) {
+ public SensitiveCheckResult sensitiveCheck(final int beginIndex,
+ final InnerSensitiveContext checkContext) {
Pipeline pipeline = new DefaultPipeline<>();
this.init(pipeline);
@@ -36,7 +33,7 @@ public abstract class SensitiveCheckInit implements ISensitiveCheck {
// 循环调用
for(ISensitiveCheck sensitiveCheck : sensitiveChecks) {
- SensitiveCheckResult result = sensitiveCheck.sensitiveCheck(txt, beginIndex, validModeEnum, context);
+ SensitiveCheckResult result = sensitiveCheck.sensitiveCheck(beginIndex, checkContext);
if(result.index() > 0) {
return result;
diff --git a/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckNone.java b/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckNone.java
index 3376d7b..a640877 100644
--- a/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckNone.java
+++ b/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckNone.java
@@ -1,8 +1,7 @@
package com.github.houbb.sensitive.word.support.check.impl;
import com.github.houbb.heaven.annotation.ThreadSafe;
-import com.github.houbb.sensitive.word.api.IWordContext;
-import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum;
+import com.github.houbb.sensitive.word.api.context.InnerSensitiveContext;
import com.github.houbb.sensitive.word.support.check.ISensitiveCheck;
import com.github.houbb.sensitive.word.support.check.SensitiveCheckResult;
@@ -29,13 +28,12 @@ public class SensitiveCheckNone implements ISensitiveCheck {
*/
private static final SensitiveCheckResult NONE_RESULT = SensitiveCheckResult.of(0, SensitiveCheckNone.class);
- @Override
- public SensitiveCheckResult sensitiveCheck(String txt, int beginIndex, ValidModeEnum validModeEnum, IWordContext context) {
- return NONE_RESULT;
- }
-
public static SensitiveCheckResult getNoneResult() {
return NONE_RESULT;
}
+ @Override
+ public SensitiveCheckResult sensitiveCheck(int beginIndex, InnerSensitiveContext context) {
+ return NONE_RESULT;
+ }
}
diff --git a/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckNum.java b/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckNum.java
index ca34f8e..6f086b8 100644
--- a/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckNum.java
+++ b/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckNum.java
@@ -1,7 +1,7 @@
package com.github.houbb.sensitive.word.support.check.impl;
import com.github.houbb.heaven.annotation.ThreadSafe;
-import com.github.houbb.sensitive.word.api.IWordContext;
+import com.github.houbb.sensitive.word.api.context.InnerSensitiveContext;
import com.github.houbb.sensitive.word.support.check.ISensitiveCheck;
/**
@@ -23,21 +23,20 @@ public class SensitiveCheckNum extends AbstractConditionSensitiveCheck {
return INSTANCE;
}
- @Override
- protected boolean isCharCondition(char mappingChar, int index, String rawText, IWordContext context) {
- return Character.isDigit(mappingChar);
- }
-
- @Override
- protected boolean isStringCondition(int index, String rawText, StringBuilder stringBuilder, IWordContext context) {
- int bufferLen = stringBuilder.length();
-
- return bufferLen >= context.sensitiveCheckNumLen();
- }
-
@Override
protected Class extends ISensitiveCheck> getSensitiveCheckClass() {
return SensitiveCheckNum.class;
}
+ @Override
+ protected boolean isCharCondition(char mappingChar, int index, InnerSensitiveContext checkContext) {
+ return Character.isDigit(mappingChar);
+ }
+
+    @Override
+    protected boolean isStringCondition(int index, StringBuilder stringBuilder, InnerSensitiveContext checkContext) {
+        // Satisfied once the buffer length reaches the word context's sensitiveCheckNumLen().
+        return stringBuilder.length() >= checkContext.wordContext().sensitiveCheckNumLen();
+    }
+
}
diff --git a/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckUrl.java b/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckUrl.java
index 528a20a..94c1519 100644
--- a/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckUrl.java
+++ b/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckUrl.java
@@ -3,7 +3,7 @@ package com.github.houbb.sensitive.word.support.check.impl;
import com.github.houbb.heaven.annotation.ThreadSafe;
import com.github.houbb.heaven.util.lang.CharUtil;
import com.github.houbb.heaven.util.util.regex.RegexUtil;
-import com.github.houbb.sensitive.word.api.IWordContext;
+import com.github.houbb.sensitive.word.api.context.InnerSensitiveContext;
import com.github.houbb.sensitive.word.constant.AppConst;
import com.github.houbb.sensitive.word.support.check.ISensitiveCheck;
@@ -32,12 +32,17 @@ public class SensitiveCheckUrl extends AbstractConditionSensitiveCheck {
}
@Override
- protected boolean isCharCondition(char mappingChar, int index, String rawText, IWordContext context) {
+ protected Class extends ISensitiveCheck> getSensitiveCheckClass() {
+ return SensitiveCheckUrl.class;
+ }
+
+ @Override
+ protected boolean isCharCondition(char mappingChar, int index, InnerSensitiveContext checkContext) {
return CharUtil.isWebSiteChar(mappingChar);
}
@Override
- protected boolean isStringCondition(int index, String rawText, StringBuilder stringBuilder, IWordContext context) {
+ protected boolean isStringCondition(int index, StringBuilder stringBuilder, InnerSensitiveContext checkContext) {
int bufferLen = stringBuilder.length();
//a.cn
if(bufferLen < 4) {
@@ -51,9 +56,4 @@ public class SensitiveCheckUrl extends AbstractConditionSensitiveCheck {
return RegexUtil.isWebSite(string);
}
- @Override
- protected Class extends ISensitiveCheck> getSensitiveCheckClass() {
- return SensitiveCheckUrl.class;
- }
-
}
diff --git a/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckWord.java b/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckWord.java
index 7dda748..a60b802 100644
--- a/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckWord.java
+++ b/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckWord.java
@@ -3,10 +3,13 @@ package com.github.houbb.sensitive.word.support.check.impl;
import com.github.houbb.heaven.annotation.ThreadSafe;
import com.github.houbb.sensitive.word.api.IWordContext;
import com.github.houbb.sensitive.word.api.IWordMap;
+import com.github.houbb.sensitive.word.api.context.InnerSensitiveContext;
import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum;
import com.github.houbb.sensitive.word.constant.enums.WordContainsTypeEnum;
import com.github.houbb.sensitive.word.support.check.ISensitiveCheck;
+import java.util.Map;
+
/**
* 敏感词监测实现
* @author binbin.hou
@@ -30,22 +33,27 @@ public class SensitiveCheckWord extends AbstractSensitiveCheck {
}
@Override
- protected int doGetActualLength(String txt, int beginIndex, ValidModeEnum validModeEnum, IWordContext context) {
+ protected int getActualLength(int beginIndex, InnerSensitiveContext innerContext) {
+ final String txt = innerContext.originalText();
+ final Map formatCharMapping = innerContext.formatCharMapping();
+ final ValidModeEnum validModeEnum = innerContext.modeEnum();
+ final IWordContext context = innerContext.wordContext();
+
// 采用 ThreadLocal 应该可以提升性能,减少对象的创建。
int actualLength = 0;
final IWordMap wordMap = context.wordMap();
// 前一个条件
StringBuilder stringBuilder = new StringBuilder();
- for(int i = beginIndex; i < txt.length(); i++) {
- char currentChar = txt.charAt(i);
-
+ char[] rawChars = txt.toCharArray();
+ for(int i = beginIndex; i < rawChars.length; i++) {
// 映射处理
- char mappingChar = context.charFormat().format(currentChar, context);
+ final char currentChar = rawChars[i];
+ char mappingChar = formatCharMapping.get(currentChar);
stringBuilder.append(mappingChar);
// 判断是否存在
- WordContainsTypeEnum wordContainsTypeEnum = wordMap.contains(stringBuilder, context);
+ WordContainsTypeEnum wordContainsTypeEnum = wordMap.contains(stringBuilder, innerContext);
if(WordContainsTypeEnum.CONTAINS_END.equals(wordContainsTypeEnum)) {
actualLength = stringBuilder.length();
diff --git a/src/main/java/com/github/houbb/sensitive/word/support/map/WordMap.java b/src/main/java/com/github/houbb/sensitive/word/support/map/WordMap.java
index bb4b422..fd4db3d 100644
--- a/src/main/java/com/github/houbb/sensitive/word/support/map/WordMap.java
+++ b/src/main/java/com/github/houbb/sensitive/word/support/map/WordMap.java
@@ -5,6 +5,7 @@ import com.github.houbb.heaven.util.lang.ObjectUtil;
import com.github.houbb.heaven.util.lang.StringUtil;
import com.github.houbb.sensitive.word.api.IWordContext;
import com.github.houbb.sensitive.word.api.IWordMap;
+import com.github.houbb.sensitive.word.api.context.InnerSensitiveContext;
import com.github.houbb.sensitive.word.constant.AppConst;
import com.github.houbb.sensitive.word.constant.enums.WordContainsTypeEnum;
@@ -95,21 +96,23 @@ public class WordMap implements IWordMap {
* (2)如果遇到,则直接返回 true
*
* @param stringBuilder 字符串
+ * @param innerContext 内部上下文
* @return 是否包含
* @since 0.0.1
*/
@Override
- public WordContainsTypeEnum contains(StringBuilder stringBuilder, final IWordContext context) {
+ public WordContainsTypeEnum contains(final StringBuilder stringBuilder,
+ final InnerSensitiveContext innerContext) {
if (stringBuilder == null
|| stringBuilder.length() <= 0) {
return WordContainsTypeEnum.NOT_FOUND;
}
- return innerContainsSensitive(stringBuilder, context);
+ return innerContainsSensitive(stringBuilder, innerContext);
}
private WordContainsTypeEnum innerContainsSensitive(StringBuilder stringBuilder,
- IWordContext context) {
+ final InnerSensitiveContext innerContext) {
// 初始化为当前的 map
Map nowMap = this.innerWordMap;
@@ -117,7 +120,7 @@ public class WordMap implements IWordMap {
final int len = stringBuilder.length();
for (int i = 0; i < len; i++) {
// 获取当前的 map 信息
- nowMap = getNowMap(nowMap, context, stringBuilder, i);
+ nowMap = getNowMap(nowMap, i, stringBuilder, innerContext);
// 如果不为空,则判断是否为结尾。
if (ObjectUtil.isNull(nowMap)) {
@@ -156,18 +159,20 @@ public class WordMap implements IWordMap {
/**
* 获取当前的 Map
* @param nowMap 原始的当前 map
- * @param context 上下文
- * @param stringBuilder 文本缓存
* @param index 下标
+ * @param stringBuilder 文本缓存
+ * @param sensitiveContext 上下文
* @return 实际的当前 map
* @since 0.0.7
*/
private Map getNowMap(Map nowMap,
- final IWordContext context,
+ final int index,
final StringBuilder stringBuilder,
- final int index) {
- char c = stringBuilder.charAt(index);
- char mappingChar = context.charFormat().format(c, context);
+ final InnerSensitiveContext sensitiveContext) {
+ final IWordContext context = sensitiveContext.wordContext();
+
+ // 这里的 char 已经是统一格式化之后的,所以可以不用再次格式化。
+ char mappingChar = stringBuilder.charAt(index);
// 这里做一次重复词的处理
//TODO: 这里可以优化,是否获取一次。
@@ -175,8 +180,7 @@ public class WordMap implements IWordMap {
// 启用忽略重复&当前下标不是第一个
if(context.ignoreRepeat()
&& index > 0) {
- char preChar = stringBuilder.charAt(index-1);
- char preMappingChar = context.charFormat().format(preChar, context);
+ char preMappingChar = stringBuilder.charAt(index-1);
// 直接赋值为上一个 map
if(preMappingChar == mappingChar) {
diff --git a/src/main/java/com/github/houbb/sensitive/word/support/replace/SensitiveWordReplaceChar.java b/src/main/java/com/github/houbb/sensitive/word/support/replace/SensitiveWordReplaceChar.java
index 6b5cc17..878c4da 100644
--- a/src/main/java/com/github/houbb/sensitive/word/support/replace/SensitiveWordReplaceChar.java
+++ b/src/main/java/com/github/houbb/sensitive/word/support/replace/SensitiveWordReplaceChar.java
@@ -2,9 +2,7 @@ package com.github.houbb.sensitive.word.support.replace;
import com.github.houbb.heaven.annotation.ThreadSafe;
import com.github.houbb.heaven.constant.CharConst;
-import com.github.houbb.heaven.util.lang.CharUtil;
import com.github.houbb.sensitive.word.api.ISensitiveWordReplace;
-import com.github.houbb.sensitive.word.api.ISensitiveWordReplaceContext;
import com.github.houbb.sensitive.word.api.IWordContext;
import com.github.houbb.sensitive.word.api.IWordResult;
diff --git a/src/main/java/com/github/houbb/sensitive/word/support/replace/SensitiveWordReplaceContext.java b/src/main/java/com/github/houbb/sensitive/word/support/replace/SensitiveWordReplaceContext.java
deleted file mode 100644
index 31c67b1..0000000
--- a/src/main/java/com/github/houbb/sensitive/word/support/replace/SensitiveWordReplaceContext.java
+++ /dev/null
@@ -1,57 +0,0 @@
-package com.github.houbb.sensitive.word.support.replace;
-
-import com.github.houbb.sensitive.word.api.ISensitiveWordReplaceContext;
-
-/**
- * 敏感词替换上下文
- *
- * @author binbin.hou
- * @since 0.2.0
- */
-public class SensitiveWordReplaceContext implements ISensitiveWordReplaceContext {
-
- public static SensitiveWordReplaceContext newInstance() {
- return new SensitiveWordReplaceContext();
- }
-
- /**
- * 敏感词
- * @since 0.2.0
- */
- private String sensitiveWord;
-
- /**
- * 单词长度
- * @since 0.2.0
- */
- private int wordLength;
-
- @Override
- public String sensitiveWord() {
- return sensitiveWord;
- }
-
- public SensitiveWordReplaceContext sensitiveWord(String sensitiveWord) {
- this.sensitiveWord = sensitiveWord;
- return this;
- }
-
- @Override
- public int wordLength() {
- return wordLength;
- }
-
- public SensitiveWordReplaceContext wordLength(int wordLength) {
- this.wordLength = wordLength;
- return this;
- }
-
- @Override
- public String toString() {
- return "SensitiveWordReplaceContext{" +
- "sensitiveWord='" + sensitiveWord + '\'' +
- ", wordLength=" + wordLength +
- '}';
- }
-
-}
diff --git a/src/main/java/com/github/houbb/sensitive/word/utils/InnerFormatUtils.java b/src/main/java/com/github/houbb/sensitive/word/utils/InnerFormatUtils.java
index d289e5a..fe96fe2 100644
--- a/src/main/java/com/github/houbb/sensitive/word/utils/InnerFormatUtils.java
+++ b/src/main/java/com/github/houbb/sensitive/word/utils/InnerFormatUtils.java
@@ -5,8 +5,7 @@ import com.github.houbb.heaven.util.util.CollectionUtil;
import com.github.houbb.sensitive.word.api.ICharFormat;
import com.github.houbb.sensitive.word.api.IWordContext;
-import java.util.ArrayList;
-import java.util.List;
+import java.util.*;
/**
* 内部格式化工具类
@@ -16,6 +15,12 @@ public final class InnerFormatUtils {
private InnerFormatUtils(){}
+ /**
+ * Shared zero-length char array constant.
+ * @since 0.6.0
+ */
+ private static final char[] EMPTY_CHARS = new char[0];
+
/**
* 格式化
* @param original 原始
@@ -39,6 +44,33 @@ public final class InnerFormatUtils {
return stringBuilder.toString();
}
+ /**
+ * Formats every character of the text once and records each raw-to-formatted pair in a map,
+ * so a formatted character can later be looked up by its raw character.
+ * @param original raw text; when StringUtil.isEmpty(original) holds, an immutable empty map is returned
+ * @param context word context; supplies the ICharFormat and is passed to every format call
+ * @return map keyed by raw character, valued by that character's formatted form
+ * @since 0.6.0
+ */
+ public static Map<Character, Character> formatCharsMapping(final String original, final IWordContext context) {
+ if(StringUtil.isEmpty(original)) {
+ return Collections.emptyMap();
+ }
+
+ // Type arguments are spelled out because put() below boxes both char values to Character.
+ final char[] rawChars = original.toCharArray();
+ final Map<Character, Character> map = new HashMap<>(rawChars.length);
+
+ final ICharFormat charFormat = context.charFormat();
+ for(final char currentChar : rawChars) {
+ // A character that occurs more than once simply overwrites its earlier entry.
+ final char formatChar = charFormat.format(currentChar, context);
+ map.put(currentChar, formatChar);
+ }
+
+ return map;
+ }
+
/**
* 格式化列表
* @param list 列表
diff --git a/src/test/java/com/github/houbb/sensitive/word/benchmark/BenchmarkBasicTest.java b/src/test/java/com/github/houbb/sensitive/word/benchmark/BenchmarkBasicTest.java
index 9b3f651..aa2d4b0 100644
--- a/src/test/java/com/github/houbb/sensitive/word/benchmark/BenchmarkBasicTest.java
+++ b/src/test/java/com/github/houbb/sensitive/word/benchmark/BenchmarkBasicTest.java
@@ -6,7 +6,7 @@ import com.github.houbb.sensitive.word.core.SensitiveWordHelper;
import org.junit.Ignore;
import org.junit.Test;
-//@Ignore
+@Ignore
public class BenchmarkBasicTest {
/**
@@ -42,6 +42,8 @@ public class BenchmarkBasicTest {
* 12111 第一次优化
*
* 1133 只有单词校验
+ *
+ * V0.6.0 优化 replace mapping 之后:621ms,性能接近翻倍。
*/
@Test
public void costTimeOnlyWordTest() {
diff --git a/src/test/java/com/github/houbb/sensitive/word/benchmark/BenchmarkTimesTest.java b/src/test/java/com/github/houbb/sensitive/word/benchmark/BenchmarkTimesTest.java
new file mode 100644
index 0000000..fd389f7
--- /dev/null
+++ b/src/test/java/com/github/houbb/sensitive/word/benchmark/BenchmarkTimesTest.java
@@ -0,0 +1,76 @@
+package com.github.houbb.sensitive.word.benchmark;
+
+import com.github.houbb.heaven.util.util.RandomUtil;
+import com.github.houbb.sensitive.word.bs.SensitiveWordBs;
+import com.github.houbb.sensitive.word.core.SensitiveWordHelper;
+import org.junit.Ignore;
+import org.junit.Test;
+
+@Ignore
+public class BenchmarkTimesTest {
+
+ /**
+ * Benchmark: one 100+ character text passed to findAll 100,000 times, every ignore* option off.
+ * Prints the elapsed wall-clock milliseconds to stdout; nothing is asserted.
+ * V0.6.0: 1470ms, recorded as close to 72,000 QPS (100,000 calls / 1.47s is about 68,000)
+ */
+ @Test
+ public void onlyWordAndNoReplaceTest() {
+ // Word check only; num/url/email checks and all ignore* options are off. The loop below runs 100,000 times.
+ SensitiveWordBs sensitiveWordBs = SensitiveWordBs.newInstance()
+ .enableWordCheck(true)
+ .enableNumCheck(false)
+ .enableUrlCheck(false)
+ .enableEmailCheck(false)
+ .ignoreRepeat(false)
+ .ignoreCase(false)
+ .ignoreNumStyle(false)
+ .ignoreChineseStyle(false)
+ .ignoreEnglishStyle(false)
+ .ignoreWidth(false)
+ .init();
+
+ String randomText = "你他妈的不要说脏话"+ RandomUtil.randomString("1234567890bcdefghiJKLMNOPQRSTUVWXYZ", 100)
+ + "我们他妈的从来不说脏说";
+
+ long start = System.currentTimeMillis();
+ for(int i = 0; i < 100_000; i++) {
+ sensitiveWordBs.findAll(randomText);
+ }
+ long end = System.currentTimeMillis();
+ System.out.println("------------------ COST: " + (end-start));
+ }
+
+ /**
+ * Benchmark: one 100+ character text passed to findAll 100,000 times, every ignore* option on.
+ * NOTE(review): despite the method name, only findAll is timed here; no replace call is made.
+ * V0.6.0: 2744ms
+ */
+ @Test
+ public void onlyWordAndWithReplaceTest() {
+ // Word check only; num/url/email checks are off and all ignore* options are on. The loop below runs 100,000 times.
+ SensitiveWordBs sensitiveWordBs = SensitiveWordBs.newInstance()
+ .enableWordCheck(true)
+ .enableNumCheck(false)
+ .enableUrlCheck(false)
+ .enableEmailCheck(false)
+ .ignoreRepeat(true)
+ .ignoreCase(true)
+ .ignoreNumStyle(true)
+ .ignoreChineseStyle(true)
+ .ignoreEnglishStyle(true)
+ .ignoreWidth(true)
+ .init();
+
+ String randomText = "你他妈的不要说脏话"+ RandomUtil.randomString("1234567890bcdefghiJKLMNOPQRSTUVWXYZ", 100)
+ + "我们他妈的从来不说脏说";
+
+ long start = System.currentTimeMillis();
+ for(int i = 0; i < 100_000; i++) {
+ sensitiveWordBs.findAll(randomText);
+ }
+ long end = System.currentTimeMillis();
+ System.out.println("------------------ COST: " + (end-start));
+ }
+
+}