diff --git a/CHANGE_LOG.md b/CHANGE_LOG.md
index daf1ff9..508d158 100644
--- a/CHANGE_LOG.md
+++ b/CHANGE_LOG.md
@@ -170,3 +170,11 @@
| 1 | O | 优化单词校验逻辑 | 2023-06-08 23:51:58 | |
| 2 | A | 新增是否单词校验的开关 | 2023-06-08 23:51:58 | |
+
+# release_0.5.0
+
+| 序号 | 变更类型 | 说明 | 时间 | 备注 |
+|:---|:-----|-----------------------------|:--------------------|:-------|
+| 1 | O | 优化单词结果,减少 String 创建 | 2023-06-08 23:51:58 | |
+| 2 | O | 优化 contains 判断,减少 String 创建 | 2023-06-08 23:51:58 | |
+
diff --git a/pom.xml b/pom.xml
index 10ad0e8..baf22df 100644
--- a/pom.xml
+++ b/pom.xml
@@ -6,7 +6,7 @@
com.github.houbb
sensitive-word
- 0.4.0
+ 0.5.0
diff --git a/release.bat b/release.bat
index b1af6b6..690d4a5 100644
--- a/release.bat
+++ b/release.bat
@@ -10,9 +10,9 @@ ECHO "============================= RELEASE START..."
:: 版本号信息(需要手动指定)
:::: 旧版本名称
-SET version=0.4.0
+SET version=0.5.0
:::: 新版本名称
-SET newVersion=0.5.0
+SET newVersion=0.6.0
:::: 组织名称
SET groupName=com.github.houbb
:::: 项目名称
diff --git a/src/main/java/com/github/houbb/sensitive/word/api/IWordMap.java b/src/main/java/com/github/houbb/sensitive/word/api/IWordMap.java
index 80f8fa4..5cd5d77 100644
--- a/src/main/java/com/github/houbb/sensitive/word/api/IWordMap.java
+++ b/src/main/java/com/github/houbb/sensitive/word/api/IWordMap.java
@@ -2,10 +2,8 @@ package com.github.houbb.sensitive.word.api;
import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum;
import com.github.houbb.sensitive.word.constant.enums.WordContainsTypeEnum;
-import com.github.houbb.sensitive.word.support.check.ISensitiveCheck;
import java.util.Collection;
-import java.util.List;
/**
* 敏感词 map
@@ -24,13 +22,13 @@ public interface IWordMap {
/**
* 是否包含敏感词
- * @param string 字符串
+ * @param stringBuilder 缓冲
* @param context 上下文
* @return 是否包含
- * @since 0.0.1
+ * @since 0.5.0
* @see ValidModeEnum#FAIL_FAST 建议使用快速返回模式
*/
- WordContainsTypeEnum contains(final String string,
+ WordContainsTypeEnum contains(final StringBuilder stringBuilder,
final IWordContext context);
}
diff --git a/src/main/java/com/github/houbb/sensitive/word/api/IWordResult.java b/src/main/java/com/github/houbb/sensitive/word/api/IWordResult.java
index ec101b7..aa310da 100644
--- a/src/main/java/com/github/houbb/sensitive/word/api/IWordResult.java
+++ b/src/main/java/com/github/houbb/sensitive/word/api/IWordResult.java
@@ -7,13 +7,6 @@ package com.github.houbb.sensitive.word.api;
*/
public interface IWordResult {
- /**
- * 敏感词
- * @return 敏感词
- * @since 0.1.0
- */
- String word();
-
/**
* 开始下标
* @return 开始下标
diff --git a/src/main/java/com/github/houbb/sensitive/word/api/IWordResultHandler.java b/src/main/java/com/github/houbb/sensitive/word/api/IWordResultHandler.java
index 9ecd075..fa5e435 100644
--- a/src/main/java/com/github/houbb/sensitive/word/api/IWordResultHandler.java
+++ b/src/main/java/com/github/houbb/sensitive/word/api/IWordResultHandler.java
@@ -10,9 +10,13 @@ public interface IWordResultHandler {
/**
* 对于结果的处理
* @param wordResult 结果
+ * @param wordContext 上下文
+ * @param originalText 原始文本
* @return 处理结果
* @since 0.1.0
*/
- R handle(final IWordResult wordResult);
+ R handle(final IWordResult wordResult,
+ final IWordContext wordContext,
+ final String originalText);
}
diff --git a/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java b/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java
index d5696f1..01149bf 100644
--- a/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java
+++ b/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java
@@ -434,7 +434,7 @@ public class SensitiveWordBs {
return CollectionUtil.toList(wordResults, new IHandler() {
@Override
public R handle(IWordResult wordResult) {
- return handler.handle(wordResult);
+ return handler.handle(wordResult, context, target);
}
});
}
@@ -453,7 +453,7 @@ public class SensitiveWordBs {
ArgUtil.notNull(handler, "handler");
IWordResult wordResult = sensitiveWord.findFirst(target, context);
- return handler.handle(wordResult);
+ return handler.handle(wordResult, context, target);
}
/**
diff --git a/src/main/java/com/github/houbb/sensitive/word/core/SensitiveWord.java b/src/main/java/com/github/houbb/sensitive/word/core/SensitiveWord.java
index f8164f5..692bd76 100644
--- a/src/main/java/com/github/houbb/sensitive/word/core/SensitiveWord.java
+++ b/src/main/java/com/github/houbb/sensitive/word/core/SensitiveWord.java
@@ -57,14 +57,9 @@ public class SensitiveWord extends AbstractSensitiveWord {
int wordLength = checkResult.index();
if (wordLength > 0) {
// 保存敏感词
- // TODO: 这其实是一个比较消耗的操作,后续可以考虑简化掉。
- String sensitiveWord = text.substring(i, i + wordLength);
-
- // 添加去重
WordResult wordResult = WordResult.newInstance()
.startIndex(i)
- .endIndex(i+wordLength)
- .word(sensitiveWord);
+ .endIndex(i+wordLength);
resultList.add(wordResult);
// 快速返回
diff --git a/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckWord.java b/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckWord.java
index ff4c89e..7dda748 100644
--- a/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckWord.java
+++ b/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckWord.java
@@ -45,7 +45,7 @@ public class SensitiveCheckWord extends AbstractSensitiveCheck {
stringBuilder.append(mappingChar);
// 判断是否存在
- WordContainsTypeEnum wordContainsTypeEnum = wordMap.contains(stringBuilder.toString(), context);
+ WordContainsTypeEnum wordContainsTypeEnum = wordMap.contains(stringBuilder, context);
if(WordContainsTypeEnum.CONTAINS_END.equals(wordContainsTypeEnum)) {
actualLength = stringBuilder.length();
diff --git a/src/main/java/com/github/houbb/sensitive/word/support/map/WordMap.java b/src/main/java/com/github/houbb/sensitive/word/support/map/WordMap.java
index 38de3b1..bb4b422 100644
--- a/src/main/java/com/github/houbb/sensitive/word/support/map/WordMap.java
+++ b/src/main/java/com/github/houbb/sensitive/word/support/map/WordMap.java
@@ -94,28 +94,30 @@ public class WordMap implements IWordMap {
* (1)直接遍历所有
* (2)如果遇到,则直接返回 true
*
- * @param string 字符串
+ * @param stringBuilder 字符串缓冲
* @return 是否包含
* @since 0.0.1
*/
@Override
- public WordContainsTypeEnum contains(String string, final IWordContext context) {
- if (StringUtil.isEmpty(string)) {
+ public WordContainsTypeEnum contains(StringBuilder stringBuilder, final IWordContext context) {
+ if (stringBuilder == null
+ || stringBuilder.length() <= 0) {
return WordContainsTypeEnum.NOT_FOUND;
}
- return innerContainsSensitive(string, context);
+ return innerContainsSensitive(stringBuilder, context);
}
- private WordContainsTypeEnum innerContainsSensitive(String txt,
+ private WordContainsTypeEnum innerContainsSensitive(StringBuilder stringBuilder,
IWordContext context) {
// 初始化为当前的 map
Map nowMap = this.innerWordMap;
// 记录敏感词的长度
- for (int i = 0; i < txt.length(); i++) {
+ final int len = stringBuilder.length();
+ for (int i = 0; i < len; i++) {
// 获取当前的 map 信息
- nowMap = getNowMap(nowMap, context, txt, i);
+ nowMap = getNowMap(nowMap, context, stringBuilder, i);
// 如果不为空,则判断是否为结尾。
if (ObjectUtil.isNull(nowMap)) {
@@ -155,16 +157,16 @@ public class WordMap implements IWordMap {
* 获取当前的 Map
* @param nowMap 原始的当前 map
* @param context 上下文
- * @param txt 文本信息
+ * @param stringBuilder 文本缓存
* @param index 下标
* @return 实际的当前 map
* @since 0.0.7
*/
private Map getNowMap(Map nowMap,
final IWordContext context,
- final String txt,
+ final StringBuilder stringBuilder,
final int index) {
- char c = txt.charAt(index);
+ char c = stringBuilder.charAt(index);
char mappingChar = context.charFormat().format(c, context);
// 这里做一次重复词的处理
@@ -173,7 +175,7 @@ public class WordMap implements IWordMap {
// 启用忽略重复&当前下标不是第一个
if(context.ignoreRepeat()
&& index > 0) {
- char preChar = txt.charAt(index-1);
+ char preChar = stringBuilder.charAt(index-1);
char preMappingChar = context.charFormat().format(preChar, context);
// 直接赋值为上一个 map
diff --git a/src/main/java/com/github/houbb/sensitive/word/support/result/WordResult.java b/src/main/java/com/github/houbb/sensitive/word/support/result/WordResult.java
index 5980854..10fa165 100644
--- a/src/main/java/com/github/houbb/sensitive/word/support/result/WordResult.java
+++ b/src/main/java/com/github/houbb/sensitive/word/support/result/WordResult.java
@@ -8,8 +8,6 @@ import com.github.houbb.sensitive.word.api.IWordResult;
*/
public class WordResult implements IWordResult {
- private String word;
-
private int startIndex;
private int endIndex;
@@ -18,16 +16,6 @@ public class WordResult implements IWordResult {
return new WordResult();
}
- @Override
- public String word() {
- return word;
- }
-
- public WordResult word(String word) {
- this.word = word;
- return this;
- }
-
@Override
public int startIndex() {
return startIndex;
@@ -51,8 +39,7 @@ public class WordResult implements IWordResult {
@Override
public String toString() {
return "WordResult{" +
- "word='" + word + '\'' +
- ", startIndex=" + startIndex +
+ "startIndex=" + startIndex +
", endIndex=" + endIndex +
'}';
}
diff --git a/src/main/java/com/github/houbb/sensitive/word/support/result/WordResultHandlerRaw.java b/src/main/java/com/github/houbb/sensitive/word/support/result/WordResultHandlerRaw.java
index 67f110f..a1ddb2d 100644
--- a/src/main/java/com/github/houbb/sensitive/word/support/result/WordResultHandlerRaw.java
+++ b/src/main/java/com/github/houbb/sensitive/word/support/result/WordResultHandlerRaw.java
@@ -1,6 +1,7 @@
package com.github.houbb.sensitive.word.support.result;
import com.github.houbb.heaven.annotation.ThreadSafe;
+import com.github.houbb.sensitive.word.api.IWordContext;
import com.github.houbb.sensitive.word.api.IWordResult;
import com.github.houbb.sensitive.word.api.IWordResultHandler;
@@ -22,7 +23,7 @@ public class WordResultHandlerRaw implements IWordResultHandler {
}
@Override
- public IWordResult handle(IWordResult wordResult) {
+ public IWordResult handle(IWordResult wordResult, IWordContext wordContext, String originalText) {
return wordResult;
}
diff --git a/src/main/java/com/github/houbb/sensitive/word/support/result/WordResultHandlerWord.java b/src/main/java/com/github/houbb/sensitive/word/support/result/WordResultHandlerWord.java
index 7048759..9c41ef7 100644
--- a/src/main/java/com/github/houbb/sensitive/word/support/result/WordResultHandlerWord.java
+++ b/src/main/java/com/github/houbb/sensitive/word/support/result/WordResultHandlerWord.java
@@ -1,8 +1,10 @@
package com.github.houbb.sensitive.word.support.result;
import com.github.houbb.heaven.annotation.ThreadSafe;
+import com.github.houbb.sensitive.word.api.IWordContext;
import com.github.houbb.sensitive.word.api.IWordResult;
import com.github.houbb.sensitive.word.api.IWordResultHandler;
+import com.github.houbb.sensitive.word.utils.InnerCharUtils;
/**
* 只保留单词
@@ -23,11 +25,13 @@ public class WordResultHandlerWord implements IWordResultHandler {
}
@Override
- public String handle(IWordResult wordResult) {
+ public String handle(IWordResult wordResult, IWordContext wordContext, String originalText) {
if(wordResult == null) {
return null;
}
- return wordResult.word();
+ // The result only carries its index range, so the word is recovered from the original text.
+ // substring copies just the matched slice; toCharArray() would duplicate the whole text per result.
+ return originalText.substring(wordResult.startIndex(), wordResult.endIndex());
}
-
+
}
diff --git a/src/main/java/com/github/houbb/sensitive/word/utils/InnerCharUtils.java b/src/main/java/com/github/houbb/sensitive/word/utils/InnerCharUtils.java
index f4b9e8b..f6c8653 100644
--- a/src/main/java/com/github/houbb/sensitive/word/utils/InnerCharUtils.java
+++ b/src/main/java/com/github/houbb/sensitive/word/utils/InnerCharUtils.java
@@ -2,6 +2,7 @@ package com.github.houbb.sensitive.word.utils;
import com.github.houbb.heaven.util.guava.Guavas;
import com.github.houbb.heaven.util.lang.ObjectUtil;
+import com.github.houbb.sensitive.word.api.IWordResult;
import java.util.Map;
@@ -65,4 +66,32 @@ public final class InnerCharUtils {
return character;
}
+ /**
+ * Builds a string from a sub-range of a character array.
+ * @param chars source character array
+ * @param startIndex index of the first character to include
+ * @param endIndex index one past the last character to include (exclusive)
+ * @return new string holding the characters in {@code [startIndex, endIndex)}
+ * @since 0.5.0
+ */
+ public static String getString(final char[] chars,
+ final int startIndex,
+ final int endIndex) {
+ // The String constructor takes an offset and a count, so convert the end index into a length.
+ int len = endIndex - startIndex;
+ return new String(chars, startIndex, len);
+ }
+
+ /**
+ * Builds the string covered by a word result's index range.
+ * @param chars source character array
+ * @param wordResult result supplying the start and end indexes
+ * @return string delimited by {@code wordResult.startIndex()} and {@code wordResult.endIndex()}
+ * @since 0.5.0
+ */
+ public static String getString(final char[] chars,
+ final IWordResult wordResult) {
+ return getString(chars, wordResult.startIndex(), wordResult.endIndex());
+ }
+
}
diff --git a/src/test/java/com/github/houbb/sensitive/word/benchmark/BasicTest.java b/src/test/java/com/github/houbb/sensitive/word/benchmark/BenchmarkBasicTest.java
similarity index 78%
rename from src/test/java/com/github/houbb/sensitive/word/benchmark/BasicTest.java
rename to src/test/java/com/github/houbb/sensitive/word/benchmark/BenchmarkBasicTest.java
index 871efcb..b91d7b3 100644
--- a/src/test/java/com/github/houbb/sensitive/word/benchmark/BasicTest.java
+++ b/src/test/java/com/github/houbb/sensitive/word/benchmark/BenchmarkBasicTest.java
@@ -7,14 +7,18 @@ import org.junit.Ignore;
import org.junit.Test;
@Ignore
-public class BasicTest {
+public class BenchmarkBasicTest {
/**
*
*
* 100*100 耗时:926ms,性能較差。
*
- * 100*100000 的字符:12942ms 第一次优化。
+ * 100*100000 的字符:
+ *
+ * 12942ms 第一次优化。
+ * 12983ms 添加对应的 contains 优化,性能无太大变化。
+ *
*/
@Test
public void costTimeTest() {
@@ -34,6 +38,10 @@ public class BasicTest {
/**
*
* 100*100000 的字符:12440ms
+ *
+ * 12111ms 第一次优化
+ *
+ * 1133ms 只有单词校验
*/
@Test
public void costTimeOnlyWordTest() {
@@ -43,7 +51,10 @@ public class BasicTest {
// 1W 次
long start = System.currentTimeMillis();
SensitiveWordBs sensitiveWordBs = SensitiveWordBs.newInstance()
- .enableWordCheck(false)
+// .enableWordCheck(false)
+ .enableNumCheck(false)
+ .enableUrlCheck(false)
+ .enableEmailCheck(false)
.init();
for(int i = 0; i < 10000; i++) {
diff --git a/src/test/java/com/github/houbb/sensitive/word/core/SensitiveWordHelperTest.java b/src/test/java/com/github/houbb/sensitive/word/core/SensitiveWordHelperTest.java
index 9d82823..e66c145 100644
--- a/src/test/java/com/github/houbb/sensitive/word/core/SensitiveWordHelperTest.java
+++ b/src/test/java/com/github/houbb/sensitive/word/core/SensitiveWordHelperTest.java
@@ -62,7 +62,7 @@ public class SensitiveWordHelperTest {
final String text = "五星红旗迎风飘扬,毛主席的画像屹立在天安门前。";
List wordList = SensitiveWordHelper.findAll(text, WordResultHandlers.raw());
- Assert.assertEquals("[WordResult{word='五星红旗', startIndex=0, endIndex=4}, WordResult{word='毛主席', startIndex=9, endIndex=12}, WordResult{word='天安门', startIndex=18, endIndex=21}]", wordList.toString());
+ Assert.assertEquals("[WordResult{startIndex=0, endIndex=4}, WordResult{startIndex=9, endIndex=12}, WordResult{startIndex=18, endIndex=21}]", wordList.toString());
}
@@ -99,7 +99,7 @@ public class SensitiveWordHelperTest {
final String text = "五星红旗迎风飘扬,毛主席的画像屹立在天安门前。";
IWordResult word = SensitiveWordHelper.findFirst(text, WordResultHandlers.raw());
- Assert.assertEquals("WordResult{word='五星红旗', startIndex=0, endIndex=4}", word.toString());
+ Assert.assertEquals("WordResult{startIndex=0, endIndex=4}", word.toString());
}
/**
diff --git a/src/test/java/com/github/houbb/sensitive/word/replace/MySensitiveWordReplace.java b/src/test/java/com/github/houbb/sensitive/word/replace/MySensitiveWordReplace.java
index c4f05c0..374e26b 100644
--- a/src/test/java/com/github/houbb/sensitive/word/replace/MySensitiveWordReplace.java
+++ b/src/test/java/com/github/houbb/sensitive/word/replace/MySensitiveWordReplace.java
@@ -3,6 +3,7 @@ package com.github.houbb.sensitive.word.replace;
import com.github.houbb.sensitive.word.api.ISensitiveWordReplace;
import com.github.houbb.sensitive.word.api.IWordContext;
import com.github.houbb.sensitive.word.api.IWordResult;
+import com.github.houbb.sensitive.word.utils.InnerCharUtils;
/**
* 自定义敏感词替换策略
@@ -14,7 +15,7 @@ public class MySensitiveWordReplace implements ISensitiveWordReplace {
@Override
public void replace(StringBuilder stringBuilder, final char[] rawChars, IWordResult wordResult, IWordContext wordContext) {
- String sensitiveWord = wordResult.word();
+ String sensitiveWord = InnerCharUtils.getString(rawChars, wordResult);
// 自定义不同的敏感词替换策略,可以从数据库等地方读取
if("五星红旗".equals(sensitiveWord)) {
stringBuilder.append("国家旗帜");