diff --git a/CHANGE_LOG.md b/CHANGE_LOG.md index daf1ff9..508d158 100644 --- a/CHANGE_LOG.md +++ b/CHANGE_LOG.md @@ -170,3 +170,11 @@ | 1 | O | 优化单词校验逻辑 | 2023-06-08 23:51:58 | | | 2 | A | 新增是否单词校验的开关 | 2023-06-08 23:51:58 | | + +# release_0.5.0 + +| 序号 | 变更类型 | 说明 | 时间 | 备注 | +|:---|:-----|-----------------------------|:--------------------|:-------| +| 1 | A | 优化单词结果,减少 String 创建 | 2023-06-08 23:51:58 | | +| 2 | A | 优化 contains 判断,减少 String 创建 | 2023-06-08 23:51:58 | | + diff --git a/pom.xml b/pom.xml index 10ad0e8..baf22df 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ com.github.houbb sensitive-word - 0.4.0 + 0.5.0 diff --git a/release.bat b/release.bat index b1af6b6..690d4a5 100644 --- a/release.bat +++ b/release.bat @@ -10,9 +10,9 @@ ECHO "============================= RELEASE START..." :: 版本号信息(需要手动指定) :::: 旧版本名称 -SET version=0.4.0 +SET version=0.5.0 :::: 新版本名称 -SET newVersion=0.5.0 +SET newVersion=0.6.0 :::: 组织名称 SET groupName=com.github.houbb :::: 项目名称 diff --git a/src/main/java/com/github/houbb/sensitive/word/api/IWordMap.java b/src/main/java/com/github/houbb/sensitive/word/api/IWordMap.java index 80f8fa4..5cd5d77 100644 --- a/src/main/java/com/github/houbb/sensitive/word/api/IWordMap.java +++ b/src/main/java/com/github/houbb/sensitive/word/api/IWordMap.java @@ -2,10 +2,8 @@ package com.github.houbb.sensitive.word.api; import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum; import com.github.houbb.sensitive.word.constant.enums.WordContainsTypeEnum; -import com.github.houbb.sensitive.word.support.check.ISensitiveCheck; import java.util.Collection; -import java.util.List; /** * 敏感词 map @@ -24,13 +22,13 @@ public interface IWordMap { /** * 是否包含敏感词 - * @param string 字符串 + * @param stringBuilder 缓冲 * @param context 上下文 * @return 是否包含 - * @since 0.0.1 + * @since 0.5.0 * @see ValidModeEnum#FAIL_FAST 建议使用快速返回模式 */ - WordContainsTypeEnum contains(final String string, + WordContainsTypeEnum contains(final StringBuilder stringBuilder, final IWordContext context); } diff --git a/src/main/java/com/github/houbb/sensitive/word/api/IWordResult.java b/src/main/java/com/github/houbb/sensitive/word/api/IWordResult.java index ec101b7..aa310da 100644 --- a/src/main/java/com/github/houbb/sensitive/word/api/IWordResult.java +++ b/src/main/java/com/github/houbb/sensitive/word/api/IWordResult.java @@ -7,13 +7,6 @@ package com.github.houbb.sensitive.word.api; */ public interface IWordResult { - /** - * 敏感词 - * @return 敏感词 - * @since 0.1.0 - */ - String word(); - /** * 开始下标 * @return 开始下标 diff --git a/src/main/java/com/github/houbb/sensitive/word/api/IWordResultHandler.java b/src/main/java/com/github/houbb/sensitive/word/api/IWordResultHandler.java index 9ecd075..fa5e435 100644 --- a/src/main/java/com/github/houbb/sensitive/word/api/IWordResultHandler.java +++ b/src/main/java/com/github/houbb/sensitive/word/api/IWordResultHandler.java @@ -10,9 +10,13 @@ public interface IWordResultHandler { /** * 对于结果的处理 * @param wordResult 结果 + * @param wordContext 上下文 + * @param originalText 原始文本 * @return 处理结果 * @since 0.1.0 */ - R handle(final IWordResult wordResult); + R handle(final IWordResult wordResult, + final IWordContext wordContext, + final String originalText); } diff --git a/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java b/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java index d5696f1..01149bf 100644 --- a/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java +++ b/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java @@ -434,7 +434,7 @@ public class SensitiveWordBs { return CollectionUtil.toList(wordResults, new IHandler() { @Override public R handle(IWordResult wordResult) { - return handler.handle(wordResult); + return handler.handle(wordResult, context, target); } }); } @@ -453,7 +453,7 @@ public class SensitiveWordBs { ArgUtil.notNull(handler, "handler"); IWordResult wordResult = sensitiveWord.findFirst(target, context); - return handler.handle(wordResult); + return handler.handle(wordResult, context, target); } /** diff --git a/src/main/java/com/github/houbb/sensitive/word/core/SensitiveWord.java b/src/main/java/com/github/houbb/sensitive/word/core/SensitiveWord.java index f8164f5..692bd76 100644 --- a/src/main/java/com/github/houbb/sensitive/word/core/SensitiveWord.java +++ b/src/main/java/com/github/houbb/sensitive/word/core/SensitiveWord.java @@ -57,14 +57,9 @@ public class SensitiveWord extends AbstractSensitiveWord { int wordLength = checkResult.index(); if (wordLength > 0) { // 保存敏感词 - // TODO: 这其实是一个比较消耗的操作,后续可以考虑简化掉。 - String sensitiveWord = text.substring(i, i + wordLength); - - // 添加去重 WordResult wordResult = WordResult.newInstance() .startIndex(i) - .endIndex(i+wordLength) - .word(sensitiveWord); + .endIndex(i+wordLength); resultList.add(wordResult); // 快速返回 diff --git a/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckWord.java b/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckWord.java index ff4c89e..7dda748 100644 --- a/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckWord.java +++ b/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckWord.java @@ -45,7 +45,7 @@ public class SensitiveCheckWord extends AbstractSensitiveCheck { stringBuilder.append(mappingChar); // 判断是否存在 - WordContainsTypeEnum wordContainsTypeEnum = wordMap.contains(stringBuilder.toString(), context); + WordContainsTypeEnum wordContainsTypeEnum = wordMap.contains(stringBuilder, context); if(WordContainsTypeEnum.CONTAINS_END.equals(wordContainsTypeEnum)) { actualLength = stringBuilder.length(); diff --git a/src/main/java/com/github/houbb/sensitive/word/support/map/WordMap.java b/src/main/java/com/github/houbb/sensitive/word/support/map/WordMap.java index 38de3b1..bb4b422 100644 --- a/src/main/java/com/github/houbb/sensitive/word/support/map/WordMap.java +++ b/src/main/java/com/github/houbb/sensitive/word/support/map/WordMap.java @@ -94,28 +94,30 @@ public class WordMap implements IWordMap { * (1)直接遍历所有 * (2)如果遇到,则直接返回 true * - * @param string 字符串 + * @param stringBuilder 字符串 * @return 是否包含 * @since 0.0.1 */ @Override - public WordContainsTypeEnum contains(String string, final IWordContext context) { - if (StringUtil.isEmpty(string)) { + public WordContainsTypeEnum contains(StringBuilder stringBuilder, final IWordContext context) { + if (stringBuilder == null + || stringBuilder.length() <= 0) { return WordContainsTypeEnum.NOT_FOUND; } - return innerContainsSensitive(string, context); + return innerContainsSensitive(stringBuilder, context); } - private WordContainsTypeEnum innerContainsSensitive(String txt, + private WordContainsTypeEnum innerContainsSensitive(StringBuilder stringBuilder, IWordContext context) { // 初始化为当前的 map Map nowMap = this.innerWordMap; // 记录敏感词的长度 - for (int i = 0; i < txt.length(); i++) { + final int len = stringBuilder.length(); + for (int i = 0; i < len; i++) { // 获取当前的 map 信息 - nowMap = getNowMap(nowMap, context, txt, i); + nowMap = getNowMap(nowMap, context, stringBuilder, i); // 如果不为空,则判断是否为结尾。 if (ObjectUtil.isNull(nowMap)) { @@ -155,16 +157,16 @@ public class WordMap implements IWordMap { * 获取当前的 Map * @param nowMap 原始的当前 map * @param context 上下文 - * @param txt 文本信息 + * @param stringBuilder 文本缓存 * @param index 下标 * @return 实际的当前 map * @since 0.0.7 */ private Map getNowMap(Map nowMap, final IWordContext context, - final String txt, + final StringBuilder stringBuilder, final int index) { - char c = txt.charAt(index); + char c = stringBuilder.charAt(index); char mappingChar = context.charFormat().format(c, context); // 这里做一次重复词的处理 @@ -173,7 +175,7 @@ public class WordMap implements IWordMap { // 启用忽略重复&当前下标不是第一个 if(context.ignoreRepeat() && index > 0) { - char preChar = txt.charAt(index-1); + char preChar = stringBuilder.charAt(index-1); char preMappingChar = context.charFormat().format(preChar, context); // 直接赋值为上一个 map diff --git a/src/main/java/com/github/houbb/sensitive/word/support/result/WordResult.java b/src/main/java/com/github/houbb/sensitive/word/support/result/WordResult.java index 5980854..10fa165 100644 --- a/src/main/java/com/github/houbb/sensitive/word/support/result/WordResult.java +++ b/src/main/java/com/github/houbb/sensitive/word/support/result/WordResult.java @@ -8,8 +8,6 @@ import com.github.houbb.sensitive.word.api.IWordResult; */ public class WordResult implements IWordResult { - private String word; - private int startIndex; private int endIndex; @@ -18,16 +16,6 @@ public class WordResult implements IWordResult { return new WordResult(); } - @Override - public String word() { - return word; - } - - public WordResult word(String word) { - this.word = word; - return this; - } - @Override public int startIndex() { return startIndex; @@ -51,8 +39,7 @@ public class WordResult implements IWordResult { @Override public String toString() { return "WordResult{" + - "word='" + word + '\'' + - ", startIndex=" + startIndex + + "startIndex=" + startIndex + ", endIndex=" + endIndex + '}'; } diff --git a/src/main/java/com/github/houbb/sensitive/word/support/result/WordResultHandlerRaw.java b/src/main/java/com/github/houbb/sensitive/word/support/result/WordResultHandlerRaw.java index 67f110f..a1ddb2d 100644 --- a/src/main/java/com/github/houbb/sensitive/word/support/result/WordResultHandlerRaw.java +++ b/src/main/java/com/github/houbb/sensitive/word/support/result/WordResultHandlerRaw.java @@ -1,6 +1,7 @@ package com.github.houbb.sensitive.word.support.result; import com.github.houbb.heaven.annotation.ThreadSafe; +import com.github.houbb.sensitive.word.api.IWordContext; import com.github.houbb.sensitive.word.api.IWordResult; import com.github.houbb.sensitive.word.api.IWordResultHandler; @@ -22,7 +23,7 @@ public class WordResultHandlerRaw implements IWordResultHandler { } @Override - public IWordResult handle(IWordResult wordResult) { + public IWordResult handle(IWordResult wordResult, IWordContext wordContext, String originalText) { return wordResult; } diff --git a/src/main/java/com/github/houbb/sensitive/word/support/result/WordResultHandlerWord.java b/src/main/java/com/github/houbb/sensitive/word/support/result/WordResultHandlerWord.java index 7048759..9c41ef7 100644 --- a/src/main/java/com/github/houbb/sensitive/word/support/result/WordResultHandlerWord.java +++ b/src/main/java/com/github/houbb/sensitive/word/support/result/WordResultHandlerWord.java @@ -1,8 +1,10 @@ package com.github.houbb.sensitive.word.support.result; import com.github.houbb.heaven.annotation.ThreadSafe; +import com.github.houbb.sensitive.word.api.IWordContext; import com.github.houbb.sensitive.word.api.IWordResult; import com.github.houbb.sensitive.word.api.IWordResultHandler; +import com.github.houbb.sensitive.word.utils.InnerCharUtils; /** * 只保留单词 @@ -23,11 +25,13 @@ public class WordResultHandlerWord implements IWordResultHandler { } @Override - public String handle(IWordResult wordResult) { + public String handle(IWordResult wordResult, IWordContext wordContext, String originalText) { if(wordResult == null) { return null; } - return wordResult.word(); + + // 截取 + return InnerCharUtils.getString(originalText.toCharArray(), wordResult); } - + } diff --git a/src/main/java/com/github/houbb/sensitive/word/utils/InnerCharUtils.java b/src/main/java/com/github/houbb/sensitive/word/utils/InnerCharUtils.java index f4b9e8b..f6c8653 100644 --- a/src/main/java/com/github/houbb/sensitive/word/utils/InnerCharUtils.java +++ b/src/main/java/com/github/houbb/sensitive/word/utils/InnerCharUtils.java @@ -2,6 +2,7 @@ package com.github.houbb.sensitive.word.utils; import com.github.houbb.heaven.util.guava.Guavas; import com.github.houbb.heaven.util.lang.ObjectUtil; +import com.github.houbb.sensitive.word.api.IWordResult; import java.util.Map; @@ -65,4 +66,32 @@ public final class InnerCharUtils { return character; } + /** + * 构建字符串 + * @param chars 字符数组 + * @param startIndex 开始位置 + * @param endIndex 结束位置 + * @return 结果 + * @since 0.5.0 + */ + public static String getString(final char[] chars, + final int startIndex, + final int endIndex) { + // 截取 + int len = endIndex - startIndex; + return new String(chars, startIndex, len); + } + + /** + * 构建字符串 + * @param chars 字符数组 + * @param wordResult 结果 + * @return 结果 + * @since 0.5.0 + */ + public static String getString(final char[] chars, + final IWordResult wordResult) { + return getString(chars, wordResult.startIndex(), wordResult.endIndex()); + } + } diff --git a/src/test/java/com/github/houbb/sensitive/word/benchmark/BasicTest.java b/src/test/java/com/github/houbb/sensitive/word/benchmark/BenchmarkBasicTest.java similarity index 78% rename from src/test/java/com/github/houbb/sensitive/word/benchmark/BasicTest.java rename to src/test/java/com/github/houbb/sensitive/word/benchmark/BenchmarkBasicTest.java index 871efcb..b91d7b3 100644 --- a/src/test/java/com/github/houbb/sensitive/word/benchmark/BasicTest.java +++ b/src/test/java/com/github/houbb/sensitive/word/benchmark/BenchmarkBasicTest.java @@ -7,14 +7,18 @@ import org.junit.Ignore; import org.junit.Test; @Ignore -public class BasicTest { +public class BenchmarkBasicTest { /** * * * 100*100 耗时:926ms,性能較差。 * - * 100*100000 的字符:12942ms 第一次优化。 + * 100*100000 的字符: + * + * 12942ms 第一次优化。 + * 12983ms 添加对应的 contains 优化,性能无太大变化。 + * */ @Test public void costTimeTest() { @@ -34,6 +38,10 @@ public class BasicTest { /** * * 100*100000 的字符:12440ms + * + * 12111 第一次优化 + * + * 1133 只有单词校验 */ @Test public void costTimeOnlyWordTest() { @@ -43,7 +51,10 @@ public class BasicTest { // 1W 次 long start = System.currentTimeMillis(); SensitiveWordBs sensitiveWordBs = SensitiveWordBs.newInstance() - .enableWordCheck(false) +// .enableWordCheck(false) + .enableNumCheck(false) + .enableUrlCheck(false) + .enableEmailCheck(false) .init(); for(int i = 0; i < 10000; i++) { diff --git a/src/test/java/com/github/houbb/sensitive/word/core/SensitiveWordHelperTest.java b/src/test/java/com/github/houbb/sensitive/word/core/SensitiveWordHelperTest.java index 9d82823..e66c145 100644 --- a/src/test/java/com/github/houbb/sensitive/word/core/SensitiveWordHelperTest.java +++ b/src/test/java/com/github/houbb/sensitive/word/core/SensitiveWordHelperTest.java @@ -62,7 +62,7 @@ public class SensitiveWordHelperTest { final String text = "五星红旗迎风飘扬,毛主席的画像屹立在天安门前。"; List wordList = SensitiveWordHelper.findAll(text, WordResultHandlers.raw()); - Assert.assertEquals("[WordResult{word='五星红旗', startIndex=0, endIndex=4}, WordResult{word='毛主席', startIndex=9, endIndex=12}, WordResult{word='天安门', startIndex=18, endIndex=21}]", wordList.toString()); + Assert.assertEquals("[WordResult{startIndex=0, endIndex=4}, WordResult{startIndex=9, endIndex=12}, WordResult{startIndex=18, endIndex=21}]", wordList.toString()); } @@ -99,7 +99,7 @@ public class SensitiveWordHelperTest { final String text = "五星红旗迎风飘扬,毛主席的画像屹立在天安门前。"; IWordResult word = SensitiveWordHelper.findFirst(text, WordResultHandlers.raw()); - Assert.assertEquals("WordResult{word='五星红旗', startIndex=0, endIndex=4}", word.toString()); + Assert.assertEquals("WordResult{startIndex=0, endIndex=4}", word.toString()); } /** diff --git a/src/test/java/com/github/houbb/sensitive/word/replace/MySensitiveWordReplace.java b/src/test/java/com/github/houbb/sensitive/word/replace/MySensitiveWordReplace.java index c4f05c0..374e26b 100644 --- a/src/test/java/com/github/houbb/sensitive/word/replace/MySensitiveWordReplace.java +++ b/src/test/java/com/github/houbb/sensitive/word/replace/MySensitiveWordReplace.java @@ -3,6 +3,7 @@ package com.github.houbb.sensitive.word.replace; import com.github.houbb.sensitive.word.api.ISensitiveWordReplace; import com.github.houbb.sensitive.word.api.IWordContext; import com.github.houbb.sensitive.word.api.IWordResult; +import com.github.houbb.sensitive.word.utils.InnerCharUtils; /** * 自定义敏感词替换策略 @@ -14,7 +15,7 @@ public class MySensitiveWordReplace implements ISensitiveWordReplace { @Override public void replace(StringBuilder stringBuilder, final char[] rawChars, IWordResult wordResult, IWordContext wordContext) { - String sensitiveWord = wordResult.word(); + String sensitiveWord = InnerCharUtils.getString(rawChars, wordResult); // 自定义不同的敏感词替换策略,可以从数据库等地方读取 if("五星红旗".equals(sensitiveWord)) { stringBuilder.append("国家旗帜");