diff --git a/CHANGE_LOG.md b/CHANGE_LOG.md index 695302b..297127e 100644 --- a/CHANGE_LOG.md +++ b/CHANGE_LOG.md @@ -463,4 +463,11 @@ |:---|:-----|---------------------------------|:------------------|:---------------------------------------------------| | 1 | O | 尽量用 string.charAt 替代 `chars[i]` | 2025-9-4 16:22:24 | https://github.com/houbb/sensitive-word/issues/131 | | 2 | O | 移除引导类公共方法的 check | 2025-9-4 16:22:24 | https://github.com/houbb/sensitive-word/issues/131 | -| 3 | A | 新增 warmUp | 2025-9-4 16:22:24 | | \ No newline at end of file +| 3 | A | 新增 warmUp | 2025-9-4 16:22:24 | | + +# release_0.29.1 + +| 序号 | 变更类型 | 说明 | 时间 | 备注 | +|:---|:-----|-----------------------------|:------------------|:--------------------| +| 1 | O | 改进 check、format 的 chains 方法 | 2025-9-5 16:22:24 | 优化性能 | +| 2 | O | InnerWordFormatUtils#format | 2025-9-5 16:22:24 | 优化性能+内存 toCharArray | diff --git a/README.md b/README.md index 6d422a8..6420574 100644 --- a/README.md +++ b/README.md @@ -110,7 +110,7 @@ v0.24.0 开始内置支持对敏感词的分类细化,不过工作量比较大 com.github.houbb sensitive-word - 0.29.0 + 0.29.1 ``` @@ -1365,25 +1365,13 @@ ps: 不同环境会有差异,但是比例基本稳定。 # 后期 road-map -- [x] 移除单个汉字的敏感词,在中国,要把词组当做一次词,降低误判率。 +- [ ] fastutil 优化 jdk 内置集合类 -- [x] 支持单个的敏感词变化? +- [ ] 中文转换优化-opencc4j 内存+性能优化 -remove、add、edit? 
+WordFormatIgnoreChineseStyle 转换类不够精简,可以优化一下。采用直接映射(收口在 opencc4j)。 -- [x] 敏感词标签接口支持 - -- [x] 敏感词处理时标签支持 - -- [x] wordData 的内存占用对比 + 优化 - -- [x] 用户指定自定义的词组,同时允许指定词组的组合获取,更加灵活 - -FormatCombine/CheckCombine/AllowDenyCombine 组合策略,允许用户自定义。 - -- [ ] word check 策略的优化,统一遍历+转换 - -- [ ] 添加 ThreadLocal 等性能优化 +- [] 各种其他涉及到 char 拆箱的地方改进 # 拓展阅读 diff --git a/pom.xml b/pom.xml index 2e3144e..115dfd7 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ com.github.houbb sensitive-word - 0.29.0 + 0.29.1 diff --git a/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckArray.java b/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckArray.java new file mode 100644 index 0000000..e036f66 --- /dev/null +++ b/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckArray.java @@ -0,0 +1,47 @@ +package com.github.houbb.sensitive.word.support.check; + +import com.github.houbb.heaven.util.common.ArgUtil; +import com.github.houbb.sensitive.word.api.IWordCheck; +import com.github.houbb.sensitive.word.api.context.InnerSensitiveWordContext; +import com.github.houbb.sensitive.word.support.result.WordLengthResult; + +import java.util.List; + +/** + * 集合 + * @author binbin.hou + * @since 0.30.0 + */ +public class WordCheckArray implements IWordCheck { + + private final IWordCheck[] sensitiveChecks; + private final int size; + public WordCheckArray(List sensitiveChecks) { + ArgUtil.notEmpty(sensitiveChecks, "sensitiveChecks"); + + this.size = sensitiveChecks.size(); + this.sensitiveChecks = new IWordCheck[size]; + for(int i = 0; i < size; i++) { + this.sensitiveChecks[i] = sensitiveChecks.get(i); + } + } + + @Override + public WordCheckResult sensitiveCheck(int beginIndex, InnerSensitiveWordContext checkContext) { + // 循环调用 + for(int i = 0; i < size; i++) { + IWordCheck sensitiveCheck = sensitiveChecks[i]; + WordCheckResult result = sensitiveCheck.sensitiveCheck(beginIndex, checkContext); + + WordLengthResult wordLengthResult = result.wordLengthResult(); + 
if(wordLengthResult.wordAllowLen() > 0 || wordLengthResult.wordDenyLen()> 0) { + return result; + } + } + + // 这里直接进行正则表达式相关的调用。 + // 默认返回 0 + return WordCheckNone.getNoneResult(); + } + +} diff --git a/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckInit.java b/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckInit.java index d7f3c2b..9234c9d 100644 --- a/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckInit.java +++ b/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckInit.java @@ -12,6 +12,7 @@ import java.util.List; * 检测初始化类 * @since 0.3.0 */ +@Deprecated public abstract class WordCheckInit implements IWordCheck { /** diff --git a/src/main/java/com/github/houbb/sensitive/word/support/check/WordChecks.java b/src/main/java/com/github/houbb/sensitive/word/support/check/WordChecks.java index d173c2d..5fff684 100644 --- a/src/main/java/com/github/houbb/sensitive/word/support/check/WordChecks.java +++ b/src/main/java/com/github/houbb/sensitive/word/support/check/WordChecks.java @@ -1,13 +1,10 @@ package com.github.houbb.sensitive.word.support.check; -import com.github.houbb.heaven.support.pipeline.Pipeline; import com.github.houbb.heaven.util.util.ArrayUtil; import com.github.houbb.heaven.util.util.CollectionUtil; import com.github.houbb.sensitive.word.api.IWordCheck; -import com.github.houbb.sensitive.word.api.IWordContext; import java.util.ArrayList; -import java.util.Collection; import java.util.List; /** @@ -23,29 +20,16 @@ public final class WordChecks { return none(); } - return new WordCheckInit() { - @Override - protected void init(Pipeline pipeline) { - for(IWordCheck check : sensitiveChecks) { - pipeline.addLast(check); - } - } - }; + List wordChecks = new ArrayList<>(sensitiveChecks.length); + return array(wordChecks); } - public static IWordCheck chains(final Collection sensitiveChecks) { + public static IWordCheck chains(final List sensitiveChecks) { if 
(CollectionUtil.isEmpty(sensitiveChecks)){ return none(); } - return new WordCheckInit() { - @Override - protected void init(Pipeline pipeline) { - for(IWordCheck check : sensitiveChecks) { - pipeline.addLast(check); - } - } - }; + return array(sensitiveChecks); } public static IWordCheck email() { @@ -88,4 +72,14 @@ public final class WordChecks { return WordCheckUrlNoPrefix.getInstance(); } + /** + * 集合 + * + * @return 实现 + * @since 0.30.0 + */ + public static IWordCheck array(final List wordChecks) { + return new WordCheckArray(wordChecks); + } + } diff --git a/src/main/java/com/github/houbb/sensitive/word/support/data/WordDataTreeNode.java b/src/main/java/com/github/houbb/sensitive/word/support/data/WordDataTreeNode.java index dc5fa34..d44a4c3 100644 --- a/src/main/java/com/github/houbb/sensitive/word/support/data/WordDataTreeNode.java +++ b/src/main/java/com/github/houbb/sensitive/word/support/data/WordDataTreeNode.java @@ -31,7 +31,7 @@ public class WordDataTreeNode implements ISensitiveWordDestroy { return this; } - public WordDataTreeNode getSubNode(final char c) { + public WordDataTreeNode getSubNode(final Character c) { if(subNodeMap == null) { return null; } @@ -52,14 +52,14 @@ public class WordDataTreeNode implements ISensitiveWordDestroy { subNodeMap=null; } - public void removeNode(final char c) { + public void removeNode(final Character c) { if (subNodeMap == null) { return; } subNodeMap.remove(c); } - public WordDataTreeNode addSubNode(char c, WordDataTreeNode subNode) { + public WordDataTreeNode addSubNode(Character c, WordDataTreeNode subNode) { if(this.subNodeMap == null) { subNodeMap = new HashMap<>(); } diff --git a/src/main/java/com/github/houbb/sensitive/word/support/format/WordFormatArray.java b/src/main/java/com/github/houbb/sensitive/word/support/format/WordFormatArray.java new file mode 100644 index 0000000..77fe26d --- /dev/null +++ b/src/main/java/com/github/houbb/sensitive/word/support/format/WordFormatArray.java @@ -0,0 +1,39 @@ 
+package com.github.houbb.sensitive.word.support.format; + +import com.github.houbb.heaven.util.common.ArgUtil; +import com.github.houbb.sensitive.word.api.IWordContext; +import com.github.houbb.sensitive.word.api.IWordFormat; + +import java.util.List; + +/** + * 直接列表调用 + * @author binbin.hou + * @since 0.30.0 + */ +public class WordFormatArray implements IWordFormat { + + private final IWordFormat[] wordFormats; + private final int size; + public WordFormatArray(List wordFormats) { + ArgUtil.notEmpty(wordFormats, "wordFormats"); + + this.size = wordFormats.size(); + this.wordFormats = new IWordFormat[size]; + for(int i = 0; i < size; i++) { + this.wordFormats[i] = wordFormats.get(i); + } + } + + @Override + public char format(char original, IWordContext context) { + char c = original; + for(int i = 0; i < size; i++) { + IWordFormat charFormat = wordFormats[i]; + c = charFormat.format(c, context); + } + + return c; + } + +} diff --git a/src/main/java/com/github/houbb/sensitive/word/support/format/WordFormatInit.java b/src/main/java/com/github/houbb/sensitive/word/support/format/WordFormatInit.java index 2c0c4fd..c047d59 100644 --- a/src/main/java/com/github/houbb/sensitive/word/support/format/WordFormatInit.java +++ b/src/main/java/com/github/houbb/sensitive/word/support/format/WordFormatInit.java @@ -14,6 +14,7 @@ import java.util.List; * @since 0.0.5 */ @ThreadSafe +@Deprecated public abstract class WordFormatInit implements IWordFormat { /** diff --git a/src/main/java/com/github/houbb/sensitive/word/support/format/WordFormats.java b/src/main/java/com/github/houbb/sensitive/word/support/format/WordFormats.java index eceb3b4..505ea0d 100644 --- a/src/main/java/com/github/houbb/sensitive/word/support/format/WordFormats.java +++ b/src/main/java/com/github/houbb/sensitive/word/support/format/WordFormats.java @@ -1,13 +1,10 @@ package com.github.houbb.sensitive.word.support.format; -import com.github.houbb.heaven.support.pipeline.Pipeline; -import 
com.github.houbb.heaven.util.guava.Guavas; import com.github.houbb.heaven.util.util.ArrayUtil; import com.github.houbb.heaven.util.util.CollectionUtil; import com.github.houbb.sensitive.word.api.IWordFormat; -import com.github.houbb.sensitive.word.api.IWordContext; -import java.util.Collection; +import java.util.ArrayList; import java.util.List; /** @@ -29,14 +26,8 @@ public final class WordFormats { return none(); } - return new WordFormatInit() { - @Override - protected void init(Pipeline pipeline) { - for(IWordFormat charFormat : charFormats) { - pipeline.addLast(charFormat); - } - } - }; + List wordFormats = new ArrayList<>(charFormats.length); + return array(wordFormats); } /** @@ -44,19 +35,12 @@ public final class WordFormats { * @param charFormats 列表 * @return 结果 */ - public static IWordFormat chains(final Collection charFormats) { + public static IWordFormat chains(final List charFormats) { if(CollectionUtil.isEmpty(charFormats)) { return none(); } - return new WordFormatInit() { - @Override - protected void init(Pipeline pipeline) { - for(IWordFormat charFormat : charFormats) { - pipeline.addLast(charFormat); - } - } - }; + return array(charFormats); } public static IWordFormat none() { @@ -82,4 +66,8 @@ public final class WordFormats { return WordFormatIgnoreWidth.getInstance(); } + public static IWordFormat array(final List wordFormats) { + return new WordFormatArray(wordFormats); + } + } diff --git a/src/main/java/com/github/houbb/sensitive/word/utils/InnerWordFormatUtils.java b/src/main/java/com/github/houbb/sensitive/word/utils/InnerWordFormatUtils.java index 25cdb7c..a03775f 100644 --- a/src/main/java/com/github/houbb/sensitive/word/utils/InnerWordFormatUtils.java +++ b/src/main/java/com/github/houbb/sensitive/word/utils/InnerWordFormatUtils.java @@ -35,8 +35,9 @@ public final class InnerWordFormatUtils { StringBuilder stringBuilder = new StringBuilder(); IWordFormat charFormat = context.wordFormat(); - char[] chars = original.toCharArray(); - 
for(char c : chars) { + int len = original.length(); + for(int i = 0; i < len; i++) { + char c = original.charAt(i); char cf = charFormat.format(c, context); stringBuilder.append(cf); }