diff --git a/CHANGE_LOG.md b/CHANGE_LOG.md index 2196ebf..695302b 100644 --- a/CHANGE_LOG.md +++ b/CHANGE_LOG.md @@ -459,6 +459,8 @@ # release_0.29.0 -| 序号 | 变更类型 | 说明 | 时间 | 备注 | -|:---|:-----|-------------------------------|:------------------|:---------------------------------------------------| -| 1 | O | 尽量用 string.charAt 替代 `chars[i]` | 2025-9-4 16:22:24 | https://github.com/houbb/sensitive-word/issues/131 | \ No newline at end of file +| 序号 | 变更类型 | 说明 | 时间 | 备注 | +|:---|:-----|---------------------------------|:------------------|:---------------------------------------------------| +| 1 | O | 尽量用 string.charAt 替代 `chars[i]` | 2025-9-4 16:22:24 | https://github.com/houbb/sensitive-word/issues/131 | +| 2 | O | 移除引导类公共方法的 check | 2025-9-4 16:22:24 | https://github.com/houbb/sensitive-word/issues/131 | +| 3 | A | 新增 warmUp | 2025-9-4 16:22:24 | | \ No newline at end of file diff --git a/README.md b/README.md index b49bd59..9e05e0e 100644 --- a/README.md +++ b/README.md @@ -110,7 +110,7 @@ v0.24.0 开始内置支持对敏感词的分类细化,不过工作量比较大 com.github.houbb sensitive-word - 0.28.0 + 0.29.0 ``` @@ -1429,6 +1429,8 @@ FormatCombine/CheckCombine/AllowDenyCombine 组合策略,允许用户自定义 [v0.27.0 敏感词库独立拆分](https://houbb.github.io/2020/01/07/sensitive-word-15-v0.27.0-dict-split) +[v0.28.0+v0.29.0 敏感词性能优化之本地方法调用为何这么慢?](https://houbb.github.io/2025/08/29/sensitive-word-why-so-slow) + ![wechat](https://img-blog.csdnimg.cn/63926529df364f09bcb203a8a9016854.png) # NLP 开源矩阵 diff --git a/pom.xml b/pom.xml index e4b0f79..2e3144e 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ com.github.houbb sensitive-word - 0.29.0-SNAPSHOT + 0.29.0 diff --git a/src/main/java/com/github/houbb/sensitive/word/api/IWordWarmUp.java b/src/main/java/com/github/houbb/sensitive/word/api/IWordWarmUp.java new file mode 100644 index 0000000..95dfe46 --- /dev/null +++ b/src/main/java/com/github/houbb/sensitive/word/api/IWordWarmUp.java @@ -0,0 +1,26 @@ +package com.github.houbb.sensitive.word.api; + +import 
com.github.houbb.sensitive.word.bs.SensitiveWordBs; + +import java.util.List; + +/** + * Warms up ahead of time to trigger class loading, JIT optimization, etc. + * @author binbin.hou + * @since 0.29.0 + */ +public interface IWordWarmUp { + + /** + * Runs the warm-up. + * @param sensitiveWordBs the bootstrap instance itself + * @param wordContext the word context + * @param wordAllowList the allow list + * @param wordDenyList the deny list + */ + void warmUp(final SensitiveWordBs sensitiveWordBs, + final IWordContext wordContext, + final List wordAllowList, + final List wordDenyList); + +} diff --git a/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java b/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java index c2e0eb4..f09c77e 100644 --- a/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java +++ b/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java @@ -21,6 +21,7 @@ import com.github.houbb.sensitive.word.support.replace.WordReplaces; import com.github.houbb.sensitive.word.support.result.WordResultHandlers; import com.github.houbb.sensitive.word.support.resultcondition.WordResultConditions; import com.github.houbb.sensitive.word.support.tag.WordTags; +import com.github.houbb.sensitive.word.support.warmup.WordWarmUps; import com.github.houbb.sensitive.word.utils.InnerWordFormatUtils; import com.github.houbb.sensitive.word.utils.InnerWordTagUtils; @@ -226,6 +227,12 @@ public class SensitiveWordBs implements ISensitiveWordDestroy { */ private IWordFormatText wordFormatText = WordFormatTexts.defaults(); + /** + * Warm-up strategy + * @since 0.29.0 + */ + private IWordWarmUp wordWarmUp = WordWarmUps.defaults(); + /** * 新建验证实例 *

@@ -273,9 +280,19 @@ public class SensitiveWordBs implements ISensitiveWordDestroy { this.context = context; + this.warmUp(wordAllowList, wordDenyList); + return this; } + /** + * Runs the configured warm-up strategy to avoid a cold start. + * @since 0.29.0 + */ + private void warmUp(final List wordAllowList, final List wordDenyList) { + this.wordWarmUp.warmUp(this, context, wordAllowList, wordDenyList); + } + /** * 构建默认的上下文 * @@ -466,6 +483,13 @@ public class SensitiveWordBs implements ISensitiveWordDestroy { return this; } + public SensitiveWordBs wordWarmUp(IWordWarmUp wordWarmUp) { + ArgUtil.notNull(wordWarmUp, "wordWarmUp"); + + this.wordWarmUp = wordWarmUp; + return this; + } + //-------------------------------------------------------- 基础属性设置 /** * 是否启用 ipv4 校验 @@ -663,8 +687,6 @@ public class SensitiveWordBs implements ISensitiveWordDestroy { * @since 0.0.1 */ public List findAll(final String target, final IWordResultHandler handler) { - ArgUtil.notNull(handler, "handler"); - List wordResults = sensitiveWord.findAll(target, context); return CollectionUtil.toList(wordResults, new IHandler() { @Override @@ -685,8 +707,6 @@ public class SensitiveWordBs implements ISensitiveWordDestroy { * @since 0.0.1 */ public R findFirst(final String target, final IWordResultHandler handler) { - ArgUtil.notNull(handler, "handler"); - IWordResult wordResult = sensitiveWord.findFirst(target, context); return handler.handle(wordResult, context, target); } diff --git a/src/main/java/com/github/houbb/sensitive/word/support/warmup/WordWarmUpDefault.java b/src/main/java/com/github/houbb/sensitive/word/support/warmup/WordWarmUpDefault.java new file mode 100644 index 0000000..3723112 --- /dev/null +++ b/src/main/java/com/github/houbb/sensitive/word/support/warmup/WordWarmUpDefault.java @@ -0,0 +1,36 @@ +package com.github.houbb.sensitive.word.support.warmup; + +import com.github.houbb.heaven.util.util.CollectionUtil; +import com.github.houbb.sensitive.word.api.IWordContext; +import com.github.houbb.sensitive.word.api.IWordWarmUp; +import 
com.github.houbb.sensitive.word.bs.SensitiveWordBs; + +import java.util.List; + +/** + * Default warm-up strategy: exercises the main query methods a few times on a short sample text. + * @since 0.29.0 + */ +public class WordWarmUpDefault implements IWordWarmUp { + + @Override + public void warmUp(SensitiveWordBs sensitiveWordBs, IWordContext wordContext, List wordAllowList, List wordDenyList) { + String testInfo = "sensitive-word"; + if(CollectionUtil.isNotEmpty(wordAllowList)) { + testInfo = testInfo + " " + wordAllowList.get(0); + } + if(CollectionUtil.isNotEmpty(wordDenyList)) { + testInfo = testInfo + " " + wordDenyList.get(0); + } + + // best-effort mitigation only; the cold-start cost cannot be fully eliminated + for(int i = 0; i < 5; i++) { + sensitiveWordBs.findAll(testInfo); + sensitiveWordBs.findFirst(testInfo); + sensitiveWordBs.contains(testInfo); + sensitiveWordBs.replace(testInfo); + sensitiveWordBs.tags(testInfo); + } + } + +} diff --git a/src/main/java/com/github/houbb/sensitive/word/support/warmup/WordWarmUps.java b/src/main/java/com/github/houbb/sensitive/word/support/warmup/WordWarmUps.java new file mode 100644 index 0000000..1ae26bb --- /dev/null +++ b/src/main/java/com/github/houbb/sensitive/word/support/warmup/WordWarmUps.java @@ -0,0 +1,16 @@ +package com.github.houbb.sensitive.word.support.warmup; + +import com.github.houbb.sensitive.word.api.IWordWarmUp; +import com.github.houbb.sensitive.word.bs.SensitiveWordBs; + +/** + * Factory for warm-up strategies + * @since 0.29.0 + */ +public final class WordWarmUps { + + public static IWordWarmUp defaults() { + return new WordWarmUpDefault(); + } + +} diff --git a/src/test/java/com/github/houbb/sensitive/word/issues/Issue131.java b/src/test/java/com/github/houbb/sensitive/word/issues/Issue131.java index 1e4bdb1..a1b90e0 100644 --- a/src/test/java/com/github/houbb/sensitive/word/issues/Issue131.java +++ b/src/test/java/com/github/houbb/sensitive/word/issues/Issue131.java @@ -3,6 +3,7 @@ package com.github.houbb.sensitive.word.issues; import com.github.houbb.sensitive.word.api.IWordDeny; import com.github.houbb.sensitive.word.bs.SensitiveWordBs; import 
com.github.houbb.sensitive.word.support.allow.WordAllows; +import com.github.houbb.sensitive.word.support.deny.WordDenys; import com.github.houbb.sensitive.word.support.tag.WordTags; import java.io.IOException; @@ -25,12 +26,7 @@ public class Issue131 { SensitiveWordBs sensitiveWordBs = SensitiveWordBs.newInstance() .wordFailFast(true) .wordAllow(WordAllows.empty()) - .wordDeny(new IWordDeny() { - @Override - public List deny() { - return allWord; - } - }) + .wordDeny(WordDenys.empty()) .ignoreChineseStyle(false) .ignoreCase(false) .ignoreEnglishStyle(false) @@ -39,17 +35,12 @@ public class Issue131 { .ignoreWidth(false) .wordTag(WordTags.none()) .init(); - long time = System.currentTimeMillis(); - costTimeTest(sensitiveWordBs, demo1); - long cTime = System.currentTimeMillis() - time; - System.out.println("---DONE"+cTime); - } - private static void costTimeTest(SensitiveWordBs sensitiveWordBs, String demo1) throws IOException { - int count = 10000; - - for (int i = 0; i < count; i++) { + for(int i = 0; i < 5; i++) { + long time = System.nanoTime(); List emitWord1 = sensitiveWordBs.findAll(demo1); + long cTime = System.nanoTime() - time; + System.out.println("Total="+cTime); } }