opt warmUp

This commit is contained in:
binbin.hou
2025-09-04 19:37:46 +08:00
parent 3488319f29
commit 44c26b985a
8 changed files with 117 additions and 24 deletions

View File

@@ -459,6 +459,8 @@
# release_0.29.0 # release_0.29.0
| 序号 | 变更类型 | 说明 | 时间 | 备注 | | 序号 | 变更类型 | 说明 | 时间 | 备注 |
|:---|:-----|-------------------------------|:------------------|:---------------------------------------------------| |:---|:-----|---------------------------------|:------------------|:---------------------------------------------------|
| 1 | O | 尽量用 string.charAt 替代 `chars[i]` | 2025-9-4 16:22:24 | https://github.com/houbb/sensitive-word/issues/131 | | 1 | O | 尽量用 string.charAt 替代 `chars[i]` | 2025-9-4 16:22:24 | https://github.com/houbb/sensitive-word/issues/131 |
| 2 | O | 移除引导类公共方法的 check | 2025-9-4 16:22:24 | https://github.com/houbb/sensitive-word/issues/131 |
| 3 | A | 新增 warmUp | 2025-9-4 16:22:24 | |

View File

@@ -110,7 +110,7 @@ v0.24.0 开始内置支持对敏感词的分类细化,不过工作量比较大
<dependency> <dependency>
<groupId>com.github.houbb</groupId> <groupId>com.github.houbb</groupId>
<artifactId>sensitive-word</artifactId> <artifactId>sensitive-word</artifactId>
<version>0.28.0</version> <version>0.29.0</version>
</dependency> </dependency>
``` ```
@@ -1429,6 +1429,8 @@ FormatCombine/CheckCombine/AllowDenyCombine 组合策略,允许用户自定义
[v0.27.0 敏感词库独立拆分](https://houbb.github.io/2020/01/07/sensitive-word-15-v0.27.0-dict-split) [v0.27.0 敏感词库独立拆分](https://houbb.github.io/2020/01/07/sensitive-word-15-v0.27.0-dict-split)
[v0.28.0+v0.29.0 敏感词性能优化之本地方法调用为何这么慢?](https://houbb.github.io/2025/08/29/sensitive-word-why-so-slow)
![wechat](https://img-blog.csdnimg.cn/63926529df364f09bcb203a8a9016854.png) ![wechat](https://img-blog.csdnimg.cn/63926529df364f09bcb203a8a9016854.png)
# NLP 开源矩阵 # NLP 开源矩阵

View File

@@ -6,7 +6,7 @@
<groupId>com.github.houbb</groupId> <groupId>com.github.houbb</groupId>
<artifactId>sensitive-word</artifactId> <artifactId>sensitive-word</artifactId>
<version>0.29.0-SNAPSHOT</version> <version>0.29.0</version>
<properties> <properties>
<!--============================== All Plugins START ==============================--> <!--============================== All Plugins START ==============================-->

View File

@@ -0,0 +1,26 @@
package com.github.houbb.sensitive.word.api;
import com.github.houbb.sensitive.word.bs.SensitiveWordBs;
import java.util.List;
/**
 * Warm-up hook: runs ahead of real traffic to trigger class loading, JIT optimisation and
 * similar one-off costs.
 *
 * @author binbin.hou
 * @since 0.29.0
 */
public interface IWordWarmUp {
/**
 * Performs the warm-up.
 *
 * @param sensitiveWordBs the bootstrap instance itself
 * @param wordContext the word context
 * @param wordAllowList the allow-list words
 * @param wordDenyList the deny-list words
 */
void warmUp(final SensitiveWordBs sensitiveWordBs,
final IWordContext wordContext,
final List<String> wordAllowList,
final List<String> wordDenyList);
}

View File

@@ -21,6 +21,7 @@ import com.github.houbb.sensitive.word.support.replace.WordReplaces;
import com.github.houbb.sensitive.word.support.result.WordResultHandlers; import com.github.houbb.sensitive.word.support.result.WordResultHandlers;
import com.github.houbb.sensitive.word.support.resultcondition.WordResultConditions; import com.github.houbb.sensitive.word.support.resultcondition.WordResultConditions;
import com.github.houbb.sensitive.word.support.tag.WordTags; import com.github.houbb.sensitive.word.support.tag.WordTags;
import com.github.houbb.sensitive.word.support.warmup.WordWarmUps;
import com.github.houbb.sensitive.word.utils.InnerWordFormatUtils; import com.github.houbb.sensitive.word.utils.InnerWordFormatUtils;
import com.github.houbb.sensitive.word.utils.InnerWordTagUtils; import com.github.houbb.sensitive.word.utils.InnerWordTagUtils;
@@ -226,6 +227,12 @@ public class SensitiveWordBs implements ISensitiveWordDestroy {
*/ */
private IWordFormatText wordFormatText = WordFormatTexts.defaults(); private IWordFormatText wordFormatText = WordFormatTexts.defaults();
/**
* 预热
* @since 0.29.0
*/
private IWordWarmUp wordWarmUp = WordWarmUps.defaults();
/** /**
* 新建验证实例 * 新建验证实例
* <p> * <p>
@@ -273,9 +280,19 @@ public class SensitiveWordBs implements ISensitiveWordDestroy {
this.context = context; this.context = context;
this.warmUp(wordAllowList, wordDenyList);
return this; return this;
} }
/**
 * Runs the configured warm-up strategy so that the first real call avoids a cold start.
 *
 * @param wordAllowList allow-list words forwarded to the warm-up strategy
 * @param wordDenyList deny-list words forwarded to the warm-up strategy
 * @since 0.29.0
 */
private void warmUp(final List<String> wordAllowList, final List<String> wordDenyList) {
// Delegates to the pluggable strategy, handing over this instance and its current context.
this.wordWarmUp.warmUp(this, context, wordAllowList, wordDenyList);
}
/** /**
* 构建默认的上下文 * 构建默认的上下文
* *
@@ -466,6 +483,13 @@ public class SensitiveWordBs implements ISensitiveWordDestroy {
return this; return this;
} }
/**
 * Sets the warm-up strategy.
 *
 * @param wordWarmUp the warm-up strategy; checked via {@code ArgUtil.notNull}
 *                   (presumably rejects {@code null} - confirm against ArgUtil)
 * @return this instance, for call chaining
 * @since 0.29.0
 */
public SensitiveWordBs wordWarmUp(IWordWarmUp wordWarmUp) {
ArgUtil.notNull(wordWarmUp, "wordWarmUp");
this.wordWarmUp = wordWarmUp;
return this;
}
//-------------------------------------------------------- 基础属性设置 //-------------------------------------------------------- 基础属性设置
/** /**
* 是否启用 ipv4 校验 * 是否启用 ipv4 校验
@@ -663,8 +687,6 @@ public class SensitiveWordBs implements ISensitiveWordDestroy {
* @since 0.0.1 * @since 0.0.1
*/ */
public <R> List<R> findAll(final String target, final IWordResultHandler<R> handler) { public <R> List<R> findAll(final String target, final IWordResultHandler<R> handler) {
ArgUtil.notNull(handler, "handler");
List<IWordResult> wordResults = sensitiveWord.findAll(target, context); List<IWordResult> wordResults = sensitiveWord.findAll(target, context);
return CollectionUtil.toList(wordResults, new IHandler<IWordResult, R>() { return CollectionUtil.toList(wordResults, new IHandler<IWordResult, R>() {
@Override @Override
@@ -685,8 +707,6 @@ public class SensitiveWordBs implements ISensitiveWordDestroy {
* @since 0.0.1 * @since 0.0.1
*/ */
public <R> R findFirst(final String target, final IWordResultHandler<R> handler) { public <R> R findFirst(final String target, final IWordResultHandler<R> handler) {
ArgUtil.notNull(handler, "handler");
IWordResult wordResult = sensitiveWord.findFirst(target, context); IWordResult wordResult = sensitiveWord.findFirst(target, context);
return handler.handle(wordResult, context, target); return handler.handle(wordResult, context, target);
} }

View File

@@ -0,0 +1,36 @@
package com.github.houbb.sensitive.word.support.warmup;
import com.github.houbb.heaven.util.util.CollectionUtil;
import com.github.houbb.sensitive.word.api.IWordContext;
import com.github.houbb.sensitive.word.api.IWordWarmUp;
import com.github.houbb.sensitive.word.bs.SensitiveWordBs;
import java.util.List;
/**
 * Default warm-up strategy.
 *
 * <p>Builds a short probe text from a fixed seed plus the first allow word and the first deny
 * word (when the respective list is not empty), then runs that text through the query methods
 * of the bootstrap a fixed number of times. This only mitigates the cold-start cost; it cannot
 * eliminate it.</p>
 *
 * @since 0.29.0
 */
public class WordWarmUpDefault implements IWordWarmUp {

    /**
     * Number of warm-up rounds executed per call.
     */
    private static final int WARM_UP_ROUNDS = 5;

    /**
     * Fixed seed text that every probe starts with.
     */
    private static final String SEED_TEXT = "sensitive-word";

    /**
     * Exercises {@code findAll}, {@code findFirst}, {@code contains}, {@code replace} and
     * {@code tags} on the given bootstrap with a probe text.
     *
     * @param sensitiveWordBs the bootstrap instance to exercise
     * @param wordContext the word context (not used by this strategy)
     * @param wordAllowList the allow-list words; only the first entry is used
     * @param wordDenyList the deny-list words; only the first entry is used
     */
    @Override
    public void warmUp(SensitiveWordBs sensitiveWordBs, IWordContext wordContext, List<String> wordAllowList, List<String> wordDenyList) {
        final StringBuilder probe = new StringBuilder(SEED_TEXT);
        appendFirstWord(probe, wordAllowList);
        appendFirstWord(probe, wordDenyList);
        final String testInfo = probe.toString();

        // Mitigation only: repeated calls help, but cannot fully remove the first-call penalty.
        for (int round = 0; round < WARM_UP_ROUNDS; round++) {
            sensitiveWordBs.findAll(testInfo);
            sensitiveWordBs.findFirst(testInfo);
            sensitiveWordBs.contains(testInfo);
            sensitiveWordBs.replace(testInfo);
            sensitiveWordBs.tags(testInfo);
        }
    }

    /**
     * Appends a space and the first element of {@code words} to {@code probe} when the list is
     * not empty; otherwise leaves {@code probe} untouched.
     */
    private static void appendFirstWord(final StringBuilder probe, final List<String> words) {
        if (CollectionUtil.isNotEmpty(words)) {
            probe.append(' ').append(words.get(0));
        }
    }
}

View File

@@ -0,0 +1,16 @@
package com.github.houbb.sensitive.word.support.warmup;
import com.github.houbb.sensitive.word.api.IWordWarmUp;
import com.github.houbb.sensitive.word.bs.SensitiveWordBs;
/**
 * Static factory for warm-up strategies.
 *
 * @since 0.29.0
 */
public final class WordWarmUps {

    /**
     * Utility class: not meant to be instantiated.
     */
    private WordWarmUps() {
    }

    /**
     * Returns the default warm-up strategy.
     *
     * @return a new {@link WordWarmUpDefault} instance
     */
    public static IWordWarmUp defaults() {
        return new WordWarmUpDefault();
    }
}

View File

@@ -3,6 +3,7 @@ package com.github.houbb.sensitive.word.issues;
import com.github.houbb.sensitive.word.api.IWordDeny; import com.github.houbb.sensitive.word.api.IWordDeny;
import com.github.houbb.sensitive.word.bs.SensitiveWordBs; import com.github.houbb.sensitive.word.bs.SensitiveWordBs;
import com.github.houbb.sensitive.word.support.allow.WordAllows; import com.github.houbb.sensitive.word.support.allow.WordAllows;
import com.github.houbb.sensitive.word.support.deny.WordDenys;
import com.github.houbb.sensitive.word.support.tag.WordTags; import com.github.houbb.sensitive.word.support.tag.WordTags;
import java.io.IOException; import java.io.IOException;
@@ -25,12 +26,7 @@ public class Issue131 {
SensitiveWordBs sensitiveWordBs = SensitiveWordBs.newInstance() SensitiveWordBs sensitiveWordBs = SensitiveWordBs.newInstance()
.wordFailFast(true) .wordFailFast(true)
.wordAllow(WordAllows.empty()) .wordAllow(WordAllows.empty())
.wordDeny(new IWordDeny() { .wordDeny(WordDenys.empty())
@Override
public List<String> deny() {
return allWord;
}
})
.ignoreChineseStyle(false) .ignoreChineseStyle(false)
.ignoreCase(false) .ignoreCase(false)
.ignoreEnglishStyle(false) .ignoreEnglishStyle(false)
@@ -39,17 +35,12 @@ public class Issue131 {
.ignoreWidth(false) .ignoreWidth(false)
.wordTag(WordTags.none()) .wordTag(WordTags.none())
.init(); .init();
long time = System.currentTimeMillis();
costTimeTest(sensitiveWordBs, demo1);
long cTime = System.currentTimeMillis() - time;
System.out.println("---DONE"+cTime);
}
private static void costTimeTest(SensitiveWordBs sensitiveWordBs, String demo1) throws IOException { for(int i = 0; i < 5; i++) {
int count = 10000; long time = System.nanoTime();
for (int i = 0; i < count; i++) {
List<String> emitWord1 = sensitiveWordBs.findAll(demo1); List<String> emitWord1 = sensitiveWordBs.findAll(demo1);
long cTime = System.nanoTime() - time;
System.out.println("Total="+cTime);
} }
} }