opt warmUp

This commit is contained in:
binbin.hou
2025-09-04 19:37:46 +08:00
parent 3488319f29
commit 44c26b985a
8 changed files with 117 additions and 24 deletions

View File

@@ -459,6 +459,8 @@
# release_0.29.0
| 序号 | 变更类型 | 说明 | 时间 | 备注 |
|:---|:-----|-------------------------------|:------------------|:---------------------------------------------------|
| 1 | O | 尽量用 string.charAt 替代 `chars[i]` | 2025-9-4 16:22:24 | https://github.com/houbb/sensitive-word/issues/131 |
| 序号 | 变更类型 | 说明 | 时间 | 备注 |
|:---|:-----|---------------------------------|:------------------|:---------------------------------------------------|
| 1 | O | 尽量用 string.charAt 替代 `chars[i]` | 2025-9-4 16:22:24 | https://github.com/houbb/sensitive-word/issues/131 |
| 2 | O | 移除引导类公共方法的 check | 2025-9-4 16:22:24 | https://github.com/houbb/sensitive-word/issues/131 |
| 3 | A | 新增 warmUp | 2025-9-4 16:22:24 | |

View File

@@ -110,7 +110,7 @@ v0.24.0 开始内置支持对敏感词的分类细化,不过工作量比较大
<dependency>
<groupId>com.github.houbb</groupId>
<artifactId>sensitive-word</artifactId>
<version>0.28.0</version>
<version>0.29.0</version>
</dependency>
```
@@ -1429,6 +1429,8 @@ FormatCombine/CheckCombine/AllowDenyCombine 组合策略,允许用户自定义
[v0.27.0 敏感词库独立拆分](https://houbb.github.io/2020/01/07/sensitive-word-15-v0.27.0-dict-split)
[v0.28.0+v0.29.0 敏感词性能优化之本地方法调用为何这么慢?](https://houbb.github.io/2025/08/29/sensitive-word-why-so-slow)
![wechat](https://img-blog.csdnimg.cn/63926529df364f09bcb203a8a9016854.png)
# NLP 开源矩阵

View File

@@ -6,7 +6,7 @@
<groupId>com.github.houbb</groupId>
<artifactId>sensitive-word</artifactId>
<version>0.29.0-SNAPSHOT</version>
<version>0.29.0</version>
<properties>
<!--============================== All Plugins START ==============================-->

View File

@@ -0,0 +1,26 @@
package com.github.houbb.sensitive.word.api;
import com.github.houbb.sensitive.word.bs.SensitiveWordBs;
import java.util.List;
/**
 * Warm-up hook: exercises the word engine ahead of real traffic so one-time
 * costs (class loading, JIT compilation, etc.) are paid during init.
 *
 * @author binbin.hou
 * @since 0.29.0
 */
public interface IWordWarmUp {

    /**
     * Performs the warm-up round.
     *
     * @param sensitiveWordBs the bootstrap instance being warmed up
     * @param wordContext     word-processing context
     * @param wordAllowList   allow (whitelist) words
     * @param wordDenyList    deny (blacklist) words
     */
    void warmUp(final SensitiveWordBs sensitiveWordBs,
                final IWordContext wordContext,
                final List<String> wordAllowList,
                final List<String> wordDenyList);
}

View File

@@ -21,6 +21,7 @@ import com.github.houbb.sensitive.word.support.replace.WordReplaces;
import com.github.houbb.sensitive.word.support.result.WordResultHandlers;
import com.github.houbb.sensitive.word.support.resultcondition.WordResultConditions;
import com.github.houbb.sensitive.word.support.tag.WordTags;
import com.github.houbb.sensitive.word.support.warmup.WordWarmUps;
import com.github.houbb.sensitive.word.utils.InnerWordFormatUtils;
import com.github.houbb.sensitive.word.utils.InnerWordTagUtils;
@@ -226,6 +227,12 @@ public class SensitiveWordBs implements ISensitiveWordDestroy {
*/
private IWordFormatText wordFormatText = WordFormatTexts.defaults();
/**
* 预热
* @since 0.29.0
*/
private IWordWarmUp wordWarmUp = WordWarmUps.defaults();
/**
* 新建验证实例
* <p>
@@ -273,9 +280,19 @@ public class SensitiveWordBs implements ISensitiveWordDestroy {
this.context = context;
this.warmUp(wordAllowList, wordDenyList);
return this;
}
/**
 * Runs the configured warm-up strategy to reduce cold-start latency
 * on the first real lookup.
 *
 * @param wordAllowList allow (whitelist) words
 * @param wordDenyList  deny (blacklist) words
 * @since 0.29.0
 */
private void warmUp(final List<String> wordAllowList, final List<String> wordDenyList) {
    this.wordWarmUp.warmUp(this, context, wordAllowList, wordDenyList);
}
/**
* 构建默认的上下文
*
@@ -466,6 +483,13 @@ public class SensitiveWordBs implements ISensitiveWordDestroy {
return this;
}
/**
 * Sets a custom warm-up strategy.
 *
 * @param wordWarmUp the warm-up implementation; must not be null
 * @return this bootstrap instance, for fluent chaining
 * @since 0.29.0
 */
public SensitiveWordBs wordWarmUp(IWordWarmUp wordWarmUp) {
    ArgUtil.notNull(wordWarmUp, "wordWarmUp");
    this.wordWarmUp = wordWarmUp;
    return this;
}
//-------------------------------------------------------- 基础属性设置
/**
* 是否启用 ipv4 校验
@@ -663,8 +687,6 @@ public class SensitiveWordBs implements ISensitiveWordDestroy {
* @since 0.0.1
*/
public <R> List<R> findAll(final String target, final IWordResultHandler<R> handler) {
ArgUtil.notNull(handler, "handler");
List<IWordResult> wordResults = sensitiveWord.findAll(target, context);
return CollectionUtil.toList(wordResults, new IHandler<IWordResult, R>() {
@Override
@@ -685,8 +707,6 @@ public class SensitiveWordBs implements ISensitiveWordDestroy {
* @since 0.0.1
*/
public <R> R findFirst(final String target, final IWordResultHandler<R> handler) {
ArgUtil.notNull(handler, "handler");
IWordResult wordResult = sensitiveWord.findFirst(target, context);
return handler.handle(wordResult, context, target);
}

View File

@@ -0,0 +1,36 @@
package com.github.houbb.sensitive.word.support.warmup;
import com.github.houbb.heaven.util.util.CollectionUtil;
import com.github.houbb.sensitive.word.api.IWordContext;
import com.github.houbb.sensitive.word.api.IWordWarmUp;
import com.github.houbb.sensitive.word.bs.SensitiveWordBs;
import java.util.List;
/**
 * Default warm-up strategy: builds a small sample text from the configured
 * word lists, then invokes every public lookup API a few times so class
 * loading and JIT compilation happen before real traffic arrives.
 *
 * @author binbin.hou
 * @since 0.29.0
 */
public class WordWarmUpDefault implements IWordWarmUp {

    @Override
    public void warmUp(SensitiveWordBs sensitiveWordBs, IWordContext wordContext, List<String> wordAllowList, List<String> wordDenyList) {
        // Seed text; enrich it with the first allow word and first deny word
        // (when present) so the warm-up actually exercises the matching logic.
        String testInfo = "sensitive-word";
        if(CollectionUtil.isNotEmpty(wordAllowList)) {
            testInfo = testInfo + " " + wordAllowList.get(0);
        }
        if(CollectionUtil.isNotEmpty(wordDenyList)) {
            testInfo = testInfo + " " + wordDenyList.get(0);
        }
        // Best-effort only: a few iterations reduce, but cannot fully
        // eliminate, first-call latency.
        for(int i = 0; i < 5; i++) {
            sensitiveWordBs.findAll(testInfo);
            sensitiveWordBs.findFirst(testInfo);
            sensitiveWordBs.contains(testInfo);
            sensitiveWordBs.replace(testInfo);
            sensitiveWordBs.tags(testInfo);
        }
    }
}

View File

@@ -0,0 +1,16 @@
package com.github.houbb.sensitive.word.support.warmup;
import com.github.houbb.sensitive.word.api.IWordWarmUp;
import com.github.houbb.sensitive.word.bs.SensitiveWordBs;
/**
 * Factory for {@link IWordWarmUp} warm-up strategies.
 *
 * @author binbin.hou
 * @since 0.29.0
 */
public final class WordWarmUps {

    /** Static factory class: not meant to be instantiated. */
    private WordWarmUps() {}

    /**
     * Returns the default warm-up strategy.
     *
     * @return a new {@link WordWarmUpDefault} instance
     */
    public static IWordWarmUp defaults() {
        return new WordWarmUpDefault();
    }
}

View File

@@ -3,6 +3,7 @@ package com.github.houbb.sensitive.word.issues;
import com.github.houbb.sensitive.word.api.IWordDeny;
import com.github.houbb.sensitive.word.bs.SensitiveWordBs;
import com.github.houbb.sensitive.word.support.allow.WordAllows;
import com.github.houbb.sensitive.word.support.deny.WordDenys;
import com.github.houbb.sensitive.word.support.tag.WordTags;
import java.io.IOException;
@@ -25,12 +26,7 @@ public class Issue131 {
SensitiveWordBs sensitiveWordBs = SensitiveWordBs.newInstance()
.wordFailFast(true)
.wordAllow(WordAllows.empty())
.wordDeny(new IWordDeny() {
@Override
public List<String> deny() {
return allWord;
}
})
.wordDeny(WordDenys.empty())
.ignoreChineseStyle(false)
.ignoreCase(false)
.ignoreEnglishStyle(false)
@@ -39,17 +35,12 @@ public class Issue131 {
.ignoreWidth(false)
.wordTag(WordTags.none())
.init();
long time = System.currentTimeMillis();
costTimeTest(sensitiveWordBs, demo1);
long cTime = System.currentTimeMillis() - time;
System.out.println("---DONE"+cTime);
}
private static void costTimeTest(SensitiveWordBs sensitiveWordBs, String demo1) throws IOException {
int count = 10000;
for (int i = 0; i < count; i++) {
for(int i = 0; i < 5; i++) {
long time = System.nanoTime();
List<String> emitWord1 = sensitiveWordBs.findAll(demo1);
long cTime = System.nanoTime() - time;
System.out.println("Total="+cTime);
}
}