mirror of
https://github.com/houbb/sensitive-word.git
synced 2026-03-22 08:27:36 +08:00
opt warmUp
This commit is contained in:
@@ -459,6 +459,8 @@
|
||||
|
||||
# release_0.29.0
|
||||
|
||||
| 序号 | 变更类型 | 说明 | 时间 | 备注 |
|
||||
|:---|:-----|-------------------------------|:------------------|:---------------------------------------------------|
|
||||
| 序号 | 变更类型 | 说明 | 时间 | 备注 |
|
||||
|:---|:-----|---------------------------------|:------------------|:---------------------------------------------------|
|
||||
| 1 | O | 尽量用 string.charAt 替代 `chars[i]` | 2025-9-4 16:22:24 | https://github.com/houbb/sensitive-word/issues/131 |
|
||||
| 2 | O | 移除引导类公共方法的 check | 2025-9-4 16:22:24 | https://github.com/houbb/sensitive-word/issues/131 |
|
||||
| 3 | A | 新增 warmUp | 2025-9-4 16:22:24 | |
|
||||
@@ -110,7 +110,7 @@ v0.24.0 开始内置支持对敏感词的分类细化,不过工作量比较大
|
||||
<dependency>
|
||||
<groupId>com.github.houbb</groupId>
|
||||
<artifactId>sensitive-word</artifactId>
|
||||
<version>0.28.0</version>
|
||||
<version>0.29.0</version>
|
||||
</dependency>
|
||||
```
|
||||
|
||||
@@ -1429,6 +1429,8 @@ FormatCombine/CheckCombine/AllowDenyCombine 组合策略,允许用户自定义
|
||||
|
||||
[v0.27.0 敏感词库独立拆分](https://houbb.github.io/2020/01/07/sensitive-word-15-v0.27.0-dict-split)
|
||||
|
||||
[v0.28.0+v0.29.0 敏感词性能优化之本地方法调用为何这么慢?](https://houbb.github.io/2025/08/29/sensitive-word-why-so-slow)
|
||||
|
||||

|
||||
|
||||
# NLP 开源矩阵
|
||||
|
||||
2
pom.xml
2
pom.xml
@@ -6,7 +6,7 @@
|
||||
|
||||
<groupId>com.github.houbb</groupId>
|
||||
<artifactId>sensitive-word</artifactId>
|
||||
<version>0.29.0-SNAPSHOT</version>
|
||||
<version>0.29.0</version>
|
||||
|
||||
<properties>
|
||||
<!--============================== All Plugins START ==============================-->
|
||||
|
||||
@@ -0,0 +1,26 @@
|
||||
package com.github.houbb.sensitive.word.api;
|
||||
|
||||
import com.github.houbb.sensitive.word.bs.SensitiveWordBs;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* 提前预热,触发类加载、JIT 优化等
|
||||
* @author binbin.hou
|
||||
* @since 0.29.0
|
||||
*/
|
||||
public interface IWordWarmUp {
|
||||
|
||||
/**
|
||||
* 预热
|
||||
* @param sensitiveWordBs 引导类本身
|
||||
* @param wordContext 上下文
|
||||
* @param wordDenyList 允许
|
||||
* @param wordAllowList 禁止
|
||||
*/
|
||||
void warmUp(final SensitiveWordBs sensitiveWordBs,
|
||||
final IWordContext wordContext,
|
||||
final List<String> wordAllowList,
|
||||
final List<String> wordDenyList);
|
||||
|
||||
}
|
||||
@@ -21,6 +21,7 @@ import com.github.houbb.sensitive.word.support.replace.WordReplaces;
|
||||
import com.github.houbb.sensitive.word.support.result.WordResultHandlers;
|
||||
import com.github.houbb.sensitive.word.support.resultcondition.WordResultConditions;
|
||||
import com.github.houbb.sensitive.word.support.tag.WordTags;
|
||||
import com.github.houbb.sensitive.word.support.warmup.WordWarmUps;
|
||||
import com.github.houbb.sensitive.word.utils.InnerWordFormatUtils;
|
||||
import com.github.houbb.sensitive.word.utils.InnerWordTagUtils;
|
||||
|
||||
@@ -226,6 +227,12 @@ public class SensitiveWordBs implements ISensitiveWordDestroy {
|
||||
*/
|
||||
private IWordFormatText wordFormatText = WordFormatTexts.defaults();
|
||||
|
||||
/**
|
||||
* 预热
|
||||
* @since 0.29.0
|
||||
*/
|
||||
private IWordWarmUp wordWarmUp = WordWarmUps.defaults();
|
||||
|
||||
/**
|
||||
* 新建验证实例
|
||||
* <p>
|
||||
@@ -273,9 +280,19 @@ public class SensitiveWordBs implements ISensitiveWordDestroy {
|
||||
|
||||
this.context = context;
|
||||
|
||||
this.warmUp(wordAllowList, wordDenyList);
|
||||
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* 避免冷启动
|
||||
* @since 0.29.0
|
||||
*/
|
||||
private void warmUp(final List<String> wordAllowList, final List<String> wordDenyList) {
|
||||
this.wordWarmUp.warmUp(this, context, wordAllowList, wordDenyList);
|
||||
}
|
||||
|
||||
/**
|
||||
* 构建默认的上下文
|
||||
*
|
||||
@@ -466,6 +483,13 @@ public class SensitiveWordBs implements ISensitiveWordDestroy {
|
||||
return this;
|
||||
}
|
||||
|
||||
public SensitiveWordBs wordWarmUp(IWordWarmUp wordWarmUp) {
|
||||
ArgUtil.notNull(wordWarmUp, "wordWarmUp");
|
||||
|
||||
this.wordWarmUp = wordWarmUp;
|
||||
return this;
|
||||
}
|
||||
|
||||
//-------------------------------------------------------- 基础属性设置
|
||||
/**
|
||||
* 是否启用 ipv4 校验
|
||||
@@ -663,8 +687,6 @@ public class SensitiveWordBs implements ISensitiveWordDestroy {
|
||||
* @since 0.0.1
|
||||
*/
|
||||
public <R> List<R> findAll(final String target, final IWordResultHandler<R> handler) {
|
||||
ArgUtil.notNull(handler, "handler");
|
||||
|
||||
List<IWordResult> wordResults = sensitiveWord.findAll(target, context);
|
||||
return CollectionUtil.toList(wordResults, new IHandler<IWordResult, R>() {
|
||||
@Override
|
||||
@@ -685,8 +707,6 @@ public class SensitiveWordBs implements ISensitiveWordDestroy {
|
||||
* @since 0.0.1
|
||||
*/
|
||||
public <R> R findFirst(final String target, final IWordResultHandler<R> handler) {
|
||||
ArgUtil.notNull(handler, "handler");
|
||||
|
||||
IWordResult wordResult = sensitiveWord.findFirst(target, context);
|
||||
return handler.handle(wordResult, context, target);
|
||||
}
|
||||
|
||||
@@ -0,0 +1,36 @@
|
||||
package com.github.houbb.sensitive.word.support.warmup;
|
||||
|
||||
import com.github.houbb.heaven.util.util.CollectionUtil;
|
||||
import com.github.houbb.sensitive.word.api.IWordContext;
|
||||
import com.github.houbb.sensitive.word.api.IWordWarmUp;
|
||||
import com.github.houbb.sensitive.word.bs.SensitiveWordBs;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* 默认策略
|
||||
* @since 1.0.0
|
||||
*/
|
||||
public class WordWarmUpDefault implements IWordWarmUp {
|
||||
|
||||
@Override
|
||||
public void warmUp(SensitiveWordBs sensitiveWordBs, IWordContext wordContext, List<String> wordAllowList, List<String> wordDenyList) {
|
||||
String testInfo = "sensitive-word";
|
||||
if(CollectionUtil.isNotEmpty(wordAllowList)) {
|
||||
testInfo = testInfo + " " + wordAllowList.get(0);
|
||||
}
|
||||
if(CollectionUtil.isNotEmpty(wordDenyList)) {
|
||||
testInfo = testInfo + " " + wordDenyList.get(0);
|
||||
}
|
||||
|
||||
// 只能说优化,但是无法杜绝
|
||||
for(int i = 0; i < 5; i++) {
|
||||
sensitiveWordBs.findAll(testInfo);
|
||||
sensitiveWordBs.findFirst(testInfo);
|
||||
sensitiveWordBs.contains(testInfo);
|
||||
sensitiveWordBs.replace(testInfo);
|
||||
sensitiveWordBs.tags(testInfo);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,16 @@
|
||||
package com.github.houbb.sensitive.word.support.warmup;
|
||||
|
||||
import com.github.houbb.sensitive.word.api.IWordWarmUp;
|
||||
import com.github.houbb.sensitive.word.bs.SensitiveWordBs;
|
||||
|
||||
/**
|
||||
* 预热策略
|
||||
* @since 0.29.0
|
||||
*/
|
||||
public final class WordWarmUps {
|
||||
|
||||
public static IWordWarmUp defaults() {
|
||||
return new WordWarmUpDefault();
|
||||
}
|
||||
|
||||
}
|
||||
@@ -3,6 +3,7 @@ package com.github.houbb.sensitive.word.issues;
|
||||
import com.github.houbb.sensitive.word.api.IWordDeny;
|
||||
import com.github.houbb.sensitive.word.bs.SensitiveWordBs;
|
||||
import com.github.houbb.sensitive.word.support.allow.WordAllows;
|
||||
import com.github.houbb.sensitive.word.support.deny.WordDenys;
|
||||
import com.github.houbb.sensitive.word.support.tag.WordTags;
|
||||
|
||||
import java.io.IOException;
|
||||
@@ -25,12 +26,7 @@ public class Issue131 {
|
||||
SensitiveWordBs sensitiveWordBs = SensitiveWordBs.newInstance()
|
||||
.wordFailFast(true)
|
||||
.wordAllow(WordAllows.empty())
|
||||
.wordDeny(new IWordDeny() {
|
||||
@Override
|
||||
public List<String> deny() {
|
||||
return allWord;
|
||||
}
|
||||
})
|
||||
.wordDeny(WordDenys.empty())
|
||||
.ignoreChineseStyle(false)
|
||||
.ignoreCase(false)
|
||||
.ignoreEnglishStyle(false)
|
||||
@@ -39,17 +35,12 @@ public class Issue131 {
|
||||
.ignoreWidth(false)
|
||||
.wordTag(WordTags.none())
|
||||
.init();
|
||||
long time = System.currentTimeMillis();
|
||||
costTimeTest(sensitiveWordBs, demo1);
|
||||
long cTime = System.currentTimeMillis() - time;
|
||||
System.out.println("---DONE"+cTime);
|
||||
}
|
||||
|
||||
private static void costTimeTest(SensitiveWordBs sensitiveWordBs, String demo1) throws IOException {
|
||||
int count = 10000;
|
||||
|
||||
for (int i = 0; i < count; i++) {
|
||||
for(int i = 0; i < 5; i++) {
|
||||
long time = System.nanoTime();
|
||||
List<String> emitWord1 = sensitiveWordBs.findAll(demo1);
|
||||
long cTime = System.nanoTime() - time;
|
||||
System.out.println("Total="+cTime);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user