mirror of
https://github.com/houbb/sensitive-word.git
synced 2026-03-22 08:27:36 +08:00
opt warmUp
This commit is contained in:
@@ -459,6 +459,8 @@
|
|||||||
|
|
||||||
# release_0.29.0
|
# release_0.29.0
|
||||||
|
|
||||||
| 序号 | 变更类型 | 说明 | 时间 | 备注 |
|
| 序号 | 变更类型 | 说明 | 时间 | 备注 |
|
||||||
|:---|:-----|-------------------------------|:------------------|:---------------------------------------------------|
|
|:---|:-----|---------------------------------|:------------------|:---------------------------------------------------|
|
||||||
| 1 | O | 尽量用 string.charAt 替代 `chars[i]` | 2025-9-4 16:22:24 | https://github.com/houbb/sensitive-word/issues/131 |
|
| 1 | O | 尽量用 string.charAt 替代 `chars[i]` | 2025-9-4 16:22:24 | https://github.com/houbb/sensitive-word/issues/131 |
|
||||||
|
| 2 | O | 移除引导类公共方法的 check | 2025-9-4 16:22:24 | https://github.com/houbb/sensitive-word/issues/131 |
|
||||||
|
| 3 | A | 新增 warmUp | 2025-9-4 16:22:24 | |
|
||||||
@@ -110,7 +110,7 @@ v0.24.0 开始内置支持对敏感词的分类细化,不过工作量比较大
|
|||||||
<dependency>
|
<dependency>
|
||||||
<groupId>com.github.houbb</groupId>
|
<groupId>com.github.houbb</groupId>
|
||||||
<artifactId>sensitive-word</artifactId>
|
<artifactId>sensitive-word</artifactId>
|
||||||
<version>0.28.0</version>
|
<version>0.29.0</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
```
|
```
|
||||||
|
|
||||||
@@ -1429,6 +1429,8 @@ FormatCombine/CheckCombine/AllowDenyCombine 组合策略,允许用户自定义
|
|||||||
|
|
||||||
[v0.27.0 敏感词库独立拆分](https://houbb.github.io/2020/01/07/sensitive-word-15-v0.27.0-dict-split)
|
[v0.27.0 敏感词库独立拆分](https://houbb.github.io/2020/01/07/sensitive-word-15-v0.27.0-dict-split)
|
||||||
|
|
||||||
|
[v0.28.0+v0.29.0 敏感词性能优化之本地方法调用为何这么慢?](https://houbb.github.io/2025/08/29/sensitive-word-why-so-slow)
|
||||||
|
|
||||||

|

|
||||||
|
|
||||||
# NLP 开源矩阵
|
# NLP 开源矩阵
|
||||||
|
|||||||
2
pom.xml
2
pom.xml
@@ -6,7 +6,7 @@
|
|||||||
|
|
||||||
<groupId>com.github.houbb</groupId>
|
<groupId>com.github.houbb</groupId>
|
||||||
<artifactId>sensitive-word</artifactId>
|
<artifactId>sensitive-word</artifactId>
|
||||||
<version>0.29.0-SNAPSHOT</version>
|
<version>0.29.0</version>
|
||||||
|
|
||||||
<properties>
|
<properties>
|
||||||
<!--============================== All Plugins START ==============================-->
|
<!--============================== All Plugins START ==============================-->
|
||||||
|
|||||||
@@ -0,0 +1,26 @@
|
|||||||
|
package com.github.houbb.sensitive.word.api;
|
||||||
|
|
||||||
|
import com.github.houbb.sensitive.word.bs.SensitiveWordBs;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 提前预热,触发类加载、JIT 优化等
|
||||||
|
* @author binbin.hou
|
||||||
|
* @since 0.29.0
|
||||||
|
*/
|
||||||
|
public interface IWordWarmUp {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 预热
|
||||||
|
* @param sensitiveWordBs 引导类本身
|
||||||
|
* @param wordContext 上下文
|
||||||
|
* @param wordAllowList 允许
|
||||||
|
* @param wordDenyList 禁止
|
||||||
|
*/
|
||||||
|
void warmUp(final SensitiveWordBs sensitiveWordBs,
|
||||||
|
final IWordContext wordContext,
|
||||||
|
final List<String> wordAllowList,
|
||||||
|
final List<String> wordDenyList);
|
||||||
|
|
||||||
|
}
|
||||||
@@ -21,6 +21,7 @@ import com.github.houbb.sensitive.word.support.replace.WordReplaces;
|
|||||||
import com.github.houbb.sensitive.word.support.result.WordResultHandlers;
|
import com.github.houbb.sensitive.word.support.result.WordResultHandlers;
|
||||||
import com.github.houbb.sensitive.word.support.resultcondition.WordResultConditions;
|
import com.github.houbb.sensitive.word.support.resultcondition.WordResultConditions;
|
||||||
import com.github.houbb.sensitive.word.support.tag.WordTags;
|
import com.github.houbb.sensitive.word.support.tag.WordTags;
|
||||||
|
import com.github.houbb.sensitive.word.support.warmup.WordWarmUps;
|
||||||
import com.github.houbb.sensitive.word.utils.InnerWordFormatUtils;
|
import com.github.houbb.sensitive.word.utils.InnerWordFormatUtils;
|
||||||
import com.github.houbb.sensitive.word.utils.InnerWordTagUtils;
|
import com.github.houbb.sensitive.word.utils.InnerWordTagUtils;
|
||||||
|
|
||||||
@@ -226,6 +227,12 @@ public class SensitiveWordBs implements ISensitiveWordDestroy {
|
|||||||
*/
|
*/
|
||||||
private IWordFormatText wordFormatText = WordFormatTexts.defaults();
|
private IWordFormatText wordFormatText = WordFormatTexts.defaults();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 预热
|
||||||
|
* @since 0.29.0
|
||||||
|
*/
|
||||||
|
private IWordWarmUp wordWarmUp = WordWarmUps.defaults();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 新建验证实例
|
* 新建验证实例
|
||||||
* <p>
|
* <p>
|
||||||
@@ -273,9 +280,19 @@ public class SensitiveWordBs implements ISensitiveWordDestroy {
|
|||||||
|
|
||||||
this.context = context;
|
this.context = context;
|
||||||
|
|
||||||
|
this.warmUp(wordAllowList, wordDenyList);
|
||||||
|
|
||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 避免冷启动
|
||||||
|
* @since 0.29.0
|
||||||
|
*/
|
||||||
|
private void warmUp(final List<String> wordAllowList, final List<String> wordDenyList) {
|
||||||
|
this.wordWarmUp.warmUp(this, context, wordAllowList, wordDenyList);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 构建默认的上下文
|
* 构建默认的上下文
|
||||||
*
|
*
|
||||||
@@ -466,6 +483,13 @@ public class SensitiveWordBs implements ISensitiveWordDestroy {
|
|||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public SensitiveWordBs wordWarmUp(IWordWarmUp wordWarmUp) {
|
||||||
|
ArgUtil.notNull(wordWarmUp, "wordWarmUp");
|
||||||
|
|
||||||
|
this.wordWarmUp = wordWarmUp;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
//-------------------------------------------------------- 基础属性设置
|
//-------------------------------------------------------- 基础属性设置
|
||||||
/**
|
/**
|
||||||
* 是否启用 ipv4 校验
|
* 是否启用 ipv4 校验
|
||||||
@@ -663,8 +687,6 @@ public class SensitiveWordBs implements ISensitiveWordDestroy {
|
|||||||
* @since 0.0.1
|
* @since 0.0.1
|
||||||
*/
|
*/
|
||||||
public <R> List<R> findAll(final String target, final IWordResultHandler<R> handler) {
|
public <R> List<R> findAll(final String target, final IWordResultHandler<R> handler) {
|
||||||
ArgUtil.notNull(handler, "handler");
|
|
||||||
|
|
||||||
List<IWordResult> wordResults = sensitiveWord.findAll(target, context);
|
List<IWordResult> wordResults = sensitiveWord.findAll(target, context);
|
||||||
return CollectionUtil.toList(wordResults, new IHandler<IWordResult, R>() {
|
return CollectionUtil.toList(wordResults, new IHandler<IWordResult, R>() {
|
||||||
@Override
|
@Override
|
||||||
@@ -685,8 +707,6 @@ public class SensitiveWordBs implements ISensitiveWordDestroy {
|
|||||||
* @since 0.0.1
|
* @since 0.0.1
|
||||||
*/
|
*/
|
||||||
public <R> R findFirst(final String target, final IWordResultHandler<R> handler) {
|
public <R> R findFirst(final String target, final IWordResultHandler<R> handler) {
|
||||||
ArgUtil.notNull(handler, "handler");
|
|
||||||
|
|
||||||
IWordResult wordResult = sensitiveWord.findFirst(target, context);
|
IWordResult wordResult = sensitiveWord.findFirst(target, context);
|
||||||
return handler.handle(wordResult, context, target);
|
return handler.handle(wordResult, context, target);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -0,0 +1,36 @@
|
|||||||
|
package com.github.houbb.sensitive.word.support.warmup;
|
||||||
|
|
||||||
|
import com.github.houbb.heaven.util.util.CollectionUtil;
|
||||||
|
import com.github.houbb.sensitive.word.api.IWordContext;
|
||||||
|
import com.github.houbb.sensitive.word.api.IWordWarmUp;
|
||||||
|
import com.github.houbb.sensitive.word.bs.SensitiveWordBs;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 默认策略
|
||||||
|
* @since 0.29.0
|
||||||
|
*/
|
||||||
|
public class WordWarmUpDefault implements IWordWarmUp {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void warmUp(SensitiveWordBs sensitiveWordBs, IWordContext wordContext, List<String> wordAllowList, List<String> wordDenyList) {
|
||||||
|
String testInfo = "sensitive-word";
|
||||||
|
if(CollectionUtil.isNotEmpty(wordAllowList)) {
|
||||||
|
testInfo = testInfo + " " + wordAllowList.get(0);
|
||||||
|
}
|
||||||
|
if(CollectionUtil.isNotEmpty(wordDenyList)) {
|
||||||
|
testInfo = testInfo + " " + wordDenyList.get(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
// 只能说优化,但是无法杜绝
|
||||||
|
for(int i = 0; i < 5; i++) {
|
||||||
|
sensitiveWordBs.findAll(testInfo);
|
||||||
|
sensitiveWordBs.findFirst(testInfo);
|
||||||
|
sensitiveWordBs.contains(testInfo);
|
||||||
|
sensitiveWordBs.replace(testInfo);
|
||||||
|
sensitiveWordBs.tags(testInfo);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
@@ -0,0 +1,16 @@
|
|||||||
|
package com.github.houbb.sensitive.word.support.warmup;
|
||||||
|
|
||||||
|
import com.github.houbb.sensitive.word.api.IWordWarmUp;
|
||||||
|
import com.github.houbb.sensitive.word.bs.SensitiveWordBs;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 预热策略
|
||||||
|
* @since 0.29.0
|
||||||
|
*/
|
||||||
|
public final class WordWarmUps {
|
||||||
|
|
||||||
|
public static IWordWarmUp defaults() {
|
||||||
|
return new WordWarmUpDefault();
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
@@ -3,6 +3,7 @@ package com.github.houbb.sensitive.word.issues;
|
|||||||
import com.github.houbb.sensitive.word.api.IWordDeny;
|
import com.github.houbb.sensitive.word.api.IWordDeny;
|
||||||
import com.github.houbb.sensitive.word.bs.SensitiveWordBs;
|
import com.github.houbb.sensitive.word.bs.SensitiveWordBs;
|
||||||
import com.github.houbb.sensitive.word.support.allow.WordAllows;
|
import com.github.houbb.sensitive.word.support.allow.WordAllows;
|
||||||
|
import com.github.houbb.sensitive.word.support.deny.WordDenys;
|
||||||
import com.github.houbb.sensitive.word.support.tag.WordTags;
|
import com.github.houbb.sensitive.word.support.tag.WordTags;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
@@ -25,12 +26,7 @@ public class Issue131 {
|
|||||||
SensitiveWordBs sensitiveWordBs = SensitiveWordBs.newInstance()
|
SensitiveWordBs sensitiveWordBs = SensitiveWordBs.newInstance()
|
||||||
.wordFailFast(true)
|
.wordFailFast(true)
|
||||||
.wordAllow(WordAllows.empty())
|
.wordAllow(WordAllows.empty())
|
||||||
.wordDeny(new IWordDeny() {
|
.wordDeny(WordDenys.empty())
|
||||||
@Override
|
|
||||||
public List<String> deny() {
|
|
||||||
return allWord;
|
|
||||||
}
|
|
||||||
})
|
|
||||||
.ignoreChineseStyle(false)
|
.ignoreChineseStyle(false)
|
||||||
.ignoreCase(false)
|
.ignoreCase(false)
|
||||||
.ignoreEnglishStyle(false)
|
.ignoreEnglishStyle(false)
|
||||||
@@ -39,17 +35,12 @@ public class Issue131 {
|
|||||||
.ignoreWidth(false)
|
.ignoreWidth(false)
|
||||||
.wordTag(WordTags.none())
|
.wordTag(WordTags.none())
|
||||||
.init();
|
.init();
|
||||||
long time = System.currentTimeMillis();
|
|
||||||
costTimeTest(sensitiveWordBs, demo1);
|
|
||||||
long cTime = System.currentTimeMillis() - time;
|
|
||||||
System.out.println("---DONE"+cTime);
|
|
||||||
}
|
|
||||||
|
|
||||||
private static void costTimeTest(SensitiveWordBs sensitiveWordBs, String demo1) throws IOException {
|
for(int i = 0; i < 5; i++) {
|
||||||
int count = 10000;
|
long time = System.nanoTime();
|
||||||
|
|
||||||
for (int i = 0; i < count; i++) {
|
|
||||||
List<String> emitWord1 = sensitiveWordBs.findAll(demo1);
|
List<String> emitWord1 = sensitiveWordBs.findAll(demo1);
|
||||||
|
long cTime = System.nanoTime() - time;
|
||||||
|
System.out.println("Total="+cTime);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user