v0.29.1 opt

This commit is contained in:
binbin.hou
2025-09-05 11:50:03 +08:00
parent 8545b9ed7f
commit a46f43024d
11 changed files with 131 additions and 65 deletions

View File

@@ -464,3 +464,10 @@
| 1 | O | 尽量用 string.charAt 替代 `chars[i]` | 2025-9-4 16:22:24 | https://github.com/houbb/sensitive-word/issues/131 |
| 2 | O | 移除引导类公共方法的 check | 2025-9-4 16:22:24 | https://github.com/houbb/sensitive-word/issues/131 |
| 3 | A | 新增 warmUp | 2025-9-4 16:22:24 | |
# release_0.29.1
| 序号 | 变更类型 | 说明 | 时间 | 备注 |
|:---|:-----|-----------------------------|:------------------|:--------------------|
| 1 | O | 改进 check、format 的 chains 方法 | 2025-9-5 16:22:24 | 优化性能 |
| 2 | O | InnerWordFormatUtils#format | 2025-9-5 16:22:24 | 优化性能+内存:用 charAt 替代 toCharArray |

View File

@@ -110,7 +110,7 @@ v0.24.0 开始内置支持对敏感词的分类细化,不过工作量比较大
<dependency>
<groupId>com.github.houbb</groupId>
<artifactId>sensitive-word</artifactId>
<version>0.29.0</version>
<version>0.29.1</version>
</dependency>
```
@@ -1365,25 +1365,13 @@ ps: 不同环境会有差异,但是比例基本稳定。
# 后期 road-map
- [x] 移除单个汉字的敏感词,在中国,要把词组当做一次词,降低误判率。
- [ ] fastutil 优化 jdk 内置集合类
- [x] 支持单个的敏感词变化?
- [ ] 中文转换优化-opencc4j 内存+性能优化
remove、add、edit?
WordFormatIgnoreChineseStyle 转换类不够精简,可以优化一下。采用直接映射(收口在 opencc4j)
- [x] 敏感词标签接口支持
- [x] 敏感词处理时标签支持
- [x] wordData 的内存占用对比 + 优化
- [x] 用户指定自定义的词组,同时允许指定词组的组合获取,更加灵活
FormatCombine/CheckCombine/AllowDenyCombine 组合策略,允许用户自定义。
- [ ] word check 策略的优化,统一遍历+转换
- [ ] 添加 ThreadLocal 等性能优化
- [ ] 各种其他涉及到 char 拆箱的地方改进
# 拓展阅读

View File

@@ -6,7 +6,7 @@
<groupId>com.github.houbb</groupId>
<artifactId>sensitive-word</artifactId>
<version>0.29.0</version>
<version>0.29.1</version>
<properties>
<!--============================== All Plugins START ==============================-->

View File

@@ -0,0 +1,47 @@
package com.github.houbb.sensitive.word.support.check;
import com.github.houbb.heaven.util.common.ArgUtil;
import com.github.houbb.sensitive.word.api.IWordCheck;
import com.github.houbb.sensitive.word.api.context.InnerSensitiveWordContext;
import com.github.houbb.sensitive.word.support.result.WordLengthResult;
import java.util.List;
/**
 * {@link IWordCheck} implementation that delegates to a fixed array of checks,
 * consulting them one after another in the order they were supplied.
 *
 * <p>The first delegate whose result reports a positive allow length or a
 * positive deny length is returned; when no delegate does, the shared "none"
 * result is returned.
 *
 * @author binbin.hou
 * @since 0.29.1
 */
public class WordCheckArray implements IWordCheck {

    /** Delegates, copied out of the list passed to the constructor. */
    private final IWordCheck[] sensitiveChecks;

    /**
     * Creates a check backed by a copy of the given list.
     *
     * @param sensitiveChecks checks to delegate to; validated with
     *                        {@code ArgUtil.notEmpty} before being copied
     */
    public WordCheckArray(List<IWordCheck> sensitiveChecks) {
        ArgUtil.notEmpty(sensitiveChecks, "sensitiveChecks");

        // Snapshot into an array so later lookups are plain array reads.
        this.sensitiveChecks = sensitiveChecks.toArray(new IWordCheck[0]);
    }

    /**
     * Runs each delegate at {@code beginIndex} and returns the first result
     * that reports a match.
     *
     * @param beginIndex   index passed through unchanged to every delegate
     * @param checkContext context passed through unchanged to every delegate
     * @return the first delegate result with an allow or deny length greater
     *         than zero, otherwise {@code WordCheckNone.getNoneResult()}
     */
    @Override
    public WordCheckResult sensitiveCheck(int beginIndex, InnerSensitiveWordContext checkContext) {
        for (IWordCheck delegate : sensitiveChecks) {
            final WordCheckResult candidate = delegate.sensitiveCheck(beginIndex, checkContext);
            final WordLengthResult lengths = candidate.wordLengthResult();

            // Allow length is inspected first; deny length only when allow is not positive.
            final boolean matched = lengths.wordAllowLen() > 0 || lengths.wordDenyLen() > 0;
            if (matched) {
                return candidate;
            }
        }

        // No delegate reported a match.
        return WordCheckNone.getNoneResult();
    }
}

View File

@@ -12,6 +12,7 @@ import java.util.List;
* 检测初始化类
* @since 0.3.0
*/
@Deprecated
public abstract class WordCheckInit implements IWordCheck {
/**

View File

@@ -1,13 +1,10 @@
package com.github.houbb.sensitive.word.support.check;
import com.github.houbb.heaven.support.pipeline.Pipeline;
import com.github.houbb.heaven.util.util.ArrayUtil;
import com.github.houbb.heaven.util.util.CollectionUtil;
import com.github.houbb.sensitive.word.api.IWordCheck;
import com.github.houbb.sensitive.word.api.IWordContext;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
/**
@@ -23,29 +20,16 @@ public final class WordChecks {
return none();
}
return new WordCheckInit() {
@Override
protected void init(Pipeline<IWordCheck> pipeline) {
for(IWordCheck check : sensitiveChecks) {
pipeline.addLast(check);
}
}
};
// Copy the varargs into a list before wrapping them. Creating the list with
// only an initial capacity leaves it empty, which would drop every supplied
// check and hand WordCheckArray an empty list (it validates with ArgUtil.notEmpty).
List<IWordCheck> wordChecks = new ArrayList<>(sensitiveChecks.length);
for (IWordCheck check : sensitiveChecks) {
    wordChecks.add(check);
}
return array(wordChecks);
}
public static IWordCheck chains(final Collection<IWordCheck> sensitiveChecks) {
public static IWordCheck chains(final List<IWordCheck> sensitiveChecks) {
if (CollectionUtil.isEmpty(sensitiveChecks)){
return none();
}
return new WordCheckInit() {
@Override
protected void init(Pipeline<IWordCheck> pipeline) {
for(IWordCheck check : sensitiveChecks) {
pipeline.addLast(check);
}
}
};
return array(sensitiveChecks);
}
public static IWordCheck email() {
@@ -88,4 +72,14 @@ public final class WordChecks {
return WordCheckUrlNoPrefix.getInstance();
}
/**
 * Wraps the given checks in a {@link WordCheckArray}.
 *
 * @param wordChecks checks the returned instance delegates to
 * @return a new {@link WordCheckArray} built from {@code wordChecks}
 * @since 0.29.1
 */
public static IWordCheck array(final List<IWordCheck> wordChecks) {
    final IWordCheck arrayCheck = new WordCheckArray(wordChecks);
    return arrayCheck;
}
}

View File

@@ -31,7 +31,7 @@ public class WordDataTreeNode implements ISensitiveWordDestroy {
return this;
}
public WordDataTreeNode getSubNode(final char c) {
public WordDataTreeNode getSubNode(final Character c) {
if(subNodeMap == null) {
return null;
}
@@ -52,14 +52,14 @@ public class WordDataTreeNode implements ISensitiveWordDestroy {
subNodeMap=null;
}
public void removeNode(final char c) {
public void removeNode(final Character c) {
if (subNodeMap == null) {
return;
}
subNodeMap.remove(c);
}
public WordDataTreeNode addSubNode(char c, WordDataTreeNode subNode) {
public WordDataTreeNode addSubNode(Character c, WordDataTreeNode subNode) {
if(this.subNodeMap == null) {
subNodeMap = new HashMap<>();
}

View File

@@ -0,0 +1,39 @@
package com.github.houbb.sensitive.word.support.format;
import com.github.houbb.heaven.util.common.ArgUtil;
import com.github.houbb.sensitive.word.api.IWordContext;
import com.github.houbb.sensitive.word.api.IWordFormat;
import java.util.List;
/**
 * {@link IWordFormat} implementation that applies a fixed array of formats in
 * sequence, feeding the output of each format into the next one.
 *
 * @author binbin.hou
 * @since 0.29.1
 */
public class WordFormatArray implements IWordFormat {

    /** Formats, copied out of the list passed to the constructor. */
    private final IWordFormat[] wordFormats;

    /**
     * Creates a format backed by a copy of the given list.
     *
     * @param wordFormats formats to apply, in order; validated with
     *                    {@code ArgUtil.notEmpty} before being copied
     */
    public WordFormatArray(List<IWordFormat> wordFormats) {
        ArgUtil.notEmpty(wordFormats, "wordFormats");

        // Snapshot into an array so later lookups are plain array reads.
        this.wordFormats = wordFormats.toArray(new IWordFormat[0]);
    }

    /**
     * Passes {@code original} through every format in order.
     *
     * @param original character to format
     * @param context  context passed through unchanged to every format
     * @return the character produced by the last format in the array
     */
    @Override
    public char format(char original, IWordContext context) {
        char current = original;
        for (IWordFormat step : wordFormats) {
            current = step.format(current, context);
        }
        return current;
    }
}

View File

@@ -14,6 +14,7 @@ import java.util.List;
* @since 0.0.5
*/
@ThreadSafe
@Deprecated
public abstract class WordFormatInit implements IWordFormat {
/**

View File

@@ -1,13 +1,10 @@
package com.github.houbb.sensitive.word.support.format;
import com.github.houbb.heaven.support.pipeline.Pipeline;
import com.github.houbb.heaven.util.guava.Guavas;
import com.github.houbb.heaven.util.util.ArrayUtil;
import com.github.houbb.heaven.util.util.CollectionUtil;
import com.github.houbb.sensitive.word.api.IWordFormat;
import com.github.houbb.sensitive.word.api.IWordContext;
import java.util.Collection;
import java.util.ArrayList;
import java.util.List;
/**
@@ -29,14 +26,8 @@ public final class WordFormats {
return none();
}
return new WordFormatInit() {
@Override
protected void init(Pipeline<IWordFormat> pipeline) {
for(IWordFormat charFormat : charFormats) {
pipeline.addLast(charFormat);
}
}
};
// Copy the varargs into a list before wrapping them. Creating the list with
// only an initial capacity leaves it empty, which would drop every supplied
// format and hand WordFormatArray an empty list (it validates with ArgUtil.notEmpty).
List<IWordFormat> wordFormats = new ArrayList<>(charFormats.length);
for (IWordFormat charFormat : charFormats) {
    wordFormats.add(charFormat);
}
return array(wordFormats);
}
/**
@@ -44,19 +35,12 @@ public final class WordFormats {
* @param charFormats 列表
* @return 结果
*/
public static IWordFormat chains(final Collection<IWordFormat> charFormats) {
public static IWordFormat chains(final List<IWordFormat> charFormats) {
if(CollectionUtil.isEmpty(charFormats)) {
return none();
}
return new WordFormatInit() {
@Override
protected void init(Pipeline<IWordFormat> pipeline) {
for(IWordFormat charFormat : charFormats) {
pipeline.addLast(charFormat);
}
}
};
return array(charFormats);
}
public static IWordFormat none() {
@@ -82,4 +66,8 @@ public final class WordFormats {
return WordFormatIgnoreWidth.getInstance();
}
/**
 * Wraps the given formats in a {@link WordFormatArray}.
 *
 * @param wordFormats formats the returned instance applies
 * @return a new {@link WordFormatArray} built from {@code wordFormats}
 */
public static IWordFormat array(final List<IWordFormat> wordFormats) {
    final IWordFormat arrayFormat = new WordFormatArray(wordFormats);
    return arrayFormat;
}
}

View File

@@ -35,8 +35,9 @@ public final class InnerWordFormatUtils {
StringBuilder stringBuilder = new StringBuilder();
IWordFormat charFormat = context.wordFormat();
char[] chars = original.toCharArray();
for(char c : chars) {
int len = original.length();
for(int i = 0; i < len; i++) {
char c = original.charAt(i);
char cf = charFormat.format(c, context);
stringBuilder.append(cf);
}