mirror of
https://github.com/houbb/sensitive-word.git
synced 2026-03-22 08:27:36 +08:00
v0.29.1 opt
This commit is contained in:
@@ -463,4 +463,11 @@
|
||||
|:---|:-----|---------------------------------|:------------------|:---------------------------------------------------|
|
||||
| 1 | O | 尽量用 string.charAt 替代 `chars[i]` | 2025-9-4 16:22:24 | https://github.com/houbb/sensitive-word/issues/131 |
|
||||
| 2 | O | 移除引导类公共方法的 check | 2025-9-4 16:22:24 | https://github.com/houbb/sensitive-word/issues/131 |
|
||||
| 3 | A | 新增 warmUp | 2025-9-4 16:22:24 | |
|
||||
| 3 | A | 新增 warmUp | 2025-9-4 16:22:24 | |
|
||||
|
||||
# release_0.29.1
|
||||
|
||||
| 序号 | 变更类型 | 说明 | 时间 | 备注 |
|
||||
|:---|:-----|-----------------------------|:------------------|:--------------------|
|
||||
| 1 | O | 改进 check、format 的 chains 方法 | 2025-9-5 16:22:24 | 优化性能 |
|
||||
| 2 | O | InnerWordFormatUtils#format | 2025-9-5 16:22:24 | 优化性能+内存 toCharArray |
|
||||
|
||||
22
README.md
22
README.md
@@ -110,7 +110,7 @@ v0.24.0 开始内置支持对敏感词的分类细化,不过工作量比较大
|
||||
<dependency>
|
||||
<groupId>com.github.houbb</groupId>
|
||||
<artifactId>sensitive-word</artifactId>
|
||||
<version>0.29.0</version>
|
||||
<version>0.29.1</version>
|
||||
</dependency>
|
||||
```
|
||||
|
||||
@@ -1365,25 +1365,13 @@ ps: 不同环境会有差异,但是比例基本稳定。
|
||||
|
||||
# 后期 road-map
|
||||
|
||||
- [x] 移除单个汉字的敏感词,在中国,要把词组当做一次词,降低误判率。
|
||||
- [ ] fastutil 优化 jdk 内置集合类
|
||||
|
||||
- [x] 支持单个的敏感词变化?
|
||||
- [ ] 中文转换优化-opencc4j 内存+性能优化
|
||||
|
||||
remove、add、edit?
|
||||
WordFormatIgnoreChineseStyle 转换类不够精简,可以优化一下。采用直接映射(收口在 opencc4j)。
|
||||
|
||||
- [x] 敏感词标签接口支持
|
||||
|
||||
- [x] 敏感词处理时标签支持
|
||||
|
||||
- [x] wordData 的内存占用对比 + 优化
|
||||
|
||||
- [x] 用户指定自定义的词组,同时允许指定词组的组合获取,更加灵活
|
||||
|
||||
FormatCombine/CheckCombine/AllowDenyCombine 组合策略,允许用户自定义。
|
||||
|
||||
- [ ] word check 策略的优化,统一遍历+转换
|
||||
|
||||
- [ ] 添加 ThreadLocal 等性能优化
|
||||
- [ ] 各种其他涉及到 char 拆箱的地方改进
|
||||
|
||||
# 拓展阅读
|
||||
|
||||
|
||||
2
pom.xml
2
pom.xml
@@ -6,7 +6,7 @@
|
||||
|
||||
<groupId>com.github.houbb</groupId>
|
||||
<artifactId>sensitive-word</artifactId>
|
||||
<version>0.29.0</version>
|
||||
<version>0.29.1</version>
|
||||
|
||||
<properties>
|
||||
<!--============================== All Plugins START ==============================-->
|
||||
|
||||
@@ -0,0 +1,47 @@
|
||||
package com.github.houbb.sensitive.word.support.check;
|
||||
|
||||
import com.github.houbb.heaven.util.common.ArgUtil;
|
||||
import com.github.houbb.sensitive.word.api.IWordCheck;
|
||||
import com.github.houbb.sensitive.word.api.context.InnerSensitiveWordContext;
|
||||
import com.github.houbb.sensitive.word.support.result.WordLengthResult;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* 集合
|
||||
* @author binbin.hou
|
||||
* @since 0.30.0
|
||||
*/
|
||||
public class WordCheckArray implements IWordCheck {
|
||||
|
||||
private final IWordCheck[] sensitiveChecks;
|
||||
private final int size;
|
||||
public WordCheckArray(List<IWordCheck> sensitiveChecks) {
|
||||
ArgUtil.notEmpty(sensitiveChecks, "sensitiveChecks");
|
||||
|
||||
this.size = sensitiveChecks.size();
|
||||
this.sensitiveChecks = new IWordCheck[size];
|
||||
for(int i = 0; i < size; i++) {
|
||||
this.sensitiveChecks[i] = sensitiveChecks.get(i);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public WordCheckResult sensitiveCheck(int beginIndex, InnerSensitiveWordContext checkContext) {
|
||||
// 循环调用
|
||||
for(int i = 0; i < size; i++) {
|
||||
IWordCheck sensitiveCheck = sensitiveChecks[i];
|
||||
WordCheckResult result = sensitiveCheck.sensitiveCheck(beginIndex, checkContext);
|
||||
|
||||
WordLengthResult wordLengthResult = result.wordLengthResult();
|
||||
if(wordLengthResult.wordAllowLen() > 0 || wordLengthResult.wordDenyLen()> 0) {
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
// 这里直接进行正则表达式相关的调用。
|
||||
// 默认返回 0
|
||||
return WordCheckNone.getNoneResult();
|
||||
}
|
||||
|
||||
}
|
||||
@@ -12,6 +12,7 @@ import java.util.List;
|
||||
* 检测初始化类
|
||||
* @since 0.3.0
|
||||
*/
|
||||
@Deprecated
|
||||
public abstract class WordCheckInit implements IWordCheck {
|
||||
|
||||
/**
|
||||
|
||||
@@ -1,13 +1,10 @@
|
||||
package com.github.houbb.sensitive.word.support.check;
|
||||
|
||||
import com.github.houbb.heaven.support.pipeline.Pipeline;
|
||||
import com.github.houbb.heaven.util.util.ArrayUtil;
|
||||
import com.github.houbb.heaven.util.util.CollectionUtil;
|
||||
import com.github.houbb.sensitive.word.api.IWordCheck;
|
||||
import com.github.houbb.sensitive.word.api.IWordContext;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
@@ -23,29 +20,16 @@ public final class WordChecks {
|
||||
return none();
|
||||
}
|
||||
|
||||
return new WordCheckInit() {
|
||||
@Override
|
||||
protected void init(Pipeline<IWordCheck> pipeline) {
|
||||
for(IWordCheck check : sensitiveChecks) {
|
||||
pipeline.addLast(check);
|
||||
}
|
||||
}
|
||||
};
|
||||
// Copy the varargs into the list: the ArrayList(int) constructor only sets the
// initial capacity, so without this loop an empty list would reach array(...)
// and every supplied check would be dropped.
List<IWordCheck> wordChecks = new ArrayList<>(sensitiveChecks.length);
for (IWordCheck check : sensitiveChecks) {
    wordChecks.add(check);
}
return array(wordChecks);
|
||||
}
|
||||
|
||||
public static IWordCheck chains(final Collection<IWordCheck> sensitiveChecks) {
|
||||
public static IWordCheck chains(final List<IWordCheck> sensitiveChecks) {
|
||||
if (CollectionUtil.isEmpty(sensitiveChecks)){
|
||||
return none();
|
||||
}
|
||||
|
||||
return new WordCheckInit() {
|
||||
@Override
|
||||
protected void init(Pipeline<IWordCheck> pipeline) {
|
||||
for(IWordCheck check : sensitiveChecks) {
|
||||
pipeline.addLast(check);
|
||||
}
|
||||
}
|
||||
};
|
||||
return array(sensitiveChecks);
|
||||
}
|
||||
|
||||
public static IWordCheck email() {
|
||||
@@ -88,4 +72,14 @@ public final class WordChecks {
|
||||
return WordCheckUrlNoPrefix.getInstance();
|
||||
}
|
||||
|
||||
/**
|
||||
* 集合
|
||||
*
|
||||
* @return 实现
|
||||
* @since 0.30.0
|
||||
*/
|
||||
public static IWordCheck array(final List<IWordCheck> wordChecks) {
|
||||
return new WordCheckArray(wordChecks);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -31,7 +31,7 @@ public class WordDataTreeNode implements ISensitiveWordDestroy {
|
||||
return this;
|
||||
}
|
||||
|
||||
public WordDataTreeNode getSubNode(final char c) {
|
||||
public WordDataTreeNode getSubNode(final Character c) {
|
||||
if(subNodeMap == null) {
|
||||
return null;
|
||||
}
|
||||
@@ -52,14 +52,14 @@ public class WordDataTreeNode implements ISensitiveWordDestroy {
|
||||
subNodeMap=null;
|
||||
}
|
||||
|
||||
public void removeNode(final char c) {
|
||||
public void removeNode(final Character c) {
|
||||
if (subNodeMap == null) {
|
||||
return;
|
||||
}
|
||||
subNodeMap.remove(c);
|
||||
}
|
||||
|
||||
public WordDataTreeNode addSubNode(char c, WordDataTreeNode subNode) {
|
||||
public WordDataTreeNode addSubNode(Character c, WordDataTreeNode subNode) {
|
||||
if(this.subNodeMap == null) {
|
||||
subNodeMap = new HashMap<>();
|
||||
}
|
||||
|
||||
@@ -0,0 +1,39 @@
|
||||
package com.github.houbb.sensitive.word.support.format;
|
||||
|
||||
import com.github.houbb.heaven.util.common.ArgUtil;
|
||||
import com.github.houbb.sensitive.word.api.IWordContext;
|
||||
import com.github.houbb.sensitive.word.api.IWordFormat;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* 直接列表调用
|
||||
* @author binbin.hou
|
||||
* @since 0.30.0
|
||||
*/
|
||||
public class WordFormatArray implements IWordFormat {
|
||||
|
||||
private final IWordFormat[] wordFormats;
|
||||
private final int size;
|
||||
public WordFormatArray(List<IWordFormat> wordFormats) {
|
||||
ArgUtil.notEmpty(wordFormats, "wordFormats");
|
||||
|
||||
this.size = wordFormats.size();
|
||||
this.wordFormats = new IWordFormat[size];
|
||||
for(int i = 0; i < size; i++) {
|
||||
this.wordFormats[i] = wordFormats.get(i);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public char format(char original, IWordContext context) {
|
||||
char c = original;
|
||||
for(int i = 0; i < size; i++) {
|
||||
IWordFormat charFormat = wordFormats[i];
|
||||
c = charFormat.format(c, context);
|
||||
}
|
||||
|
||||
return c;
|
||||
}
|
||||
|
||||
}
|
||||
@@ -14,6 +14,7 @@ import java.util.List;
|
||||
* @since 0.0.5
|
||||
*/
|
||||
@ThreadSafe
|
||||
@Deprecated
|
||||
public abstract class WordFormatInit implements IWordFormat {
|
||||
|
||||
/**
|
||||
|
||||
@@ -1,13 +1,10 @@
|
||||
package com.github.houbb.sensitive.word.support.format;
|
||||
|
||||
import com.github.houbb.heaven.support.pipeline.Pipeline;
|
||||
import com.github.houbb.heaven.util.guava.Guavas;
|
||||
import com.github.houbb.heaven.util.util.ArrayUtil;
|
||||
import com.github.houbb.heaven.util.util.CollectionUtil;
|
||||
import com.github.houbb.sensitive.word.api.IWordFormat;
|
||||
import com.github.houbb.sensitive.word.api.IWordContext;
|
||||
|
||||
import java.util.Collection;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
@@ -29,14 +26,8 @@ public final class WordFormats {
|
||||
return none();
|
||||
}
|
||||
|
||||
return new WordFormatInit() {
|
||||
@Override
|
||||
protected void init(Pipeline<IWordFormat> pipeline) {
|
||||
for(IWordFormat charFormat : charFormats) {
|
||||
pipeline.addLast(charFormat);
|
||||
}
|
||||
}
|
||||
};
|
||||
// Copy the varargs into the list: the ArrayList(int) constructor only sets the
// initial capacity, so without this loop an empty list would reach array(...)
// and every supplied formatter would be dropped.
List<IWordFormat> wordFormats = new ArrayList<>(charFormats.length);
for (IWordFormat charFormat : charFormats) {
    wordFormats.add(charFormat);
}
return array(wordFormats);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -44,19 +35,12 @@ public final class WordFormats {
|
||||
* @param charFormats 列表
|
||||
* @return 结果
|
||||
*/
|
||||
public static IWordFormat chains(final Collection<IWordFormat> charFormats) {
|
||||
public static IWordFormat chains(final List<IWordFormat> charFormats) {
|
||||
if(CollectionUtil.isEmpty(charFormats)) {
|
||||
return none();
|
||||
}
|
||||
|
||||
return new WordFormatInit() {
|
||||
@Override
|
||||
protected void init(Pipeline<IWordFormat> pipeline) {
|
||||
for(IWordFormat charFormat : charFormats) {
|
||||
pipeline.addLast(charFormat);
|
||||
}
|
||||
}
|
||||
};
|
||||
return array(charFormats);
|
||||
}
|
||||
|
||||
public static IWordFormat none() {
|
||||
@@ -82,4 +66,8 @@ public final class WordFormats {
|
||||
return WordFormatIgnoreWidth.getInstance();
|
||||
}
|
||||
|
||||
public static IWordFormat array(final List<IWordFormat> wordFormats) {
|
||||
return new WordFormatArray(wordFormats);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -35,8 +35,9 @@ public final class InnerWordFormatUtils {
|
||||
|
||||
StringBuilder stringBuilder = new StringBuilder();
|
||||
IWordFormat charFormat = context.wordFormat();
|
||||
char[] chars = original.toCharArray();
|
||||
for(char c : chars) {
|
||||
int len = original.length();
|
||||
for(int i = 0; i < len; i++) {
|
||||
char c = original.charAt(i);
|
||||
char cf = charFormat.format(c, context);
|
||||
stringBuilder.append(cf);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user