diff --git a/CHANGE_LOG.md b/CHANGE_LOG.md
index 695302b..297127e 100644
--- a/CHANGE_LOG.md
+++ b/CHANGE_LOG.md
@@ -463,4 +463,11 @@
|:---|:-----|---------------------------------|:------------------|:---------------------------------------------------|
| 1 | O | 尽量用 string.charAt 替代 `chars[i]` | 2025-9-4 16:22:24 | https://github.com/houbb/sensitive-word/issues/131 |
| 2 | O | 移除引导类公共方法的 check | 2025-9-4 16:22:24 | https://github.com/houbb/sensitive-word/issues/131 |
-| 3 | A | 新增 warmUp | 2025-9-4 16:22:24 | |
\ No newline at end of file
+| 3 | A | 新增 warmUp | 2025-9-4 16:22:24 | |
+
+# release_0.29.1
+
+| 序号 | 变更类型 | 说明 | 时间 | 备注 |
+|:---|:-----|-----------------------------|:------------------|:--------------------|
+| 1 | O | 改进 check、format 的 chains 方法 | 2025-9-5 16:22:24 | 优化性能 |
+| 2 | O | InnerWordFormatUtils#format | 2025-9-5 16:22:24 | 优化性能+内存 toCharArray |
diff --git a/README.md b/README.md
index 6d422a8..6420574 100644
--- a/README.md
+++ b/README.md
@@ -110,7 +110,7 @@ v0.24.0 开始内置支持对敏感词的分类细化,不过工作量比较大
com.github.houbb
sensitive-word
- 0.29.0
+ 0.29.1
```
@@ -1365,25 +1365,13 @@ ps: 不同环境会有差异,但是比例基本稳定。
# 后期 road-map
-- [x] 移除单个汉字的敏感词,在中国,要把词组当做一次词,降低误判率。
+- [ ] fastutil 优化 jdk 内置集合类
-- [x] 支持单个的敏感词变化?
+- [ ] 中文转换优化-opencc4j 内存+性能优化
-remove、add、edit?
+WordFormatIgnoreChineseStyle 转换类不够精简,可以优化一下。采用直接映射(收口在 opencc4j)。
-- [x] 敏感词标签接口支持
-
-- [x] 敏感词处理时标签支持
-
-- [x] wordData 的内存占用对比 + 优化
-
-- [x] 用户指定自定义的词组,同时允许指定词组的组合获取,更加灵活
-
-FormatCombine/CheckCombine/AllowDenyCombine 组合策略,允许用户自定义。
-
-- [ ] word check 策略的优化,统一遍历+转换
-
-- [ ] 添加 ThreadLocal 等性能优化
+- [ ] 各种其他涉及到 char 拆箱的地方改进
# 拓展阅读
diff --git a/pom.xml b/pom.xml
index 2e3144e..115dfd7 100644
--- a/pom.xml
+++ b/pom.xml
@@ -6,7 +6,7 @@
com.github.houbb
sensitive-word
- 0.29.0
+ 0.29.1
diff --git a/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckArray.java b/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckArray.java
new file mode 100644
index 0000000..e036f66
--- /dev/null
+++ b/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckArray.java
@@ -0,0 +1,47 @@
+package com.github.houbb.sensitive.word.support.check;
+
+import com.github.houbb.heaven.util.common.ArgUtil;
+import com.github.houbb.sensitive.word.api.IWordCheck;
+import com.github.houbb.sensitive.word.api.context.InnerSensitiveWordContext;
+import com.github.houbb.sensitive.word.support.result.WordLengthResult;
+
+import java.util.List;
+
+/**
+ * Runs a fixed sequence of word checks in order and returns the first hit.
+ * The checks are copied into an array at construction time.
+ * @author binbin.hou
+ * @since 0.29.1
+ */
+public class WordCheckArray implements IWordCheck {
+
+    /** Checks to run, in the order supplied by the caller. */
+    private final IWordCheck[] sensitiveChecks;
+
+    /**
+     * @param sensitiveChecks checks to run in order; validated with ArgUtil.notEmpty
+     */
+    public WordCheckArray(List<IWordCheck> sensitiveChecks) {
+        ArgUtil.notEmpty(sensitiveChecks, "sensitiveChecks");
+        // Snapshot the list so later changes to it do not affect this instance.
+        this.sensitiveChecks = sensitiveChecks.toArray(new IWordCheck[0]);
+    }
+
+    /**
+     * Calls each check at {@code beginIndex} and returns the first result whose allow
+     * or deny length is positive; otherwise returns WordCheckNone.getNoneResult().
+     */
+    @Override
+    public WordCheckResult sensitiveCheck(int beginIndex, InnerSensitiveWordContext checkContext) {
+        for (IWordCheck check : sensitiveChecks) {
+            WordCheckResult result = check.sensitiveCheck(beginIndex, checkContext);
+            WordLengthResult lengths = result.wordLengthResult();
+            if (lengths.wordAllowLen() > 0 || lengths.wordDenyLen() > 0) {
+                return result;
+            }
+        }
+        // No check produced a positive allow/deny length at this position.
+        return WordCheckNone.getNoneResult();
+    }
+
+}
diff --git a/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckInit.java b/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckInit.java
index d7f3c2b..9234c9d 100644
--- a/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckInit.java
+++ b/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckInit.java
@@ -12,6 +12,7 @@ import java.util.List;
* 检测初始化类
* @since 0.3.0
*/
+@Deprecated
public abstract class WordCheckInit implements IWordCheck {
/**
diff --git a/src/main/java/com/github/houbb/sensitive/word/support/check/WordChecks.java b/src/main/java/com/github/houbb/sensitive/word/support/check/WordChecks.java
index d173c2d..5fff684 100644
--- a/src/main/java/com/github/houbb/sensitive/word/support/check/WordChecks.java
+++ b/src/main/java/com/github/houbb/sensitive/word/support/check/WordChecks.java
@@ -1,13 +1,10 @@
package com.github.houbb.sensitive.word.support.check;
-import com.github.houbb.heaven.support.pipeline.Pipeline;
import com.github.houbb.heaven.util.util.ArrayUtil;
import com.github.houbb.heaven.util.util.CollectionUtil;
import com.github.houbb.sensitive.word.api.IWordCheck;
-import com.github.houbb.sensitive.word.api.IWordContext;
import java.util.ArrayList;
-import java.util.Collection;
import java.util.List;
/**
@@ -23,29 +20,16 @@ public final class WordChecks {
return none();
}
- return new WordCheckInit() {
- @Override
- protected void init(Pipeline pipeline) {
- for(IWordCheck check : sensitiveChecks) {
- pipeline.addLast(check);
- }
- }
- };
+ List wordChecks = new ArrayList<>(sensitiveChecks.length);
+ return array(wordChecks);
}
- public static IWordCheck chains(final Collection sensitiveChecks) {
+ public static IWordCheck chains(final List sensitiveChecks) {
if (CollectionUtil.isEmpty(sensitiveChecks)){
return none();
}
- return new WordCheckInit() {
- @Override
- protected void init(Pipeline pipeline) {
- for(IWordCheck check : sensitiveChecks) {
- pipeline.addLast(check);
- }
- }
- };
+ return array(sensitiveChecks);
}
public static IWordCheck email() {
@@ -88,4 +72,14 @@ public final class WordChecks {
return WordCheckUrlNoPrefix.getInstance();
}
+ /**
+  * Builds a check that runs the given checks in order via {@link WordCheckArray}.
+  * @param wordChecks checks to run; passed unchanged to the WordCheckArray constructor
+  * @return the combined check
+  * @since 0.29.1
+  */
+ public static IWordCheck array(final List<IWordCheck> wordChecks) {
+     return new WordCheckArray(wordChecks);
+ }
+
}
diff --git a/src/main/java/com/github/houbb/sensitive/word/support/data/WordDataTreeNode.java b/src/main/java/com/github/houbb/sensitive/word/support/data/WordDataTreeNode.java
index dc5fa34..d44a4c3 100644
--- a/src/main/java/com/github/houbb/sensitive/word/support/data/WordDataTreeNode.java
+++ b/src/main/java/com/github/houbb/sensitive/word/support/data/WordDataTreeNode.java
@@ -31,7 +31,7 @@ public class WordDataTreeNode implements ISensitiveWordDestroy {
return this;
}
- public WordDataTreeNode getSubNode(final char c) {
+ public WordDataTreeNode getSubNode(final Character c) {
if(subNodeMap == null) {
return null;
}
@@ -52,14 +52,14 @@ public class WordDataTreeNode implements ISensitiveWordDestroy {
subNodeMap=null;
}
- public void removeNode(final char c) {
+ public void removeNode(final Character c) {
if (subNodeMap == null) {
return;
}
subNodeMap.remove(c);
}
- public WordDataTreeNode addSubNode(char c, WordDataTreeNode subNode) {
+ public WordDataTreeNode addSubNode(Character c, WordDataTreeNode subNode) {
if(this.subNodeMap == null) {
subNodeMap = new HashMap<>();
}
diff --git a/src/main/java/com/github/houbb/sensitive/word/support/format/WordFormatArray.java b/src/main/java/com/github/houbb/sensitive/word/support/format/WordFormatArray.java
new file mode 100644
index 0000000..77fe26d
--- /dev/null
+++ b/src/main/java/com/github/houbb/sensitive/word/support/format/WordFormatArray.java
@@ -0,0 +1,39 @@
+package com.github.houbb.sensitive.word.support.format;
+
+import com.github.houbb.heaven.util.common.ArgUtil;
+import com.github.houbb.sensitive.word.api.IWordContext;
+import com.github.houbb.sensitive.word.api.IWordFormat;
+
+import java.util.List;
+
+/**
+ * Applies a fixed sequence of character formats in order, feeding each
+ * format's output into the next one.
+ * @author binbin.hou
+ * @since 0.29.1
+ */
+public class WordFormatArray implements IWordFormat {
+
+    /** Formats to apply, in the order supplied by the caller. */
+    private final IWordFormat[] wordFormats;
+
+    /**
+     * @param wordFormats formats to apply in order; validated with ArgUtil.notEmpty
+     */
+    public WordFormatArray(List<IWordFormat> wordFormats) {
+        ArgUtil.notEmpty(wordFormats, "wordFormats");
+        // Snapshot the list so later changes to it do not affect this instance.
+        this.wordFormats = wordFormats.toArray(new IWordFormat[0]);
+    }
+
+    /** Returns {@code original} after passing it through every format in order. */
+    @Override
+    public char format(char original, IWordContext context) {
+        char formatted = original;
+        for (IWordFormat wordFormat : wordFormats) {
+            formatted = wordFormat.format(formatted, context);
+        }
+        return formatted;
+    }
+
+}
diff --git a/src/main/java/com/github/houbb/sensitive/word/support/format/WordFormatInit.java b/src/main/java/com/github/houbb/sensitive/word/support/format/WordFormatInit.java
index 2c0c4fd..c047d59 100644
--- a/src/main/java/com/github/houbb/sensitive/word/support/format/WordFormatInit.java
+++ b/src/main/java/com/github/houbb/sensitive/word/support/format/WordFormatInit.java
@@ -14,6 +14,7 @@ import java.util.List;
* @since 0.0.5
*/
@ThreadSafe
+@Deprecated
public abstract class WordFormatInit implements IWordFormat {
/**
diff --git a/src/main/java/com/github/houbb/sensitive/word/support/format/WordFormats.java b/src/main/java/com/github/houbb/sensitive/word/support/format/WordFormats.java
index eceb3b4..505ea0d 100644
--- a/src/main/java/com/github/houbb/sensitive/word/support/format/WordFormats.java
+++ b/src/main/java/com/github/houbb/sensitive/word/support/format/WordFormats.java
@@ -1,13 +1,10 @@
package com.github.houbb.sensitive.word.support.format;
-import com.github.houbb.heaven.support.pipeline.Pipeline;
-import com.github.houbb.heaven.util.guava.Guavas;
import com.github.houbb.heaven.util.util.ArrayUtil;
import com.github.houbb.heaven.util.util.CollectionUtil;
import com.github.houbb.sensitive.word.api.IWordFormat;
-import com.github.houbb.sensitive.word.api.IWordContext;
-import java.util.Collection;
+import java.util.ArrayList;
import java.util.List;
/**
@@ -29,14 +26,8 @@ public final class WordFormats {
return none();
}
- return new WordFormatInit() {
- @Override
- protected void init(Pipeline pipeline) {
- for(IWordFormat charFormat : charFormats) {
- pipeline.addLast(charFormat);
- }
- }
- };
+ List wordFormats = new ArrayList<>(charFormats.length);
+ return array(wordFormats);
}
/**
@@ -44,19 +35,12 @@ public final class WordFormats {
* @param charFormats 列表
* @return 结果
*/
- public static IWordFormat chains(final Collection charFormats) {
+ public static IWordFormat chains(final List charFormats) {
if(CollectionUtil.isEmpty(charFormats)) {
return none();
}
- return new WordFormatInit() {
- @Override
- protected void init(Pipeline pipeline) {
- for(IWordFormat charFormat : charFormats) {
- pipeline.addLast(charFormat);
- }
- }
- };
+ return array(charFormats);
}
public static IWordFormat none() {
@@ -82,4 +66,8 @@ public final class WordFormats {
return WordFormatIgnoreWidth.getInstance();
}
+ /** Builds a format that applies {@code wordFormats} in order via {@link WordFormatArray}. */
+ public static IWordFormat array(final List<IWordFormat> wordFormats) {
+     return new WordFormatArray(wordFormats);
+ }
}
diff --git a/src/main/java/com/github/houbb/sensitive/word/utils/InnerWordFormatUtils.java b/src/main/java/com/github/houbb/sensitive/word/utils/InnerWordFormatUtils.java
index 25cdb7c..a03775f 100644
--- a/src/main/java/com/github/houbb/sensitive/word/utils/InnerWordFormatUtils.java
+++ b/src/main/java/com/github/houbb/sensitive/word/utils/InnerWordFormatUtils.java
@@ -35,8 +35,9 @@ public final class InnerWordFormatUtils {
StringBuilder stringBuilder = new StringBuilder();
IWordFormat charFormat = context.wordFormat();
- char[] chars = original.toCharArray();
- for(char c : chars) {
+ int len = original.length();
+ for(int i = 0; i < len; i++) {
+ char c = original.charAt(i);
char cf = charFormat.format(c, context);
stringBuilder.append(cf);
}