From 3488319f296b38ddb2f73280f873463ba316b446 Mon Sep 17 00:00:00 2001 From: "binbin.hou" Date: Thu, 4 Sep 2025 18:47:41 +0800 Subject: [PATCH] opt toCharArray --- CHANGE_LOG.md | 6 +++ pom.xml | 2 +- .../word/api/ISensitiveWordCharIgnore.java | 4 +- .../sensitive/word/api/IWordReplace.java | 4 +- .../word/core/AbstractSensitiveWord.java | 9 ++-- .../check/AbstractConditionWordCheck.java | 3 +- .../word/support/check/WordCheckWord.java | 7 ++- .../word/support/data/WordDataHashMap.java | 5 +-- .../word/support/data/WordDataTree.java | 14 +++--- .../AbstractSensitiveWordCharIgnore.java | 6 +-- .../ignore/NoneSensitiveWordCharIgnore.java | 2 +- .../SpecialCharSensitiveWordCharIgnore.java | 4 +- .../word/support/replace/WordReplaceChar.java | 2 +- .../support/result/WordResultHandlerWord.java | 2 +- .../result/WordResultHandlerWordTags.java | 2 +- .../sensitive/word/utils/InnerCharUtils.java | 3 +- .../word/utils/InnerWordCharUtils.java | 45 +++++++++++++++---- .../word/utils/InnerWordNumUtils.java | 8 ++-- .../sensitive/word/replace/MyWordReplace.java | 4 +- 19 files changed, 81 insertions(+), 51 deletions(-) diff --git a/CHANGE_LOG.md b/CHANGE_LOG.md index 64c893b..2196ebf 100644 --- a/CHANGE_LOG.md +++ b/CHANGE_LOG.md @@ -456,3 +456,9 @@ | 序号 | 变更类型 | 说明 | 时间 | 备注 | |:---|:-----|------------|:------------------|:---------------------------------------------------| | 1 | O | 优化 char 映射 | 2025-9-4 16:22:24 | https://github.com/houbb/sensitive-word/issues/131 | + +# release_0.29.0 + +| 序号 | 变更类型 | 说明 | 时间 | 备注 | +|:---|:-----|-------------------------------|:------------------|:---------------------------------------------------| +| 1 | O | 尽量用 string.charAt 替代 `chars[i]` | 2025-9-4 16:22:24 | https://github.com/houbb/sensitive-word/issues/131 | \ No newline at end of file diff --git a/pom.xml b/pom.xml index a5716ea..e4b0f79 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ com.github.houbb sensitive-word - 0.28.0 + 0.29.0-SNAPSHOT diff --git a/src/main/java/com/github/houbb/sensitive/word/api/ISensitiveWordCharIgnore.java b/src/main/java/com/github/houbb/sensitive/word/api/ISensitiveWordCharIgnore.java index 1d8cc4d..c92d345 100644 --- a/src/main/java/com/github/houbb/sensitive/word/api/ISensitiveWordCharIgnore.java +++ b/src/main/java/com/github/houbb/sensitive/word/api/ISensitiveWordCharIgnore.java @@ -11,12 +11,12 @@ public interface ISensitiveWordCharIgnore { /** * 是否忽略当前字符 * @param ix 下标志 - * @param chars 字符数组 + * @param text 字符串 * @param innerContext 上下文 * @return 结果 */ boolean ignore(final int ix, - final char[] chars, + final String text, InnerSensitiveWordContext innerContext); } diff --git a/src/main/java/com/github/houbb/sensitive/word/api/IWordReplace.java b/src/main/java/com/github/houbb/sensitive/word/api/IWordReplace.java index e847db3..70826cf 100644 --- a/src/main/java/com/github/houbb/sensitive/word/api/IWordReplace.java +++ b/src/main/java/com/github/houbb/sensitive/word/api/IWordReplace.java @@ -14,11 +14,11 @@ public interface IWordReplace { * 説明:废弃以前的字符串返回,减少对象创建,提升性能。 * * @param stringBuilder 字符串连接器 - * @param rawChars 原始字符串 + * @param rawText 原始字符串 * @param wordResult 当前的敏感词结果 * @param wordContext 上下文 * @since 0.4.0 */ - void replace(final StringBuilder stringBuilder, final char[] rawChars, final IWordResult wordResult, final IWordContext wordContext); + void replace(final StringBuilder stringBuilder, final String rawText, final IWordResult wordResult, final IWordContext wordContext); } diff --git a/src/main/java/com/github/houbb/sensitive/word/core/AbstractSensitiveWord.java b/src/main/java/com/github/houbb/sensitive/word/core/AbstractSensitiveWord.java index c3ce135..7807d80 100644 --- a/src/main/java/com/github/houbb/sensitive/word/core/AbstractSensitiveWord.java +++ b/src/main/java/com/github/houbb/sensitive/word/core/AbstractSensitiveWord.java @@ -47,7 +47,6 @@ public abstract class AbstractSensitiveWord implements ISensitiveWord { // 注意边界 int startIndex = 0; - char[] rawChars = target.toCharArray(); for(IWordResult wordResult : allList) { final int itemStartIx = wordResult.startIndex(); @@ -55,19 +54,19 @@ public abstract class AbstractSensitiveWord implements ISensitiveWord { // 脱敏的左边 if(startIndex < itemStartIx) { - stringBuilder.append(rawChars, startIndex, itemStartIx-startIndex); + stringBuilder.append(target, startIndex, itemStartIx); } // 脱敏部分 - replace.replace(stringBuilder, rawChars, wordResult, context); + replace.replace(stringBuilder, target, wordResult, context); // 更新结尾 startIndex = Math.max(startIndex, itemEndIx); } // 最后部分 - if (startIndex < rawChars.length) { - stringBuilder.append(rawChars, startIndex, rawChars.length-startIndex); + if (startIndex < target.length()) { + stringBuilder.append(target, startIndex, target.length()); } return stringBuilder.toString(); diff --git a/src/main/java/com/github/houbb/sensitive/word/support/check/AbstractConditionWordCheck.java b/src/main/java/com/github/houbb/sensitive/word/support/check/AbstractConditionWordCheck.java index 72b98d2..716936d 100644 --- a/src/main/java/com/github/houbb/sensitive/word/support/check/AbstractConditionWordCheck.java +++ b/src/main/java/com/github/houbb/sensitive/word/support/check/AbstractConditionWordCheck.java @@ -45,7 +45,6 @@ public abstract class AbstractConditionWordCheck extends AbstractWordCheck { final ISensitiveWordCharIgnore charIgnore = checkContext.wordContext().charIgnore(); final String txt = checkContext.originalText(); - final char[] chars = txt.toCharArray(); final IWordContext context = checkContext.wordContext(); final Map formatCharMapping = checkContext.formatCharMapping(); @@ -59,7 +58,7 @@ public abstract class AbstractConditionWordCheck extends AbstractWordCheck { currentIx = i; // 是否忽略? - boolean ignoreCharFlag = charIgnore.ignore(currentIx, chars, checkContext); + boolean ignoreCharFlag = charIgnore.ignore(currentIx, txt, checkContext); if(ignoreCharFlag) { tempIgnoreLen++; diff --git a/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckWord.java b/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckWord.java index cbeeb29..d091c66 100644 --- a/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckWord.java +++ b/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckWord.java @@ -46,19 +46,18 @@ public class WordCheckWord extends AbstractWordCheck { final boolean failFast = context.wordFailFast(); StringBuilder stringBuilder = new StringBuilder(); - char[] rawChars = txt.toCharArray(); int tempLen = 0; int maxWhite = 0; int maxBlack = 0; int skipLen = 0; - for (int i = beginIndex; i < rawChars.length; i++) { - if (wordCharIgnore.ignore(i, rawChars, innerContext) && tempLen != 0) { + for (int i = beginIndex; i < txt.length(); i++) { + if (wordCharIgnore.ignore(i, txt, innerContext) && tempLen != 0) { tempLen++; skipLen++; continue; } - char mappingChar = InnerWordFormatUtils.getMappingChar(formatCharMapping, rawChars[i]); + char mappingChar = InnerWordFormatUtils.getMappingChar(formatCharMapping, txt.charAt(i)); stringBuilder.append(mappingChar); tempLen++; diff --git a/src/main/java/com/github/houbb/sensitive/word/support/data/WordDataHashMap.java b/src/main/java/com/github/houbb/sensitive/word/support/data/WordDataHashMap.java index bf189c6..a4d4c6d 100644 --- a/src/main/java/com/github/houbb/sensitive/word/support/data/WordDataHashMap.java +++ b/src/main/java/com/github/houbb/sensitive/word/support/data/WordDataHashMap.java @@ -51,15 +51,14 @@ public class WordDataHashMap extends AbstractWordData { } // 用来按照相应的格式保存敏感词库数据 - char[] chars = key.toCharArray(); - final int size = chars.length; + final int size = key.length(); // 每一个新词的循环,直接将结果设置为当前 map,所有变化都会体现在结果的 map 中 Map currentMap = newInnerWordMap; for (int i = 0; i < size; i++) { // 截取敏感词当中的字,在敏感词库中字为HashMap对象的Key键值 - char charKey = chars[i]; + char charKey = key.charAt(i); // 如果集合存在 Object wordMap = currentMap.get(charKey); diff --git a/src/main/java/com/github/houbb/sensitive/word/support/data/WordDataTree.java b/src/main/java/com/github/houbb/sensitive/word/support/data/WordDataTree.java index 5027f24..2fac69a 100644 --- a/src/main/java/com/github/houbb/sensitive/word/support/data/WordDataTree.java +++ b/src/main/java/com/github/houbb/sensitive/word/support/data/WordDataTree.java @@ -165,9 +165,10 @@ public class WordDataTree extends AbstractWordData { */ private void addWord(WordDataTreeNode newRoot, String word) { WordDataTreeNode tempNode = newRoot; - char[] chars = word.toCharArray(); - for (char c : chars) { + for (int i = 0; i < word.length(); i++) { // 获取子节点 + char c = word.charAt(i); + WordDataTreeNode subNode = tempNode.getSubNode(c); if (subNode == null) { subNode = new WordDataTreeNode(); @@ -188,11 +189,12 @@ public class WordDataTree extends AbstractWordData { WordDataTreeNode tempNode = root; //需要删除的 Map map = new HashMap<>(); - char[] chars = word.toCharArray(); - int length = chars.length; + int length = word.length(); for (int i = 0; i < length; i++) { + final char c = word.charAt(i); + //不存在第一个词 - WordDataTreeNode subNode = tempNode.getSubNode(chars[i]); + WordDataTreeNode subNode = tempNode.getSubNode(c); if (subNode == null) { return; } @@ -210,7 +212,7 @@ public class WordDataTree extends AbstractWordData { if (subNode.end()) { map.clear(); } - map.put(chars[i], tempNode); + map.put(c, tempNode); tempNode = subNode; } diff --git a/src/main/java/com/github/houbb/sensitive/word/support/ignore/AbstractSensitiveWordCharIgnore.java b/src/main/java/com/github/houbb/sensitive/word/support/ignore/AbstractSensitiveWordCharIgnore.java index 880dc44..febfe31 100644 --- a/src/main/java/com/github/houbb/sensitive/word/support/ignore/AbstractSensitiveWordCharIgnore.java +++ b/src/main/java/com/github/houbb/sensitive/word/support/ignore/AbstractSensitiveWordCharIgnore.java @@ -9,11 +9,11 @@ import com.github.houbb.sensitive.word.api.context.InnerSensitiveWordContext; */ public abstract class AbstractSensitiveWordCharIgnore implements ISensitiveWordCharIgnore { - protected abstract boolean doIgnore(int ix, char[] chars, InnerSensitiveWordContext innerContext); + protected abstract boolean doIgnore(int ix, String text, InnerSensitiveWordContext innerContext); @Override - public boolean ignore(int ix, char[] chars, InnerSensitiveWordContext innerContext) { - return doIgnore(ix, chars, innerContext); + public boolean ignore(int ix, String text, InnerSensitiveWordContext innerContext) { + return doIgnore(ix, text, innerContext); } } diff --git a/src/main/java/com/github/houbb/sensitive/word/support/ignore/NoneSensitiveWordCharIgnore.java b/src/main/java/com/github/houbb/sensitive/word/support/ignore/NoneSensitiveWordCharIgnore.java index 631ccc2..c55fca6 100644 --- a/src/main/java/com/github/houbb/sensitive/word/support/ignore/NoneSensitiveWordCharIgnore.java +++ b/src/main/java/com/github/houbb/sensitive/word/support/ignore/NoneSensitiveWordCharIgnore.java @@ -9,7 +9,7 @@ import com.github.houbb.sensitive.word.api.context.InnerSensitiveWordContext; public class NoneSensitiveWordCharIgnore extends AbstractSensitiveWordCharIgnore { @Override - protected boolean doIgnore(int ix, char[] chars, InnerSensitiveWordContext innerContext) { + protected boolean doIgnore(int ix, String text, InnerSensitiveWordContext innerContext) { return false; } diff --git a/src/main/java/com/github/houbb/sensitive/word/support/ignore/SpecialCharSensitiveWordCharIgnore.java b/src/main/java/com/github/houbb/sensitive/word/support/ignore/SpecialCharSensitiveWordCharIgnore.java index cac8605..de598d5 100644 --- a/src/main/java/com/github/houbb/sensitive/word/support/ignore/SpecialCharSensitiveWordCharIgnore.java +++ b/src/main/java/com/github/houbb/sensitive/word/support/ignore/SpecialCharSensitiveWordCharIgnore.java @@ -20,8 +20,8 @@ public class SpecialCharSensitiveWordCharIgnore extends AbstractSensitiveWordCha } @Override - protected boolean doIgnore(int ix, char[] chars, InnerSensitiveWordContext innerContext) { - char c = chars[ix]; + protected boolean doIgnore(int ix, String text, InnerSensitiveWordContext innerContext) { + char c = text.charAt(ix); return SET.contains(c); } diff --git a/src/main/java/com/github/houbb/sensitive/word/support/replace/WordReplaceChar.java b/src/main/java/com/github/houbb/sensitive/word/support/replace/WordReplaceChar.java index e392948..95d04ed 100644 --- a/src/main/java/com/github/houbb/sensitive/word/support/replace/WordReplaceChar.java +++ b/src/main/java/com/github/houbb/sensitive/word/support/replace/WordReplaceChar.java @@ -29,7 +29,7 @@ public class WordReplaceChar implements IWordReplace { } @Override - public void replace(StringBuilder stringBuilder, final char[] rawChars, IWordResult wordResult, IWordContext wordContext) { + public void replace(StringBuilder stringBuilder, final String rawText, IWordResult wordResult, IWordContext wordContext) { int wordLen = wordResult.endIndex() - wordResult.startIndex(); for(int i = 0; i < wordLen; i++) { stringBuilder.append(replaceChar); diff --git a/src/main/java/com/github/houbb/sensitive/word/support/result/WordResultHandlerWord.java b/src/main/java/com/github/houbb/sensitive/word/support/result/WordResultHandlerWord.java index af79edb..c3b60b9 100644 --- a/src/main/java/com/github/houbb/sensitive/word/support/result/WordResultHandlerWord.java +++ b/src/main/java/com/github/houbb/sensitive/word/support/result/WordResultHandlerWord.java @@ -26,7 +26,7 @@ public class WordResultHandlerWord extends AbstractWordResultHandler { @Override protected String doHandle(IWordResult wordResult, IWordContext wordContext, String originalText) { // 截取 - return InnerWordCharUtils.getString(originalText.toCharArray(), wordResult); + return InnerWordCharUtils.getString(originalText, wordResult); } } diff --git a/src/main/java/com/github/houbb/sensitive/word/support/result/WordResultHandlerWordTags.java b/src/main/java/com/github/houbb/sensitive/word/support/result/WordResultHandlerWordTags.java index 5e79d64..6d0d009 100644 --- a/src/main/java/com/github/houbb/sensitive/word/support/result/WordResultHandlerWordTags.java +++ b/src/main/java/com/github/houbb/sensitive/word/support/result/WordResultHandlerWordTags.java @@ -21,7 +21,7 @@ public class WordResultHandlerWordTags extends AbstractWordResultHandler wordTags = InnerWordTagUtils.tags(word, wordContext); diff --git a/src/main/java/com/github/houbb/sensitive/word/utils/InnerCharUtils.java b/src/main/java/com/github/houbb/sensitive/word/utils/InnerCharUtils.java index ce7d965..55b1c2a 100644 --- a/src/main/java/com/github/houbb/sensitive/word/utils/InnerCharUtils.java +++ b/src/main/java/com/github/houbb/sensitive/word/utils/InnerCharUtils.java @@ -17,9 +17,8 @@ public class InnerCharUtils { int sum = 0; int weight = 1; - char[] chars = text.toCharArray(); for(int i = len-1; i >= 0; i--) { - int val = getCharInt(chars[i]); + int val = getCharInt(text.charAt(i)); sum += weight * val; diff --git a/src/main/java/com/github/houbb/sensitive/word/utils/InnerWordCharUtils.java b/src/main/java/com/github/houbb/sensitive/word/utils/InnerWordCharUtils.java index 6e9374c..ca61060 100644 --- a/src/main/java/com/github/houbb/sensitive/word/utils/InnerWordCharUtils.java +++ b/src/main/java/com/github/houbb/sensitive/word/utils/InnerWordCharUtils.java @@ -74,13 +74,14 @@ public final class InnerWordCharUtils { * @return 结果 * @since 0.5.0 */ - public static String getString(final char[] chars, - final int startIndex, - final int endIndex) { - // 截取 - int len = endIndex - startIndex; - return new String(chars, startIndex, len); - } +// @Deprecated +// public static String getString(final char[] chars, +// final int startIndex, +// final int endIndex) { +// // 截取 +// int len = endIndex - startIndex; +// return new String(chars, startIndex, len); +// } /** * 构建字符串 @@ -89,9 +90,35 @@ public final class InnerWordCharUtils { * @return 结果 * @since 0.5.0 */ - public static String getString(final char[] chars, +// @Deprecated +// public static String getString(final char[] chars, +// final IWordResult wordResult) { +// return getString(chars, wordResult.startIndex(), wordResult.endIndex()); +// } + + /** + * 构建字符串 + * @param text 字符串 + * @param startIndex 开始位置 + * @param endIndex 结束位置 + * @return 结果 + * @since 0.29.0 + */ + public static String getString(final String text, + final int startIndex, + final int endIndex) { + return text.substring(startIndex, endIndex); + } + /** + * 构建字符串 + * @param text 字符串 + * @param wordResult 结果 + * @return 结果 + * @since 0.29.0 + */ + public static String getString(final String text, final IWordResult wordResult) { - return getString(chars, wordResult.startIndex(), wordResult.endIndex()); + return getString(text, wordResult.startIndex(), wordResult.endIndex()); } } diff --git a/src/main/java/com/github/houbb/sensitive/word/utils/InnerWordNumUtils.java b/src/main/java/com/github/houbb/sensitive/word/utils/InnerWordNumUtils.java index 9cfa860..ce9d8a3 100644 --- a/src/main/java/com/github/houbb/sensitive/word/utils/InnerWordNumUtils.java +++ b/src/main/java/com/github/houbb/sensitive/word/utils/InnerWordNumUtils.java @@ -89,10 +89,10 @@ public final class InnerWordNumUtils { return string; } - char[] chars = string.toCharArray(); - StringBuilder stringBuilder = new StringBuilder(chars.length); - for(char c : chars) { - char mapChar = getMappingChar(c); + int length = string.length(); + StringBuilder stringBuilder = new StringBuilder(length); + for(int i = 0; i < length; i++) { + char mapChar = getMappingChar(string.charAt(i)); //TODO: stop word 的处理 stringBuilder.append(mapChar); diff --git a/src/test/java/com/github/houbb/sensitive/word/replace/MyWordReplace.java b/src/test/java/com/github/houbb/sensitive/word/replace/MyWordReplace.java index d9a0032..65c96eb 100644 --- a/src/test/java/com/github/houbb/sensitive/word/replace/MyWordReplace.java +++ b/src/test/java/com/github/houbb/sensitive/word/replace/MyWordReplace.java @@ -14,8 +14,8 @@ import com.github.houbb.sensitive.word.utils.InnerWordCharUtils; public class MyWordReplace implements IWordReplace { @Override - public void replace(StringBuilder stringBuilder, final char[] rawChars, IWordResult wordResult, IWordContext wordContext) { - String sensitiveWord = InnerWordCharUtils.getString(rawChars, wordResult); + public void replace(StringBuilder stringBuilder, final String rawText, IWordResult wordResult, IWordContext wordContext) { + String sensitiveWord = InnerWordCharUtils.getString(rawText, wordResult); // 自定义不同的敏感词替换策略,可以从数据库等地方读取 if("五星红旗".equals(sensitiveWord)) { stringBuilder.append("国家旗帜");