opt toCharArray

This commit is contained in:
binbin.hou
2025-09-04 18:47:41 +08:00
parent 5d87a93cbd
commit 3488319f29
19 changed files with 81 additions and 51 deletions

View File

@@ -456,3 +456,9 @@
| 序号 | 变更类型 | 说明 | 时间 | 备注 |
|:---|:-----|------------|:------------------|:---------------------------------------------------|
| 1 | O | 优化 char 映射 | 2025-9-4 16:22:24 | https://github.com/houbb/sensitive-word/issues/131 |
# release_0.29.0
| 序号 | 变更类型 | 说明 | 时间 | 备注 |
|:---|:-----|-------------------------------|:------------------|:---------------------------------------------------|
| 1 | O | 尽量用 string.charAt 替代 `chars[i]` | 2025-9-4 16:22:24 | https://github.com/houbb/sensitive-word/issues/131 |

View File

@@ -6,7 +6,7 @@
<groupId>com.github.houbb</groupId>
<artifactId>sensitive-word</artifactId>
<version>0.28.0</version>
<version>0.29.0-SNAPSHOT</version>
<properties>
<!--============================== All Plugins START ==============================-->

View File

@@ -11,12 +11,12 @@ public interface ISensitiveWordCharIgnore {
/**
* 是否忽略当前字符
* @param ix 下标
* @param chars 字符数组
* @param text 字符
* @param innerContext 上下文
* @return 结果
*/
boolean ignore(final int ix,
final char[] chars,
final String text,
InnerSensitiveWordContext innerContext);
}

View File

@@ -14,11 +14,11 @@ public interface IWordReplace {
* 说明:废弃以前的字符串返回,减少对象创建,提升性能。
*
* @param stringBuilder 字符串连接器
* @param rawChars 原始字符串
* @param rawText 原始字符串
* @param wordResult 当前的敏感词结果
* @param wordContext 上下文
* @since 0.4.0
*/
void replace(final StringBuilder stringBuilder, final char[] rawChars, final IWordResult wordResult, final IWordContext wordContext);
void replace(final StringBuilder stringBuilder, final String rawText, final IWordResult wordResult, final IWordContext wordContext);
}

View File

@@ -47,7 +47,6 @@ public abstract class AbstractSensitiveWord implements ISensitiveWord {
// 注意边界
int startIndex = 0;
char[] rawChars = target.toCharArray();
for(IWordResult wordResult : allList) {
final int itemStartIx = wordResult.startIndex();
@@ -55,19 +54,19 @@ public abstract class AbstractSensitiveWord implements ISensitiveWord {
// 脱敏的左边
if(startIndex < itemStartIx) {
stringBuilder.append(rawChars, startIndex, itemStartIx-startIndex);
stringBuilder.append(target, startIndex, itemStartIx);
}
// 脱敏部分
replace.replace(stringBuilder, rawChars, wordResult, context);
replace.replace(stringBuilder, target, wordResult, context);
// 更新结尾
startIndex = Math.max(startIndex, itemEndIx);
}
// 最后部分
if (startIndex < rawChars.length) {
stringBuilder.append(rawChars, startIndex, rawChars.length-startIndex);
if (startIndex < target.length()) {
stringBuilder.append(target, startIndex, target.length());
}
return stringBuilder.toString();

View File

@@ -45,7 +45,6 @@ public abstract class AbstractConditionWordCheck extends AbstractWordCheck {
final ISensitiveWordCharIgnore charIgnore = checkContext.wordContext().charIgnore();
final String txt = checkContext.originalText();
final char[] chars = txt.toCharArray();
final IWordContext context = checkContext.wordContext();
final Map<Character, Character> formatCharMapping = checkContext.formatCharMapping();
@@ -59,7 +58,7 @@ public abstract class AbstractConditionWordCheck extends AbstractWordCheck {
currentIx = i;
// 是否忽略?
boolean ignoreCharFlag = charIgnore.ignore(currentIx, chars, checkContext);
boolean ignoreCharFlag = charIgnore.ignore(currentIx, txt, checkContext);
if(ignoreCharFlag) {
tempIgnoreLen++;

View File

@@ -46,19 +46,18 @@ public class WordCheckWord extends AbstractWordCheck {
final boolean failFast = context.wordFailFast();
StringBuilder stringBuilder = new StringBuilder();
char[] rawChars = txt.toCharArray();
int tempLen = 0;
int maxWhite = 0;
int maxBlack = 0;
int skipLen = 0;
for (int i = beginIndex; i < rawChars.length; i++) {
if (wordCharIgnore.ignore(i, rawChars, innerContext) && tempLen != 0) {
for (int i = beginIndex; i < txt.length(); i++) {
if (wordCharIgnore.ignore(i, txt, innerContext) && tempLen != 0) {
tempLen++;
skipLen++;
continue;
}
char mappingChar = InnerWordFormatUtils.getMappingChar(formatCharMapping, rawChars[i]);
char mappingChar = InnerWordFormatUtils.getMappingChar(formatCharMapping, txt.charAt(i));
stringBuilder.append(mappingChar);
tempLen++;

View File

@@ -51,15 +51,14 @@ public class WordDataHashMap extends AbstractWordData {
}
// 用来按照相应的格式保存敏感词库数据
char[] chars = key.toCharArray();
final int size = chars.length;
final int size = key.length();
// 每一个新词的循环,直接将结果设置为当前 map,所有变化都会体现在结果的 map 中
Map currentMap = newInnerWordMap;
for (int i = 0; i < size; i++) {
// 截取敏感词当中的字,在敏感词库中字为HashMap对象的Key键值
char charKey = chars[i];
char charKey = key.charAt(i);
// 如果集合存在
Object wordMap = currentMap.get(charKey);

View File

@@ -165,9 +165,10 @@ public class WordDataTree extends AbstractWordData {
*/
private void addWord(WordDataTreeNode newRoot, String word) {
WordDataTreeNode tempNode = newRoot;
char[] chars = word.toCharArray();
for (char c : chars) {
for (int i = 0; i < word.length(); i++) {
// 获取子节点
char c = word.charAt(i);
WordDataTreeNode subNode = tempNode.getSubNode(c);
if (subNode == null) {
subNode = new WordDataTreeNode();
@@ -188,11 +189,12 @@ public class WordDataTree extends AbstractWordData {
WordDataTreeNode tempNode = root;
//需要删除的
Map<Character, WordDataTreeNode> map = new HashMap<>();
char[] chars = word.toCharArray();
int length = chars.length;
int length = word.length();
for (int i = 0; i < length; i++) {
final char c = word.charAt(i);
//不存在第一个词
WordDataTreeNode subNode = tempNode.getSubNode(chars[i]);
WordDataTreeNode subNode = tempNode.getSubNode(c);
if (subNode == null) {
return;
}
@@ -210,7 +212,7 @@ public class WordDataTree extends AbstractWordData {
if (subNode.end()) {
map.clear();
}
map.put(chars[i], tempNode);
map.put(c, tempNode);
tempNode = subNode;
}

View File

@@ -9,11 +9,11 @@ import com.github.houbb.sensitive.word.api.context.InnerSensitiveWordContext;
*/
public abstract class AbstractSensitiveWordCharIgnore implements ISensitiveWordCharIgnore {
protected abstract boolean doIgnore(int ix, char[] chars, InnerSensitiveWordContext innerContext);
protected abstract boolean doIgnore(int ix, String text, InnerSensitiveWordContext innerContext);
@Override
public boolean ignore(int ix, char[] chars, InnerSensitiveWordContext innerContext) {
return doIgnore(ix, chars, innerContext);
public boolean ignore(int ix, String text, InnerSensitiveWordContext innerContext) {
return doIgnore(ix, text, innerContext);
}
}

View File

@@ -9,7 +9,7 @@ import com.github.houbb.sensitive.word.api.context.InnerSensitiveWordContext;
public class NoneSensitiveWordCharIgnore extends AbstractSensitiveWordCharIgnore {
@Override
protected boolean doIgnore(int ix, char[] chars, InnerSensitiveWordContext innerContext) {
protected boolean doIgnore(int ix, String text, InnerSensitiveWordContext innerContext) {
return false;
}

View File

@@ -20,8 +20,8 @@ public class SpecialCharSensitiveWordCharIgnore extends AbstractSensitiveWordCha
}
@Override
protected boolean doIgnore(int ix, char[] chars, InnerSensitiveWordContext innerContext) {
char c = chars[ix];
protected boolean doIgnore(int ix, String text, InnerSensitiveWordContext innerContext) {
char c = text.charAt(ix);
return SET.contains(c);
}

View File

@@ -29,7 +29,7 @@ public class WordReplaceChar implements IWordReplace {
}
@Override
public void replace(StringBuilder stringBuilder, final char[] rawChars, IWordResult wordResult, IWordContext wordContext) {
public void replace(StringBuilder stringBuilder, final String rawText, IWordResult wordResult, IWordContext wordContext) {
int wordLen = wordResult.endIndex() - wordResult.startIndex();
for(int i = 0; i < wordLen; i++) {
stringBuilder.append(replaceChar);

View File

@@ -26,7 +26,7 @@ public class WordResultHandlerWord extends AbstractWordResultHandler<String> {
@Override
protected String doHandle(IWordResult wordResult, IWordContext wordContext, String originalText) {
// 截取
return InnerWordCharUtils.getString(originalText.toCharArray(), wordResult);
return InnerWordCharUtils.getString(originalText, wordResult);
}
}

View File

@@ -21,7 +21,7 @@ public class WordResultHandlerWordTags extends AbstractWordResultHandler<WordTag
WordTagsDto dto = new WordTagsDto();
// 截取
String word = InnerWordCharUtils.getString(originalText.toCharArray(), wordResult);
String word = InnerWordCharUtils.getString(originalText, wordResult);
// 获取 tags (使用清理后的单词查找标签)
Set<String> wordTags = InnerWordTagUtils.tags(word, wordContext);

View File

@@ -17,9 +17,8 @@ public class InnerCharUtils {
int sum = 0;
int weight = 1;
char[] chars = text.toCharArray();
for(int i = len-1; i >= 0; i--) {
int val = getCharInt(chars[i]);
int val = getCharInt(text.charAt(i));
sum += weight * val;

View File

@@ -74,13 +74,14 @@ public final class InnerWordCharUtils {
* @return 结果
* @since 0.5.0
*/
public static String getString(final char[] chars,
final int startIndex,
final int endIndex) {
// 截取
int len = endIndex - startIndex;
return new String(chars, startIndex, len);
}
// @Deprecated
// public static String getString(final char[] chars,
// final int startIndex,
// final int endIndex) {
// // 截取
// int len = endIndex - startIndex;
// return new String(chars, startIndex, len);
// }
/**
* 构建字符串
@@ -89,9 +90,35 @@ public final class InnerWordCharUtils {
* @return 结果
* @since 0.5.0
*/
public static String getString(final char[] chars,
// @Deprecated
// public static String getString(final char[] chars,
// final IWordResult wordResult) {
// return getString(chars, wordResult.startIndex(), wordResult.endIndex());
// }
/**
 * Builds a string from the part of {@code text} between two positions.
 *
 * @param text       source string
 * @param startIndex start position, used as the begin index of the substring
 * @param endIndex   end position, used as the end index of the substring
 * @return the extracted string
 * @since 0.29.0
 */
public static String getString(final String text,
                               final int startIndex,
                               final int endIndex) {
    // Slice the source string directly; no intermediate char[] copy is needed.
    final String slice = text.substring(startIndex, endIndex);
    return slice;
}
/**
* 构建字符串
* @param text 字符串
* @param wordResult 结果
* @return 结果
* @since 0.29.0
*/
public static String getString(final String text,
final IWordResult wordResult) {
return getString(chars, wordResult.startIndex(), wordResult.endIndex());
return getString(text, wordResult.startIndex(), wordResult.endIndex());
}
}

View File

@@ -89,10 +89,10 @@ public final class InnerWordNumUtils {
return string;
}
char[] chars = string.toCharArray();
StringBuilder stringBuilder = new StringBuilder(chars.length);
for(char c : chars) {
char mapChar = getMappingChar(c);
int length = string.length();
StringBuilder stringBuilder = new StringBuilder(length);
for(int i = 0; i < length; i++) {
char mapChar = getMappingChar(string.charAt(i));
//TODO: stop word 的处理
stringBuilder.append(mapChar);

View File

@@ -14,8 +14,8 @@ import com.github.houbb.sensitive.word.utils.InnerWordCharUtils;
public class MyWordReplace implements IWordReplace {
@Override
public void replace(StringBuilder stringBuilder, final char[] rawChars, IWordResult wordResult, IWordContext wordContext) {
String sensitiveWord = InnerWordCharUtils.getString(rawChars, wordResult);
public void replace(StringBuilder stringBuilder, final String rawText, IWordResult wordResult, IWordContext wordContext) {
String sensitiveWord = InnerWordCharUtils.getString(rawText, wordResult);
// 自定义不同的敏感词替换策略,可以从数据库等地方读取
if("五星红旗".equals(sensitiveWord)) {
stringBuilder.append("国家旗帜");