mirror of
https://github.com/houbb/sensitive-word.git
synced 2026-03-22 00:17:35 +08:00
opt toCharArray
This commit is contained in:
@@ -456,3 +456,9 @@
|
||||
| 序号 | 变更类型 | 说明 | 时间 | 备注 |
|
||||
|:---|:-----|------------|:------------------|:---------------------------------------------------|
|
||||
| 1 | O | 优化 char 映射 | 2025-9-4 16:22:24 | https://github.com/houbb/sensitive-word/issues/131 |
|
||||
|
||||
# release_0.29.0
|
||||
|
||||
| 序号 | 变更类型 | 说明 | 时间 | 备注 |
|
||||
|:---|:-----|-------------------------------|:------------------|:---------------------------------------------------|
|
||||
| 1 | O | 尽量用 string.charAt 替代 `chars[i]` | 2025-9-4 16:22:24 | https://github.com/houbb/sensitive-word/issues/131 |
|
||||
2
pom.xml
2
pom.xml
@@ -6,7 +6,7 @@
|
||||
|
||||
<groupId>com.github.houbb</groupId>
|
||||
<artifactId>sensitive-word</artifactId>
|
||||
<version>0.28.0</version>
|
||||
<version>0.29.0-SNAPSHOT</version>
|
||||
|
||||
<properties>
|
||||
<!--============================== All Plugins START ==============================-->
|
||||
|
||||
@@ -11,12 +11,12 @@ public interface ISensitiveWordCharIgnore {
|
||||
/**
|
||||
* 是否忽略当前字符
|
||||
* @param ix 下标
|
||||
* @param chars 字符数组
|
||||
* @param text 字符串
|
||||
* @param innerContext 上下文
|
||||
* @return 结果
|
||||
*/
|
||||
boolean ignore(final int ix,
|
||||
final char[] chars,
|
||||
final String text,
|
||||
InnerSensitiveWordContext innerContext);
|
||||
|
||||
}
|
||||
|
||||
@@ -14,11 +14,11 @@ public interface IWordReplace {
|
||||
* 说明:废弃以前的字符串返回,减少对象创建,提升性能。
|
||||
*
|
||||
* @param stringBuilder 字符串连接器
|
||||
* @param rawChars 原始字符串
|
||||
* @param rawText 原始字符串
|
||||
* @param wordResult 当前的敏感词结果
|
||||
* @param wordContext 上下文
|
||||
* @since 0.4.0
|
||||
*/
|
||||
void replace(final StringBuilder stringBuilder, final char[] rawChars, final IWordResult wordResult, final IWordContext wordContext);
|
||||
void replace(final StringBuilder stringBuilder, final String rawText, final IWordResult wordResult, final IWordContext wordContext);
|
||||
|
||||
}
|
||||
|
||||
@@ -47,7 +47,6 @@ public abstract class AbstractSensitiveWord implements ISensitiveWord {
|
||||
|
||||
// 注意边界
|
||||
int startIndex = 0;
|
||||
char[] rawChars = target.toCharArray();
|
||||
|
||||
for(IWordResult wordResult : allList) {
|
||||
final int itemStartIx = wordResult.startIndex();
|
||||
@@ -55,19 +54,19 @@ public abstract class AbstractSensitiveWord implements ISensitiveWord {
|
||||
|
||||
// 脱敏的左边
|
||||
if(startIndex < itemStartIx) {
|
||||
stringBuilder.append(rawChars, startIndex, itemStartIx-startIndex);
|
||||
stringBuilder.append(target, startIndex, itemStartIx);
|
||||
}
|
||||
|
||||
// 脱敏部分
|
||||
replace.replace(stringBuilder, rawChars, wordResult, context);
|
||||
replace.replace(stringBuilder, target, wordResult, context);
|
||||
|
||||
// 更新结尾
|
||||
startIndex = Math.max(startIndex, itemEndIx);
|
||||
}
|
||||
|
||||
// 最后部分
|
||||
if (startIndex < rawChars.length) {
|
||||
stringBuilder.append(rawChars, startIndex, rawChars.length-startIndex);
|
||||
if (startIndex < target.length()) {
|
||||
stringBuilder.append(target, startIndex, target.length());
|
||||
}
|
||||
|
||||
return stringBuilder.toString();
|
||||
|
||||
@@ -45,7 +45,6 @@ public abstract class AbstractConditionWordCheck extends AbstractWordCheck {
|
||||
final ISensitiveWordCharIgnore charIgnore = checkContext.wordContext().charIgnore();
|
||||
|
||||
final String txt = checkContext.originalText();
|
||||
final char[] chars = txt.toCharArray();
|
||||
final IWordContext context = checkContext.wordContext();
|
||||
final Map<Character, Character> formatCharMapping = checkContext.formatCharMapping();
|
||||
|
||||
@@ -59,7 +58,7 @@ public abstract class AbstractConditionWordCheck extends AbstractWordCheck {
|
||||
currentIx = i;
|
||||
|
||||
// 是否忽略?
|
||||
boolean ignoreCharFlag = charIgnore.ignore(currentIx, chars, checkContext);
|
||||
boolean ignoreCharFlag = charIgnore.ignore(currentIx, txt, checkContext);
|
||||
if(ignoreCharFlag) {
|
||||
tempIgnoreLen++;
|
||||
|
||||
|
||||
@@ -46,19 +46,18 @@ public class WordCheckWord extends AbstractWordCheck {
|
||||
final boolean failFast = context.wordFailFast();
|
||||
|
||||
StringBuilder stringBuilder = new StringBuilder();
|
||||
char[] rawChars = txt.toCharArray();
|
||||
int tempLen = 0;
|
||||
int maxWhite = 0;
|
||||
int maxBlack = 0;
|
||||
int skipLen = 0;
|
||||
|
||||
for (int i = beginIndex; i < rawChars.length; i++) {
|
||||
if (wordCharIgnore.ignore(i, rawChars, innerContext) && tempLen != 0) {
|
||||
for (int i = beginIndex; i < txt.length(); i++) {
|
||||
if (wordCharIgnore.ignore(i, txt, innerContext) && tempLen != 0) {
|
||||
tempLen++;
|
||||
skipLen++;
|
||||
continue;
|
||||
}
|
||||
char mappingChar = InnerWordFormatUtils.getMappingChar(formatCharMapping, rawChars[i]);
|
||||
char mappingChar = InnerWordFormatUtils.getMappingChar(formatCharMapping, txt.charAt(i));
|
||||
stringBuilder.append(mappingChar);
|
||||
tempLen++;
|
||||
|
||||
|
||||
@@ -51,15 +51,14 @@ public class WordDataHashMap extends AbstractWordData {
|
||||
}
|
||||
|
||||
// 用来按照相应的格式保存敏感词库数据
|
||||
char[] chars = key.toCharArray();
|
||||
final int size = chars.length;
|
||||
final int size = key.length();
|
||||
|
||||
// 每一个新词的循环,直接将结果设置为当前 map,所有变化都会体现在结果的 map 中
|
||||
Map currentMap = newInnerWordMap;
|
||||
|
||||
for (int i = 0; i < size; i++) {
|
||||
// 截取敏感词当中的字,在敏感词库中字为HashMap对象的Key键值
|
||||
char charKey = chars[i];
|
||||
char charKey = key.charAt(i);
|
||||
// 如果集合存在
|
||||
Object wordMap = currentMap.get(charKey);
|
||||
|
||||
|
||||
@@ -165,9 +165,10 @@ public class WordDataTree extends AbstractWordData {
|
||||
*/
|
||||
private void addWord(WordDataTreeNode newRoot, String word) {
|
||||
WordDataTreeNode tempNode = newRoot;
|
||||
char[] chars = word.toCharArray();
|
||||
for (char c : chars) {
|
||||
for (int i = 0; i < word.length(); i++) {
|
||||
// 获取子节点
|
||||
char c = word.charAt(i);
|
||||
|
||||
WordDataTreeNode subNode = tempNode.getSubNode(c);
|
||||
if (subNode == null) {
|
||||
subNode = new WordDataTreeNode();
|
||||
@@ -188,11 +189,12 @@ public class WordDataTree extends AbstractWordData {
|
||||
WordDataTreeNode tempNode = root;
|
||||
//需要删除的
|
||||
Map<Character, WordDataTreeNode> map = new HashMap<>();
|
||||
char[] chars = word.toCharArray();
|
||||
int length = chars.length;
|
||||
int length = word.length();
|
||||
for (int i = 0; i < length; i++) {
|
||||
final char c = word.charAt(i);
|
||||
|
||||
//不存在第一个词
|
||||
WordDataTreeNode subNode = tempNode.getSubNode(chars[i]);
|
||||
WordDataTreeNode subNode = tempNode.getSubNode(c);
|
||||
if (subNode == null) {
|
||||
return;
|
||||
}
|
||||
@@ -210,7 +212,7 @@ public class WordDataTree extends AbstractWordData {
|
||||
if (subNode.end()) {
|
||||
map.clear();
|
||||
}
|
||||
map.put(chars[i], tempNode);
|
||||
map.put(c, tempNode);
|
||||
|
||||
tempNode = subNode;
|
||||
}
|
||||
|
||||
@@ -9,11 +9,11 @@ import com.github.houbb.sensitive.word.api.context.InnerSensitiveWordContext;
|
||||
*/
|
||||
public abstract class AbstractSensitiveWordCharIgnore implements ISensitiveWordCharIgnore {
|
||||
|
||||
protected abstract boolean doIgnore(int ix, char[] chars, InnerSensitiveWordContext innerContext);
|
||||
protected abstract boolean doIgnore(int ix, String text, InnerSensitiveWordContext innerContext);
|
||||
|
||||
@Override
|
||||
public boolean ignore(int ix, char[] chars, InnerSensitiveWordContext innerContext) {
|
||||
return doIgnore(ix, chars, innerContext);
|
||||
public boolean ignore(int ix, String text, InnerSensitiveWordContext innerContext) {
|
||||
return doIgnore(ix, text, innerContext);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -9,7 +9,7 @@ import com.github.houbb.sensitive.word.api.context.InnerSensitiveWordContext;
|
||||
public class NoneSensitiveWordCharIgnore extends AbstractSensitiveWordCharIgnore {
|
||||
|
||||
@Override
|
||||
protected boolean doIgnore(int ix, char[] chars, InnerSensitiveWordContext innerContext) {
|
||||
protected boolean doIgnore(int ix, String text, InnerSensitiveWordContext innerContext) {
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
@@ -20,8 +20,8 @@ public class SpecialCharSensitiveWordCharIgnore extends AbstractSensitiveWordCha
|
||||
}
|
||||
|
||||
@Override
|
||||
protected boolean doIgnore(int ix, char[] chars, InnerSensitiveWordContext innerContext) {
|
||||
char c = chars[ix];
|
||||
protected boolean doIgnore(int ix, String text, InnerSensitiveWordContext innerContext) {
|
||||
char c = text.charAt(ix);
|
||||
return SET.contains(c);
|
||||
}
|
||||
|
||||
|
||||
@@ -29,7 +29,7 @@ public class WordReplaceChar implements IWordReplace {
|
||||
}
|
||||
|
||||
@Override
|
||||
public void replace(StringBuilder stringBuilder, final char[] rawChars, IWordResult wordResult, IWordContext wordContext) {
|
||||
public void replace(StringBuilder stringBuilder, final String rawText, IWordResult wordResult, IWordContext wordContext) {
|
||||
int wordLen = wordResult.endIndex() - wordResult.startIndex();
|
||||
for(int i = 0; i < wordLen; i++) {
|
||||
stringBuilder.append(replaceChar);
|
||||
|
||||
@@ -26,7 +26,7 @@ public class WordResultHandlerWord extends AbstractWordResultHandler<String> {
|
||||
@Override
|
||||
protected String doHandle(IWordResult wordResult, IWordContext wordContext, String originalText) {
|
||||
// 截取
|
||||
return InnerWordCharUtils.getString(originalText.toCharArray(), wordResult);
|
||||
return InnerWordCharUtils.getString(originalText, wordResult);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -21,7 +21,7 @@ public class WordResultHandlerWordTags extends AbstractWordResultHandler<WordTag
|
||||
WordTagsDto dto = new WordTagsDto();
|
||||
|
||||
// 截取
|
||||
String word = InnerWordCharUtils.getString(originalText.toCharArray(), wordResult);
|
||||
String word = InnerWordCharUtils.getString(originalText, wordResult);
|
||||
|
||||
// 获取 tags (使用清理后的单词查找标签)
|
||||
Set<String> wordTags = InnerWordTagUtils.tags(word, wordContext);
|
||||
|
||||
@@ -17,9 +17,8 @@ public class InnerCharUtils {
|
||||
int sum = 0;
|
||||
|
||||
int weight = 1;
|
||||
char[] chars = text.toCharArray();
|
||||
for(int i = len-1; i >= 0; i--) {
|
||||
int val = getCharInt(chars[i]);
|
||||
int val = getCharInt(text.charAt(i));
|
||||
|
||||
sum += weight * val;
|
||||
|
||||
|
||||
@@ -74,13 +74,14 @@ public final class InnerWordCharUtils {
|
||||
* @return 结果
|
||||
* @since 0.5.0
|
||||
*/
|
||||
public static String getString(final char[] chars,
|
||||
final int startIndex,
|
||||
final int endIndex) {
|
||||
// 截取
|
||||
int len = endIndex - startIndex;
|
||||
return new String(chars, startIndex, len);
|
||||
}
|
||||
// @Deprecated
|
||||
// public static String getString(final char[] chars,
|
||||
// final int startIndex,
|
||||
// final int endIndex) {
|
||||
// // 截取
|
||||
// int len = endIndex - startIndex;
|
||||
// return new String(chars, startIndex, len);
|
||||
// }
|
||||
|
||||
/**
|
||||
* 构建字符串
|
||||
@@ -89,9 +90,35 @@ public final class InnerWordCharUtils {
|
||||
* @return 结果
|
||||
* @since 0.5.0
|
||||
*/
|
||||
public static String getString(final char[] chars,
|
||||
// @Deprecated
|
||||
// public static String getString(final char[] chars,
|
||||
// final IWordResult wordResult) {
|
||||
// return getString(chars, wordResult.startIndex(), wordResult.endIndex());
|
||||
// }
|
||||
|
||||
/**
|
||||
* 构建字符串
|
||||
* @param text 字符串
|
||||
* @param startIndex 开始位置
|
||||
* @param endIndex 结束位置
|
||||
* @return 结果
|
||||
* @since 0.29.0
|
||||
*/
|
||||
public static String getString(final String text,
|
||||
final int startIndex,
|
||||
final int endIndex) {
|
||||
return text.substring(startIndex, endIndex);
|
||||
}
|
||||
/**
|
||||
* 构建字符串
|
||||
* @param text 字符串
|
||||
* @param wordResult 结果
|
||||
* @return 结果
|
||||
* @since 0.29.0
|
||||
*/
|
||||
public static String getString(final String text,
|
||||
final IWordResult wordResult) {
|
||||
return getString(chars, wordResult.startIndex(), wordResult.endIndex());
|
||||
return getString(text, wordResult.startIndex(), wordResult.endIndex());
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -89,10 +89,10 @@ public final class InnerWordNumUtils {
|
||||
return string;
|
||||
}
|
||||
|
||||
char[] chars = string.toCharArray();
|
||||
StringBuilder stringBuilder = new StringBuilder(chars.length);
|
||||
for(char c : chars) {
|
||||
char mapChar = getMappingChar(c);
|
||||
int length = string.length();
|
||||
StringBuilder stringBuilder = new StringBuilder(length);
|
||||
for(int i = 0; i < length; i++) {
|
||||
char mapChar = getMappingChar(string.charAt(i));
|
||||
|
||||
//TODO: stop word 的处理
|
||||
stringBuilder.append(mapChar);
|
||||
|
||||
@@ -14,8 +14,8 @@ import com.github.houbb.sensitive.word.utils.InnerWordCharUtils;
|
||||
public class MyWordReplace implements IWordReplace {
|
||||
|
||||
@Override
|
||||
public void replace(StringBuilder stringBuilder, final char[] rawChars, IWordResult wordResult, IWordContext wordContext) {
|
||||
String sensitiveWord = InnerWordCharUtils.getString(rawChars, wordResult);
|
||||
public void replace(StringBuilder stringBuilder, final String rawText, IWordResult wordResult, IWordContext wordContext) {
|
||||
String sensitiveWord = InnerWordCharUtils.getString(rawText, wordResult);
|
||||
// 自定义不同的敏感词替换策略,可以从数据库等地方读取
|
||||
if("五星红旗".equals(sensitiveWord)) {
|
||||
stringBuilder.append("国家旗帜");
|
||||
|
||||
Reference in New Issue
Block a user