mirror of
https://github.com/houbb/sensitive-word.git
synced 2026-03-22 08:27:36 +08:00
opt toCharArray
This commit is contained in:
@@ -456,3 +456,9 @@
|
|||||||
| 序号 | 变更类型 | 说明 | 时间 | 备注 |
|
| 序号 | 变更类型 | 说明 | 时间 | 备注 |
|
||||||
|:---|:-----|------------|:------------------|:---------------------------------------------------|
|
|:---|:-----|------------|:------------------|:---------------------------------------------------|
|
||||||
| 1 | O | 优化 char 映射 | 2025-9-4 16:22:24 | https://github.com/houbb/sensitive-word/issues/131 |
|
| 1 | O | 优化 char 映射 | 2025-9-4 16:22:24 | https://github.com/houbb/sensitive-word/issues/131 |
|
||||||
|
|
||||||
|
# release_0.29.0
|
||||||
|
|
||||||
|
| 序号 | 变更类型 | 说明 | 时间 | 备注 |
|
||||||
|
|:---|:-----|-------------------------------|:------------------|:---------------------------------------------------|
|
||||||
|
| 1 | O | 尽量用 string.charAt 替代 `chars[i]` | 2025-9-4 16:22:24 | https://github.com/houbb/sensitive-word/issues/131 |
|
||||||
2
pom.xml
2
pom.xml
@@ -6,7 +6,7 @@
|
|||||||
|
|
||||||
<groupId>com.github.houbb</groupId>
|
<groupId>com.github.houbb</groupId>
|
||||||
<artifactId>sensitive-word</artifactId>
|
<artifactId>sensitive-word</artifactId>
|
||||||
<version>0.28.0</version>
|
<version>0.29.0-SNAPSHOT</version>
|
||||||
|
|
||||||
<properties>
|
<properties>
|
||||||
<!--============================== All Plugins START ==============================-->
|
<!--============================== All Plugins START ==============================-->
|
||||||
|
|||||||
@@ -11,12 +11,12 @@ public interface ISensitiveWordCharIgnore {
|
|||||||
/**
|
/**
|
||||||
* 是否忽略当前字符
|
* 是否忽略当前字符
|
||||||
* @param ix 下标
|
* @param ix 下标
|
||||||
* @param chars 字符数组
|
* @param text 字符串
|
||||||
* @param innerContext 上下文
|
* @param innerContext 上下文
|
||||||
* @return 结果
|
* @return 结果
|
||||||
*/
|
*/
|
||||||
boolean ignore(final int ix,
|
boolean ignore(final int ix,
|
||||||
final char[] chars,
|
final String text,
|
||||||
InnerSensitiveWordContext innerContext);
|
InnerSensitiveWordContext innerContext);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -14,11 +14,11 @@ public interface IWordReplace {
|
|||||||
* 说明:废弃以前的字符串返回,减少对象创建,提升性能。
|
* 说明:废弃以前的字符串返回,减少对象创建,提升性能。
|
||||||
*
|
*
|
||||||
* @param stringBuilder 字符串连接器
|
* @param stringBuilder 字符串连接器
|
||||||
* @param rawChars 原始字符串
|
* @param rawText 原始字符串
|
||||||
* @param wordResult 当前的敏感词结果
|
* @param wordResult 当前的敏感词结果
|
||||||
* @param wordContext 上下文
|
* @param wordContext 上下文
|
||||||
* @since 0.4.0
|
* @since 0.4.0
|
||||||
*/
|
*/
|
||||||
void replace(final StringBuilder stringBuilder, final char[] rawChars, final IWordResult wordResult, final IWordContext wordContext);
|
void replace(final StringBuilder stringBuilder, final String rawText, final IWordResult wordResult, final IWordContext wordContext);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -47,7 +47,6 @@ public abstract class AbstractSensitiveWord implements ISensitiveWord {
|
|||||||
|
|
||||||
// 注意边界
|
// 注意边界
|
||||||
int startIndex = 0;
|
int startIndex = 0;
|
||||||
char[] rawChars = target.toCharArray();
|
|
||||||
|
|
||||||
for(IWordResult wordResult : allList) {
|
for(IWordResult wordResult : allList) {
|
||||||
final int itemStartIx = wordResult.startIndex();
|
final int itemStartIx = wordResult.startIndex();
|
||||||
@@ -55,19 +54,19 @@ public abstract class AbstractSensitiveWord implements ISensitiveWord {
|
|||||||
|
|
||||||
// 脱敏的左边
|
// 脱敏的左边
|
||||||
if(startIndex < itemStartIx) {
|
if(startIndex < itemStartIx) {
|
||||||
stringBuilder.append(rawChars, startIndex, itemStartIx-startIndex);
|
stringBuilder.append(target, startIndex, itemStartIx);
|
||||||
}
|
}
|
||||||
|
|
||||||
// 脱敏部分
|
// 脱敏部分
|
||||||
replace.replace(stringBuilder, rawChars, wordResult, context);
|
replace.replace(stringBuilder, target, wordResult, context);
|
||||||
|
|
||||||
// 更新结尾
|
// 更新结尾
|
||||||
startIndex = Math.max(startIndex, itemEndIx);
|
startIndex = Math.max(startIndex, itemEndIx);
|
||||||
}
|
}
|
||||||
|
|
||||||
// 最后部分
|
// 最后部分
|
||||||
if (startIndex < rawChars.length) {
|
if (startIndex < target.length()) {
|
||||||
stringBuilder.append(rawChars, startIndex, rawChars.length-startIndex);
|
stringBuilder.append(target, startIndex, target.length());
|
||||||
}
|
}
|
||||||
|
|
||||||
return stringBuilder.toString();
|
return stringBuilder.toString();
|
||||||
|
|||||||
@@ -45,7 +45,6 @@ public abstract class AbstractConditionWordCheck extends AbstractWordCheck {
|
|||||||
final ISensitiveWordCharIgnore charIgnore = checkContext.wordContext().charIgnore();
|
final ISensitiveWordCharIgnore charIgnore = checkContext.wordContext().charIgnore();
|
||||||
|
|
||||||
final String txt = checkContext.originalText();
|
final String txt = checkContext.originalText();
|
||||||
final char[] chars = txt.toCharArray();
|
|
||||||
final IWordContext context = checkContext.wordContext();
|
final IWordContext context = checkContext.wordContext();
|
||||||
final Map<Character, Character> formatCharMapping = checkContext.formatCharMapping();
|
final Map<Character, Character> formatCharMapping = checkContext.formatCharMapping();
|
||||||
|
|
||||||
@@ -59,7 +58,7 @@ public abstract class AbstractConditionWordCheck extends AbstractWordCheck {
|
|||||||
currentIx = i;
|
currentIx = i;
|
||||||
|
|
||||||
// 是否忽略?
|
// 是否忽略?
|
||||||
boolean ignoreCharFlag = charIgnore.ignore(currentIx, chars, checkContext);
|
boolean ignoreCharFlag = charIgnore.ignore(currentIx, txt, checkContext);
|
||||||
if(ignoreCharFlag) {
|
if(ignoreCharFlag) {
|
||||||
tempIgnoreLen++;
|
tempIgnoreLen++;
|
||||||
|
|
||||||
|
|||||||
@@ -46,19 +46,18 @@ public class WordCheckWord extends AbstractWordCheck {
|
|||||||
final boolean failFast = context.wordFailFast();
|
final boolean failFast = context.wordFailFast();
|
||||||
|
|
||||||
StringBuilder stringBuilder = new StringBuilder();
|
StringBuilder stringBuilder = new StringBuilder();
|
||||||
char[] rawChars = txt.toCharArray();
|
|
||||||
int tempLen = 0;
|
int tempLen = 0;
|
||||||
int maxWhite = 0;
|
int maxWhite = 0;
|
||||||
int maxBlack = 0;
|
int maxBlack = 0;
|
||||||
int skipLen = 0;
|
int skipLen = 0;
|
||||||
|
|
||||||
for (int i = beginIndex; i < rawChars.length; i++) {
|
for (int i = beginIndex; i < txt.length(); i++) {
|
||||||
if (wordCharIgnore.ignore(i, rawChars, innerContext) && tempLen != 0) {
|
if (wordCharIgnore.ignore(i, txt, innerContext) && tempLen != 0) {
|
||||||
tempLen++;
|
tempLen++;
|
||||||
skipLen++;
|
skipLen++;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
char mappingChar = InnerWordFormatUtils.getMappingChar(formatCharMapping, rawChars[i]);
|
char mappingChar = InnerWordFormatUtils.getMappingChar(formatCharMapping, txt.charAt(i));
|
||||||
stringBuilder.append(mappingChar);
|
stringBuilder.append(mappingChar);
|
||||||
tempLen++;
|
tempLen++;
|
||||||
|
|
||||||
|
|||||||
@@ -51,15 +51,14 @@ public class WordDataHashMap extends AbstractWordData {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// 用来按照相应的格式保存敏感词库数据
|
// 用来按照相应的格式保存敏感词库数据
|
||||||
char[] chars = key.toCharArray();
|
final int size = key.length();
|
||||||
final int size = chars.length;
|
|
||||||
|
|
||||||
// 每一个新词的循环,直接将结果设置为当前 map,所有变化都会体现在结果的 map 中
|
// 每一个新词的循环,直接将结果设置为当前 map,所有变化都会体现在结果的 map 中
|
||||||
Map currentMap = newInnerWordMap;
|
Map currentMap = newInnerWordMap;
|
||||||
|
|
||||||
for (int i = 0; i < size; i++) {
|
for (int i = 0; i < size; i++) {
|
||||||
// 截取敏感词当中的字,在敏感词库中字为HashMap对象的Key键值
|
// 截取敏感词当中的字,在敏感词库中字为HashMap对象的Key键值
|
||||||
char charKey = chars[i];
|
char charKey = key.charAt(i);
|
||||||
// 如果集合存在
|
// 如果集合存在
|
||||||
Object wordMap = currentMap.get(charKey);
|
Object wordMap = currentMap.get(charKey);
|
||||||
|
|
||||||
|
|||||||
@@ -165,9 +165,10 @@ public class WordDataTree extends AbstractWordData {
|
|||||||
*/
|
*/
|
||||||
private void addWord(WordDataTreeNode newRoot, String word) {
|
private void addWord(WordDataTreeNode newRoot, String word) {
|
||||||
WordDataTreeNode tempNode = newRoot;
|
WordDataTreeNode tempNode = newRoot;
|
||||||
char[] chars = word.toCharArray();
|
for (int i = 0; i < word.length(); i++) {
|
||||||
for (char c : chars) {
|
|
||||||
// 获取子节点
|
// 获取子节点
|
||||||
|
char c = word.charAt(i);
|
||||||
|
|
||||||
WordDataTreeNode subNode = tempNode.getSubNode(c);
|
WordDataTreeNode subNode = tempNode.getSubNode(c);
|
||||||
if (subNode == null) {
|
if (subNode == null) {
|
||||||
subNode = new WordDataTreeNode();
|
subNode = new WordDataTreeNode();
|
||||||
@@ -188,11 +189,12 @@ public class WordDataTree extends AbstractWordData {
|
|||||||
WordDataTreeNode tempNode = root;
|
WordDataTreeNode tempNode = root;
|
||||||
//需要删除的
|
//需要删除的
|
||||||
Map<Character, WordDataTreeNode> map = new HashMap<>();
|
Map<Character, WordDataTreeNode> map = new HashMap<>();
|
||||||
char[] chars = word.toCharArray();
|
int length = word.length();
|
||||||
int length = chars.length;
|
|
||||||
for (int i = 0; i < length; i++) {
|
for (int i = 0; i < length; i++) {
|
||||||
|
final char c = word.charAt(i);
|
||||||
|
|
||||||
//不存在第一个词
|
//不存在第一个词
|
||||||
WordDataTreeNode subNode = tempNode.getSubNode(chars[i]);
|
WordDataTreeNode subNode = tempNode.getSubNode(c);
|
||||||
if (subNode == null) {
|
if (subNode == null) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@@ -210,7 +212,7 @@ public class WordDataTree extends AbstractWordData {
|
|||||||
if (subNode.end()) {
|
if (subNode.end()) {
|
||||||
map.clear();
|
map.clear();
|
||||||
}
|
}
|
||||||
map.put(chars[i], tempNode);
|
map.put(c, tempNode);
|
||||||
|
|
||||||
tempNode = subNode;
|
tempNode = subNode;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -9,11 +9,11 @@ import com.github.houbb.sensitive.word.api.context.InnerSensitiveWordContext;
|
|||||||
*/
|
*/
|
||||||
public abstract class AbstractSensitiveWordCharIgnore implements ISensitiveWordCharIgnore {
|
public abstract class AbstractSensitiveWordCharIgnore implements ISensitiveWordCharIgnore {
|
||||||
|
|
||||||
protected abstract boolean doIgnore(int ix, char[] chars, InnerSensitiveWordContext innerContext);
|
protected abstract boolean doIgnore(int ix, String text, InnerSensitiveWordContext innerContext);
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean ignore(int ix, char[] chars, InnerSensitiveWordContext innerContext) {
|
public boolean ignore(int ix, String text, InnerSensitiveWordContext innerContext) {
|
||||||
return doIgnore(ix, chars, innerContext);
|
return doIgnore(ix, text, innerContext);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -9,7 +9,7 @@ import com.github.houbb.sensitive.word.api.context.InnerSensitiveWordContext;
|
|||||||
public class NoneSensitiveWordCharIgnore extends AbstractSensitiveWordCharIgnore {
|
public class NoneSensitiveWordCharIgnore extends AbstractSensitiveWordCharIgnore {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected boolean doIgnore(int ix, char[] chars, InnerSensitiveWordContext innerContext) {
|
protected boolean doIgnore(int ix, String text, InnerSensitiveWordContext innerContext) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -20,8 +20,8 @@ public class SpecialCharSensitiveWordCharIgnore extends AbstractSensitiveWordCha
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected boolean doIgnore(int ix, char[] chars, InnerSensitiveWordContext innerContext) {
|
protected boolean doIgnore(int ix, String text, InnerSensitiveWordContext innerContext) {
|
||||||
char c = chars[ix];
|
char c = text.charAt(ix);
|
||||||
return SET.contains(c);
|
return SET.contains(c);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -29,7 +29,7 @@ public class WordReplaceChar implements IWordReplace {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void replace(StringBuilder stringBuilder, final char[] rawChars, IWordResult wordResult, IWordContext wordContext) {
|
public void replace(StringBuilder stringBuilder, final String rawText, IWordResult wordResult, IWordContext wordContext) {
|
||||||
int wordLen = wordResult.endIndex() - wordResult.startIndex();
|
int wordLen = wordResult.endIndex() - wordResult.startIndex();
|
||||||
for(int i = 0; i < wordLen; i++) {
|
for(int i = 0; i < wordLen; i++) {
|
||||||
stringBuilder.append(replaceChar);
|
stringBuilder.append(replaceChar);
|
||||||
|
|||||||
@@ -26,7 +26,7 @@ public class WordResultHandlerWord extends AbstractWordResultHandler<String> {
|
|||||||
@Override
|
@Override
|
||||||
protected String doHandle(IWordResult wordResult, IWordContext wordContext, String originalText) {
|
protected String doHandle(IWordResult wordResult, IWordContext wordContext, String originalText) {
|
||||||
// 截取
|
// 截取
|
||||||
return InnerWordCharUtils.getString(originalText.toCharArray(), wordResult);
|
return InnerWordCharUtils.getString(originalText, wordResult);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -21,7 +21,7 @@ public class WordResultHandlerWordTags extends AbstractWordResultHandler<WordTag
|
|||||||
WordTagsDto dto = new WordTagsDto();
|
WordTagsDto dto = new WordTagsDto();
|
||||||
|
|
||||||
// 截取
|
// 截取
|
||||||
String word = InnerWordCharUtils.getString(originalText.toCharArray(), wordResult);
|
String word = InnerWordCharUtils.getString(originalText, wordResult);
|
||||||
|
|
||||||
// 获取 tags (使用清理后的单词查找标签)
|
// 获取 tags (使用清理后的单词查找标签)
|
||||||
Set<String> wordTags = InnerWordTagUtils.tags(word, wordContext);
|
Set<String> wordTags = InnerWordTagUtils.tags(word, wordContext);
|
||||||
|
|||||||
@@ -17,9 +17,8 @@ public class InnerCharUtils {
|
|||||||
int sum = 0;
|
int sum = 0;
|
||||||
|
|
||||||
int weight = 1;
|
int weight = 1;
|
||||||
char[] chars = text.toCharArray();
|
|
||||||
for(int i = len-1; i >= 0; i--) {
|
for(int i = len-1; i >= 0; i--) {
|
||||||
int val = getCharInt(chars[i]);
|
int val = getCharInt(text.charAt(i));
|
||||||
|
|
||||||
sum += weight * val;
|
sum += weight * val;
|
||||||
|
|
||||||
|
|||||||
@@ -74,13 +74,14 @@ public final class InnerWordCharUtils {
|
|||||||
* @return 结果
|
* @return 结果
|
||||||
* @since 0.5.0
|
* @since 0.5.0
|
||||||
*/
|
*/
|
||||||
public static String getString(final char[] chars,
|
// @Deprecated
|
||||||
final int startIndex,
|
// public static String getString(final char[] chars,
|
||||||
final int endIndex) {
|
// final int startIndex,
|
||||||
// 截取
|
// final int endIndex) {
|
||||||
int len = endIndex - startIndex;
|
// // 截取
|
||||||
return new String(chars, startIndex, len);
|
// int len = endIndex - startIndex;
|
||||||
}
|
// return new String(chars, startIndex, len);
|
||||||
|
// }
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 构建字符串
|
* 构建字符串
|
||||||
@@ -89,9 +90,35 @@ public final class InnerWordCharUtils {
|
|||||||
* @return 结果
|
* @return 结果
|
||||||
* @since 0.5.0
|
* @since 0.5.0
|
||||||
*/
|
*/
|
||||||
public static String getString(final char[] chars,
|
// @Deprecated
|
||||||
|
// public static String getString(final char[] chars,
|
||||||
|
// final IWordResult wordResult) {
|
||||||
|
// return getString(chars, wordResult.startIndex(), wordResult.endIndex());
|
||||||
|
// }
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 构建字符串
|
||||||
|
* @param text 字符串
|
||||||
|
* @param startIndex 开始位置
|
||||||
|
* @param endIndex 结束位置
|
||||||
|
* @return 结果
|
||||||
|
* @since 0.29.0
|
||||||
|
*/
|
||||||
|
public static String getString(final String text,
|
||||||
|
final int startIndex,
|
||||||
|
final int endIndex) {
|
||||||
|
return text.substring(startIndex, endIndex);
|
||||||
|
}
|
||||||
|
/**
|
||||||
|
* 构建字符串
|
||||||
|
* @param text 字符串
|
||||||
|
* @param wordResult 结果
|
||||||
|
* @return 结果
|
||||||
|
* @since 0.29.0
|
||||||
|
*/
|
||||||
|
public static String getString(final String text,
|
||||||
final IWordResult wordResult) {
|
final IWordResult wordResult) {
|
||||||
return getString(chars, wordResult.startIndex(), wordResult.endIndex());
|
return getString(text, wordResult.startIndex(), wordResult.endIndex());
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -89,10 +89,10 @@ public final class InnerWordNumUtils {
|
|||||||
return string;
|
return string;
|
||||||
}
|
}
|
||||||
|
|
||||||
char[] chars = string.toCharArray();
|
int length = string.length();
|
||||||
StringBuilder stringBuilder = new StringBuilder(chars.length);
|
StringBuilder stringBuilder = new StringBuilder(length);
|
||||||
for(char c : chars) {
|
for(int i = 0; i < length; i++) {
|
||||||
char mapChar = getMappingChar(c);
|
char mapChar = getMappingChar(string.charAt(i));
|
||||||
|
|
||||||
//TODO: stop word 的处理
|
//TODO: stop word 的处理
|
||||||
stringBuilder.append(mapChar);
|
stringBuilder.append(mapChar);
|
||||||
|
|||||||
@@ -14,8 +14,8 @@ import com.github.houbb.sensitive.word.utils.InnerWordCharUtils;
|
|||||||
public class MyWordReplace implements IWordReplace {
|
public class MyWordReplace implements IWordReplace {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void replace(StringBuilder stringBuilder, final char[] rawChars, IWordResult wordResult, IWordContext wordContext) {
|
public void replace(StringBuilder stringBuilder, final String rawText, IWordResult wordResult, IWordContext wordContext) {
|
||||||
String sensitiveWord = InnerWordCharUtils.getString(rawChars, wordResult);
|
String sensitiveWord = InnerWordCharUtils.getString(rawText, wordResult);
|
||||||
// 自定义不同的敏感词替换策略,可以从数据库等地方读取
|
// 自定义不同的敏感词替换策略,可以从数据库等地方读取
|
||||||
if("五星红旗".equals(sensitiveWord)) {
|
if("五星红旗".equals(sensitiveWord)) {
|
||||||
stringBuilder.append("国家旗帜");
|
stringBuilder.append("国家旗帜");
|
||||||
|
|||||||
Reference in New Issue
Block a user