mirror of
https://github.com/houbb/sensitive-word.git
synced 2026-03-22 08:27:36 +08:00
release branch 0.5.0
This commit is contained in:
@@ -170,3 +170,11 @@
|
||||
| 1 | O | 优化单词校验逻辑 | 2023-06-08 23:51:58 | |
|
||||
| 2 | A | 新增是否单词校验的开关 | 2023-06-08 23:51:58 | |
|
||||
|
||||
|
||||
# release_0.5.0
|
||||
|
||||
| 序号 | 变更类型 | 说明 | 时间 | 备注 |
|
||||
|:---|:-----|-----------------------------|:--------------------|:-------|
|
||||
| 1 | O | 优化单词结果,减少 String 创建 | 2023-06-08 23:51:58 | |
|
||||
| 2 | O | 优化 contains 判断,减少 String 创建 | 2023-06-08 23:51:58 | |
|
||||
|
||||
|
||||
2
pom.xml
2
pom.xml
@@ -6,7 +6,7 @@
|
||||
|
||||
<groupId>com.github.houbb</groupId>
|
||||
<artifactId>sensitive-word</artifactId>
|
||||
<version>0.4.0</version>
|
||||
<version>0.5.0</version>
|
||||
|
||||
<properties>
|
||||
<!--============================== All Plugins START ==============================-->
|
||||
|
||||
@@ -10,9 +10,9 @@ ECHO "============================= RELEASE START..."
|
||||
|
||||
:: 版本号信息(需要手动指定)
|
||||
:::: 旧版本名称
|
||||
SET version=0.4.0
|
||||
SET version=0.5.0
|
||||
:::: 新版本名称
|
||||
SET newVersion=0.5.0
|
||||
SET newVersion=0.6.0
|
||||
:::: 组织名称
|
||||
SET groupName=com.github.houbb
|
||||
:::: 项目名称
|
||||
|
||||
@@ -2,10 +2,8 @@ package com.github.houbb.sensitive.word.api;
|
||||
|
||||
import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum;
|
||||
import com.github.houbb.sensitive.word.constant.enums.WordContainsTypeEnum;
|
||||
import com.github.houbb.sensitive.word.support.check.ISensitiveCheck;
|
||||
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* 敏感词 map
|
||||
@@ -24,13 +22,13 @@ public interface IWordMap {
|
||||
|
||||
/**
|
||||
* 是否包含敏感词
|
||||
* @param string 字符串
|
||||
* @param stringBuilder 缓冲
|
||||
* @param context 上下文
|
||||
* @return 是否包含
|
||||
* @since 0.0.1
|
||||
* @since 0.5.0
|
||||
* @see ValidModeEnum#FAIL_FAST 建议使用快速返回模式
|
||||
*/
|
||||
WordContainsTypeEnum contains(final String string,
|
||||
WordContainsTypeEnum contains(final StringBuilder stringBuilder,
|
||||
final IWordContext context);
|
||||
|
||||
}
|
||||
|
||||
@@ -7,13 +7,6 @@ package com.github.houbb.sensitive.word.api;
|
||||
*/
|
||||
public interface IWordResult {
|
||||
|
||||
/**
|
||||
* 敏感词
|
||||
* @return 敏感词
|
||||
* @since 0.1.0
|
||||
*/
|
||||
String word();
|
||||
|
||||
/**
|
||||
* 开始下标
|
||||
* @return 开始下标
|
||||
|
||||
@@ -10,9 +10,13 @@ public interface IWordResultHandler<R> {
|
||||
/**
|
||||
* 对于结果的处理
|
||||
* @param wordResult 结果
|
||||
* @param wordContext 上下文
|
||||
* @param originalText 原始文本
|
||||
* @return 处理结果
|
||||
* @since 0.1.0
|
||||
*/
|
||||
R handle(final IWordResult wordResult);
|
||||
R handle(final IWordResult wordResult,
|
||||
final IWordContext wordContext,
|
||||
final String originalText);
|
||||
|
||||
}
|
||||
|
||||
@@ -434,7 +434,7 @@ public class SensitiveWordBs {
|
||||
return CollectionUtil.toList(wordResults, new IHandler<IWordResult, R>() {
|
||||
@Override
|
||||
public R handle(IWordResult wordResult) {
|
||||
return handler.handle(wordResult);
|
||||
return handler.handle(wordResult, context, target);
|
||||
}
|
||||
});
|
||||
}
|
||||
@@ -453,7 +453,7 @@ public class SensitiveWordBs {
|
||||
ArgUtil.notNull(handler, "handler");
|
||||
|
||||
IWordResult wordResult = sensitiveWord.findFirst(target, context);
|
||||
return handler.handle(wordResult);
|
||||
return handler.handle(wordResult, context, target);
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -57,14 +57,9 @@ public class SensitiveWord extends AbstractSensitiveWord {
|
||||
int wordLength = checkResult.index();
|
||||
if (wordLength > 0) {
|
||||
// 保存敏感词
|
||||
// TODO: 这其实是一个比较消耗的操作,后续可以考虑简化掉。
|
||||
String sensitiveWord = text.substring(i, i + wordLength);
|
||||
|
||||
// 添加去重
|
||||
WordResult wordResult = WordResult.newInstance()
|
||||
.startIndex(i)
|
||||
.endIndex(i+wordLength)
|
||||
.word(sensitiveWord);
|
||||
.endIndex(i+wordLength);
|
||||
resultList.add(wordResult);
|
||||
|
||||
// 快速返回
|
||||
|
||||
@@ -45,7 +45,7 @@ public class SensitiveCheckWord extends AbstractSensitiveCheck {
|
||||
stringBuilder.append(mappingChar);
|
||||
|
||||
// 判断是否存在
|
||||
WordContainsTypeEnum wordContainsTypeEnum = wordMap.contains(stringBuilder.toString(), context);
|
||||
WordContainsTypeEnum wordContainsTypeEnum = wordMap.contains(stringBuilder, context);
|
||||
if(WordContainsTypeEnum.CONTAINS_END.equals(wordContainsTypeEnum)) {
|
||||
actualLength = stringBuilder.length();
|
||||
|
||||
|
||||
@@ -94,28 +94,30 @@ public class WordMap implements IWordMap {
|
||||
* (1)直接遍历所有
|
||||
* (2)如果遇到,则直接返回 true
|
||||
*
|
||||
* @param string 字符串
|
||||
* @param stringBuilder 字符串缓冲
|
||||
* @return 是否包含
|
||||
* @since 0.0.1
|
||||
*/
|
||||
@Override
|
||||
public WordContainsTypeEnum contains(String string, final IWordContext context) {
|
||||
if (StringUtil.isEmpty(string)) {
|
||||
public WordContainsTypeEnum contains(StringBuilder stringBuilder, final IWordContext context) {
|
||||
if (stringBuilder == null
|
||||
|| stringBuilder.length() <= 0) {
|
||||
return WordContainsTypeEnum.NOT_FOUND;
|
||||
}
|
||||
|
||||
return innerContainsSensitive(string, context);
|
||||
return innerContainsSensitive(stringBuilder, context);
|
||||
}
|
||||
|
||||
private WordContainsTypeEnum innerContainsSensitive(String txt,
|
||||
private WordContainsTypeEnum innerContainsSensitive(StringBuilder stringBuilder,
|
||||
IWordContext context) {
|
||||
// 初始化为当前的 map
|
||||
Map nowMap = this.innerWordMap;
|
||||
|
||||
// 记录敏感词的长度
|
||||
for (int i = 0; i < txt.length(); i++) {
|
||||
final int len = stringBuilder.length();
|
||||
for (int i = 0; i < len; i++) {
|
||||
// 获取当前的 map 信息
|
||||
nowMap = getNowMap(nowMap, context, txt, i);
|
||||
nowMap = getNowMap(nowMap, context, stringBuilder, i);
|
||||
|
||||
// 如果不为空,则判断是否为结尾。
|
||||
if (ObjectUtil.isNull(nowMap)) {
|
||||
@@ -155,16 +157,16 @@ public class WordMap implements IWordMap {
|
||||
* 获取当前的 Map
|
||||
* @param nowMap 原始的当前 map
|
||||
* @param context 上下文
|
||||
* @param txt 文本信息
|
||||
* @param stringBuilder 文本缓存
|
||||
* @param index 下标
|
||||
* @return 实际的当前 map
|
||||
* @since 0.0.7
|
||||
*/
|
||||
private Map getNowMap(Map nowMap,
|
||||
final IWordContext context,
|
||||
final String txt,
|
||||
final StringBuilder stringBuilder,
|
||||
final int index) {
|
||||
char c = txt.charAt(index);
|
||||
char c = stringBuilder.charAt(index);
|
||||
char mappingChar = context.charFormat().format(c, context);
|
||||
|
||||
// 这里做一次重复词的处理
|
||||
@@ -173,7 +175,7 @@ public class WordMap implements IWordMap {
|
||||
// 启用忽略重复&当前下标不是第一个
|
||||
if(context.ignoreRepeat()
|
||||
&& index > 0) {
|
||||
char preChar = txt.charAt(index-1);
|
||||
char preChar = stringBuilder.charAt(index-1);
|
||||
char preMappingChar = context.charFormat().format(preChar, context);
|
||||
|
||||
// 直接赋值为上一个 map
|
||||
|
||||
@@ -8,8 +8,6 @@ import com.github.houbb.sensitive.word.api.IWordResult;
|
||||
*/
|
||||
public class WordResult implements IWordResult {
|
||||
|
||||
private String word;
|
||||
|
||||
private int startIndex;
|
||||
|
||||
private int endIndex;
|
||||
@@ -18,16 +16,6 @@ public class WordResult implements IWordResult {
|
||||
return new WordResult();
|
||||
}
|
||||
|
||||
@Override
|
||||
public String word() {
|
||||
return word;
|
||||
}
|
||||
|
||||
public WordResult word(String word) {
|
||||
this.word = word;
|
||||
return this;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int startIndex() {
|
||||
return startIndex;
|
||||
@@ -51,8 +39,7 @@ public class WordResult implements IWordResult {
|
||||
@Override
|
||||
public String toString() {
|
||||
return "WordResult{" +
|
||||
"word='" + word + '\'' +
|
||||
", startIndex=" + startIndex +
|
||||
"startIndex=" + startIndex +
|
||||
", endIndex=" + endIndex +
|
||||
'}';
|
||||
}
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
package com.github.houbb.sensitive.word.support.result;
|
||||
|
||||
import com.github.houbb.heaven.annotation.ThreadSafe;
|
||||
import com.github.houbb.sensitive.word.api.IWordContext;
|
||||
import com.github.houbb.sensitive.word.api.IWordResult;
|
||||
import com.github.houbb.sensitive.word.api.IWordResultHandler;
|
||||
|
||||
@@ -22,7 +23,7 @@ public class WordResultHandlerRaw implements IWordResultHandler<IWordResult> {
|
||||
}
|
||||
|
||||
@Override
|
||||
public IWordResult handle(IWordResult wordResult) {
|
||||
public IWordResult handle(IWordResult wordResult, IWordContext wordContext, String originalText) {
|
||||
return wordResult;
|
||||
}
|
||||
|
||||
|
||||
@@ -1,8 +1,10 @@
|
||||
package com.github.houbb.sensitive.word.support.result;
|
||||
|
||||
import com.github.houbb.heaven.annotation.ThreadSafe;
|
||||
import com.github.houbb.sensitive.word.api.IWordContext;
|
||||
import com.github.houbb.sensitive.word.api.IWordResult;
|
||||
import com.github.houbb.sensitive.word.api.IWordResultHandler;
|
||||
import com.github.houbb.sensitive.word.utils.InnerCharUtils;
|
||||
|
||||
/**
|
||||
* 只保留单词
|
||||
@@ -23,11 +25,13 @@ public class WordResultHandlerWord implements IWordResultHandler<String> {
|
||||
}
|
||||
|
||||
@Override
|
||||
public String handle(IWordResult wordResult) {
|
||||
public String handle(IWordResult wordResult, IWordContext wordContext, String originalText) {
|
||||
if(wordResult == null) {
|
||||
return null;
|
||||
}
|
||||
return wordResult.word();
|
||||
|
||||
// 截取
|
||||
return InnerCharUtils.getString(originalText.toCharArray(), wordResult);
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
@@ -2,6 +2,7 @@ package com.github.houbb.sensitive.word.utils;
|
||||
|
||||
import com.github.houbb.heaven.util.guava.Guavas;
|
||||
import com.github.houbb.heaven.util.lang.ObjectUtil;
|
||||
import com.github.houbb.sensitive.word.api.IWordResult;
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
@@ -65,4 +66,32 @@ public final class InnerCharUtils {
|
||||
return character;
|
||||
}
|
||||
|
||||
/**
|
||||
* 构建字符串
|
||||
* @param chars 字符数组
|
||||
* @param startIndex 开始位置
|
||||
* @param endIndex 结束位置
|
||||
* @return 结果
|
||||
* @since 0.5.0
|
||||
*/
|
||||
public static String getString(final char[] chars,
|
||||
final int startIndex,
|
||||
final int endIndex) {
|
||||
// 截取
|
||||
int len = endIndex - startIndex;
|
||||
return new String(chars, startIndex, len);
|
||||
}
|
||||
|
||||
/**
|
||||
* 构建字符串
|
||||
* @param chars 字符数组
|
||||
* @param wordResult 结果
|
||||
* @return 结果
|
||||
* @since 0.5.0
|
||||
*/
|
||||
public static String getString(final char[] chars,
|
||||
final IWordResult wordResult) {
|
||||
return getString(chars, wordResult.startIndex(), wordResult.endIndex());
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -7,14 +7,18 @@ import org.junit.Ignore;
|
||||
import org.junit.Test;
|
||||
|
||||
@Ignore
|
||||
public class BasicTest {
|
||||
public class BenchmarkBasicTest {
|
||||
|
||||
/**
|
||||
*
|
||||
*
|
||||
* 100*100 耗时:926ms,性能较差。
|
||||
*
|
||||
* 100*100000 的字符:12942ms 第一次优化。
|
||||
* 100*100000 的字符:
|
||||
*
|
||||
* 12942ms 第一次优化。
|
||||
* 12983ms 添加对应的 contains 优化,性能无太大变化。
|
||||
*
|
||||
*/
|
||||
@Test
|
||||
public void costTimeTest() {
|
||||
@@ -34,6 +38,10 @@ public class BasicTest {
|
||||
/**
|
||||
*
|
||||
* 100*100000 的字符:12440ms
|
||||
*
|
||||
* 12111ms 第一次优化
|
||||
*
|
||||
* 1133ms 只有单词校验
|
||||
*/
|
||||
@Test
|
||||
public void costTimeOnlyWordTest() {
|
||||
@@ -43,7 +51,10 @@ public class BasicTest {
|
||||
// 1W 次
|
||||
long start = System.currentTimeMillis();
|
||||
SensitiveWordBs sensitiveWordBs = SensitiveWordBs.newInstance()
|
||||
.enableWordCheck(false)
|
||||
// .enableWordCheck(false)
|
||||
.enableNumCheck(false)
|
||||
.enableUrlCheck(false)
|
||||
.enableEmailCheck(false)
|
||||
.init();
|
||||
|
||||
for(int i = 0; i < 10000; i++) {
|
||||
@@ -62,7 +62,7 @@ public class SensitiveWordHelperTest {
|
||||
final String text = "五星红旗迎风飘扬,毛主席的画像屹立在天安门前。";
|
||||
|
||||
List<IWordResult> wordList = SensitiveWordHelper.findAll(text, WordResultHandlers.raw());
|
||||
Assert.assertEquals("[WordResult{word='五星红旗', startIndex=0, endIndex=4}, WordResult{word='毛主席', startIndex=9, endIndex=12}, WordResult{word='天安门', startIndex=18, endIndex=21}]", wordList.toString());
|
||||
Assert.assertEquals("[WordResult{startIndex=0, endIndex=4}, WordResult{startIndex=9, endIndex=12}, WordResult{startIndex=18, endIndex=21}]", wordList.toString());
|
||||
}
|
||||
|
||||
|
||||
@@ -99,7 +99,7 @@ public class SensitiveWordHelperTest {
|
||||
final String text = "五星红旗迎风飘扬,毛主席的画像屹立在天安门前。";
|
||||
|
||||
IWordResult word = SensitiveWordHelper.findFirst(text, WordResultHandlers.raw());
|
||||
Assert.assertEquals("WordResult{word='五星红旗', startIndex=0, endIndex=4}", word.toString());
|
||||
Assert.assertEquals("WordResult{startIndex=0, endIndex=4}", word.toString());
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -3,6 +3,7 @@ package com.github.houbb.sensitive.word.replace;
|
||||
import com.github.houbb.sensitive.word.api.ISensitiveWordReplace;
|
||||
import com.github.houbb.sensitive.word.api.IWordContext;
|
||||
import com.github.houbb.sensitive.word.api.IWordResult;
|
||||
import com.github.houbb.sensitive.word.utils.InnerCharUtils;
|
||||
|
||||
/**
|
||||
* 自定义敏感词替换策略
|
||||
@@ -14,7 +15,7 @@ public class MySensitiveWordReplace implements ISensitiveWordReplace {
|
||||
|
||||
@Override
|
||||
public void replace(StringBuilder stringBuilder, final char[] rawChars, IWordResult wordResult, IWordContext wordContext) {
|
||||
String sensitiveWord = wordResult.word();
|
||||
String sensitiveWord = InnerCharUtils.getString(rawChars, wordResult);
|
||||
// 自定义不同的敏感词替换策略,可以从数据库等地方读取
|
||||
if("五星红旗".equals(sensitiveWord)) {
|
||||
stringBuilder.append("国家旗帜");
|
||||
|
||||
Reference in New Issue
Block a user