release branch 0.5.0

This commit is contained in:
binbin.hou
2023-06-08 18:30:02 +08:00
parent 678686df0b
commit a4e364a94d
17 changed files with 93 additions and 60 deletions

View File

@@ -170,3 +170,11 @@
| 1 | O | 优化单词校验逻辑 | 2023-06-08 23:51:58 | |
| 2 | A | 新增是否单词校验的开关 | 2023-06-08 23:51:58 | |
# release_0.5.0
| 序号 | 变更类型 | 说明 | 时间 | 备注 |
|:---|:-----|:----------------------------|:--------------------|:-------|
| 1 | O | 优化单词结果,减少 String 创建 | 2023-06-08 23:51:58 | |
| 2 | O | 优化 contains 判断,减少 String 创建 | 2023-06-08 23:51:58 | |

View File

@@ -6,7 +6,7 @@
<groupId>com.github.houbb</groupId>
<artifactId>sensitive-word</artifactId>
<version>0.4.0</version>
<version>0.5.0</version>
<properties>
<!--============================== All Plugins START ==============================-->

View File

@@ -10,9 +10,9 @@ ECHO "============================= RELEASE START..."
:: 版本号信息(需要手动指定)
:::: 旧版本名称
SET version=0.4.0
SET version=0.5.0
:::: 新版本名称
SET newVersion=0.5.0
SET newVersion=0.6.0
:::: 组织名称
SET groupName=com.github.houbb
:::: 项目名称

View File

@@ -2,10 +2,8 @@ package com.github.houbb.sensitive.word.api;
import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum;
import com.github.houbb.sensitive.word.constant.enums.WordContainsTypeEnum;
import com.github.houbb.sensitive.word.support.check.ISensitiveCheck;
import java.util.Collection;
import java.util.List;
/**
* 敏感词 map
@@ -24,13 +22,13 @@ public interface IWordMap {
/**
* 是否包含敏感词
* @param string 字符串
* @param stringBuilder 缓冲
* @param context 上下文
* @return 是否包含
* @since 0.0.1
* @since 0.5.0
* @see ValidModeEnum#FAIL_FAST 建议使用快速返回模式
*/
WordContainsTypeEnum contains(final String string,
WordContainsTypeEnum contains(final StringBuilder stringBuilder,
final IWordContext context);
}

View File

@@ -7,13 +7,6 @@ package com.github.houbb.sensitive.word.api;
*/
public interface IWordResult {
/**
* 敏感词
* @return 敏感词
* @since 0.1.0
*/
String word();
/**
* 开始下标
* @return 开始下标

View File

@@ -10,9 +10,13 @@ public interface IWordResultHandler<R> {
/**
* 对于结果的处理
* @param wordResult 结果
* @param wordContext 上下文
* @param originalText 原始文本
* @return 处理结果
* @since 0.1.0
*/
R handle(final IWordResult wordResult);
R handle(final IWordResult wordResult,
final IWordContext wordContext,
final String originalText);
}

View File

@@ -434,7 +434,7 @@ public class SensitiveWordBs {
return CollectionUtil.toList(wordResults, new IHandler<IWordResult, R>() {
@Override
public R handle(IWordResult wordResult) {
return handler.handle(wordResult);
return handler.handle(wordResult, context, target);
}
});
}
@@ -453,7 +453,7 @@ public class SensitiveWordBs {
ArgUtil.notNull(handler, "handler");
IWordResult wordResult = sensitiveWord.findFirst(target, context);
return handler.handle(wordResult);
return handler.handle(wordResult, context, target);
}
/**

View File

@@ -57,14 +57,9 @@ public class SensitiveWord extends AbstractSensitiveWord {
int wordLength = checkResult.index();
if (wordLength > 0) {
// 保存敏感词
// TODO: 这其实是一个比较消耗的操作,后续可以考虑简化掉。
String sensitiveWord = text.substring(i, i + wordLength);
// 添加去重
WordResult wordResult = WordResult.newInstance()
.startIndex(i)
.endIndex(i+wordLength)
.word(sensitiveWord);
.endIndex(i+wordLength);
resultList.add(wordResult);
// 快速返回

View File

@@ -45,7 +45,7 @@ public class SensitiveCheckWord extends AbstractSensitiveCheck {
stringBuilder.append(mappingChar);
// 判断是否存在
WordContainsTypeEnum wordContainsTypeEnum = wordMap.contains(stringBuilder.toString(), context);
WordContainsTypeEnum wordContainsTypeEnum = wordMap.contains(stringBuilder, context);
if(WordContainsTypeEnum.CONTAINS_END.equals(wordContainsTypeEnum)) {
actualLength = stringBuilder.length();

View File

@@ -94,28 +94,30 @@ public class WordMap implements IWordMap {
* 1直接遍历所有
* 2如果遇到则直接返回 true
*
* @param string 字符串
* @param stringBuilder 字符串
* @return 是否包含
* @since 0.0.1
*/
@Override
public WordContainsTypeEnum contains(String string, final IWordContext context) {
if (StringUtil.isEmpty(string)) {
public WordContainsTypeEnum contains(StringBuilder stringBuilder, final IWordContext context) {
if (stringBuilder == null
|| stringBuilder.length() <= 0) {
return WordContainsTypeEnum.NOT_FOUND;
}
return innerContainsSensitive(string, context);
return innerContainsSensitive(stringBuilder, context);
}
private WordContainsTypeEnum innerContainsSensitive(String txt,
private WordContainsTypeEnum innerContainsSensitive(StringBuilder stringBuilder,
IWordContext context) {
// 初始化为当前的 map
Map nowMap = this.innerWordMap;
// 记录敏感词的长度
for (int i = 0; i < txt.length(); i++) {
final int len = stringBuilder.length();
for (int i = 0; i < len; i++) {
// 获取当前的 map 信息
nowMap = getNowMap(nowMap, context, txt, i);
nowMap = getNowMap(nowMap, context, stringBuilder, i);
// 如果不为空,则判断是否为结尾。
if (ObjectUtil.isNull(nowMap)) {
@@ -155,16 +157,16 @@ public class WordMap implements IWordMap {
* 获取当前的 Map
* @param nowMap 原始的当前 map
* @param context 上下文
* @param txt 文本信息
* @param stringBuilder 文本缓存
* @param index 下标
* @return 实际的当前 map
* @since 0.0.7
*/
private Map getNowMap(Map nowMap,
final IWordContext context,
final String txt,
final StringBuilder stringBuilder,
final int index) {
char c = txt.charAt(index);
char c = stringBuilder.charAt(index);
char mappingChar = context.charFormat().format(c, context);
// 这里做一次重复词的处理
@@ -173,7 +175,7 @@ public class WordMap implements IWordMap {
// 启用忽略重复&当前下标不是第一个
if(context.ignoreRepeat()
&& index > 0) {
char preChar = txt.charAt(index-1);
char preChar = stringBuilder.charAt(index-1);
char preMappingChar = context.charFormat().format(preChar, context);
// 直接赋值为上一个 map

View File

@@ -8,8 +8,6 @@ import com.github.houbb.sensitive.word.api.IWordResult;
*/
public class WordResult implements IWordResult {
private String word;
private int startIndex;
private int endIndex;
@@ -18,16 +16,6 @@ public class WordResult implements IWordResult {
return new WordResult();
}
@Override
public String word() {
return word;
}
public WordResult word(String word) {
this.word = word;
return this;
}
@Override
public int startIndex() {
return startIndex;
@@ -51,8 +39,7 @@ public class WordResult implements IWordResult {
@Override
public String toString() {
return "WordResult{" +
"word='" + word + '\'' +
", startIndex=" + startIndex +
"startIndex=" + startIndex +
", endIndex=" + endIndex +
'}';
}

View File

@@ -1,6 +1,7 @@
package com.github.houbb.sensitive.word.support.result;
import com.github.houbb.heaven.annotation.ThreadSafe;
import com.github.houbb.sensitive.word.api.IWordContext;
import com.github.houbb.sensitive.word.api.IWordResult;
import com.github.houbb.sensitive.word.api.IWordResultHandler;
@@ -22,7 +23,7 @@ public class WordResultHandlerRaw implements IWordResultHandler<IWordResult> {
}
@Override
public IWordResult handle(IWordResult wordResult) {
public IWordResult handle(IWordResult wordResult, IWordContext wordContext, String originalText) {
return wordResult;
}

View File

@@ -1,8 +1,10 @@
package com.github.houbb.sensitive.word.support.result;
import com.github.houbb.heaven.annotation.ThreadSafe;
import com.github.houbb.sensitive.word.api.IWordContext;
import com.github.houbb.sensitive.word.api.IWordResult;
import com.github.houbb.sensitive.word.api.IWordResultHandler;
import com.github.houbb.sensitive.word.utils.InnerCharUtils;
/**
* 只保留单词
@@ -23,11 +25,13 @@ public class WordResultHandlerWord implements IWordResultHandler<String> {
}
@Override
public String handle(IWordResult wordResult) {
public String handle(IWordResult wordResult, IWordContext wordContext, String originalText) {
if(wordResult == null) {
return null;
}
return wordResult.word();
// 截取
return InnerCharUtils.getString(originalText.toCharArray(), wordResult);
}
}

View File

@@ -2,6 +2,7 @@ package com.github.houbb.sensitive.word.utils;
import com.github.houbb.heaven.util.guava.Guavas;
import com.github.houbb.heaven.util.lang.ObjectUtil;
import com.github.houbb.sensitive.word.api.IWordResult;
import java.util.Map;
@@ -65,4 +66,32 @@ public final class InnerCharUtils {
return character;
}
/**
 * Build a String from a sub-range of a char array.
 *
 * <p>Avoids an intermediate {@code substring} allocation by reading the
 * characters directly from the already-available array.</p>
 *
 * @param chars source character array
 * @param startIndex start position (inclusive)
 * @param endIndex end position (exclusive) — the slice length is {@code endIndex - startIndex}
 * @return the extracted string
 * @since 0.5.0
 */
public static String getString(final char[] chars,
final int startIndex,
final int endIndex) {
// length of the slice; endIndex is exclusive
int len = endIndex - startIndex;
return new String(chars, startIndex, len);
}
/**
 * Build a String for the sensitive-word hit described by a {@link IWordResult}.
 *
 * <p>Delegates to {@code getString(char[], int, int)} using the result's
 * {@code startIndex()} (inclusive) and {@code endIndex()} (exclusive).</p>
 *
 * @param chars source character array (the original text's characters)
 * @param wordResult match result carrying the start/end indexes
 * @return the matched word as a string
 * @since 0.5.0
 */
public static String getString(final char[] chars,
final IWordResult wordResult) {
return getString(chars, wordResult.startIndex(), wordResult.endIndex());
}
}

View File

@@ -7,14 +7,18 @@ import org.junit.Ignore;
import org.junit.Test;
@Ignore
public class BasicTest {
public class BenchmarkBasicTest {
/**
*
*
* 100*100 耗时926ms,性能较差
*
* 100*100000 的字符12942ms 第一次优化
* 100*100000 的字符
*
* 12942ms 第一次优化
* 12983ms 添加对应的 contains 优化性能无太大变化
*
*/
@Test
public void costTimeTest() {
@@ -34,6 +38,10 @@ public class BasicTest {
/**
*
* 100*100000 的字符12440ms
*
* 12111ms 第一次优化
*
* 1133ms 只有单词校验
*/
@Test
public void costTimeOnlyWordTest() {
@@ -43,7 +51,10 @@ public class BasicTest {
// 1W
long start = System.currentTimeMillis();
SensitiveWordBs sensitiveWordBs = SensitiveWordBs.newInstance()
.enableWordCheck(false)
// .enableWordCheck(false)
.enableNumCheck(false)
.enableUrlCheck(false)
.enableEmailCheck(false)
.init();
for(int i = 0; i < 10000; i++) {

View File

@@ -62,7 +62,7 @@ public class SensitiveWordHelperTest {
final String text = "五星红旗迎风飘扬,毛主席的画像屹立在天安门前。";
List<IWordResult> wordList = SensitiveWordHelper.findAll(text, WordResultHandlers.raw());
Assert.assertEquals("[WordResult{word='五星红旗', startIndex=0, endIndex=4}, WordResult{word='毛主席', startIndex=9, endIndex=12}, WordResult{word='天安门', startIndex=18, endIndex=21}]", wordList.toString());
Assert.assertEquals("[WordResult{startIndex=0, endIndex=4}, WordResult{startIndex=9, endIndex=12}, WordResult{startIndex=18, endIndex=21}]", wordList.toString());
}
@@ -99,7 +99,7 @@ public class SensitiveWordHelperTest {
final String text = "五星红旗迎风飘扬,毛主席的画像屹立在天安门前。";
IWordResult word = SensitiveWordHelper.findFirst(text, WordResultHandlers.raw());
Assert.assertEquals("WordResult{word='五星红旗', startIndex=0, endIndex=4}", word.toString());
Assert.assertEquals("WordResult{startIndex=0, endIndex=4}", word.toString());
}
/**

View File

@@ -3,6 +3,7 @@ package com.github.houbb.sensitive.word.replace;
import com.github.houbb.sensitive.word.api.ISensitiveWordReplace;
import com.github.houbb.sensitive.word.api.IWordContext;
import com.github.houbb.sensitive.word.api.IWordResult;
import com.github.houbb.sensitive.word.utils.InnerCharUtils;
/**
* 自定义敏感词替换策略
@@ -14,7 +15,7 @@ public class MySensitiveWordReplace implements ISensitiveWordReplace {
@Override
public void replace(StringBuilder stringBuilder, final char[] rawChars, IWordResult wordResult, IWordContext wordContext) {
String sensitiveWord = wordResult.word();
String sensitiveWord = InnerCharUtils.getString(rawChars, wordResult);
// 自定义不同的敏感词替换策略,可以从数据库等地方读取
if("五星红旗".equals(sensitiveWord)) {
stringBuilder.append("国家旗帜");