[Feature] add for new

This commit is contained in:
binbin.hou
2023-06-09 14:29:33 +08:00
parent 93b42276c5
commit 5a2b9da9df
24 changed files with 326 additions and 220 deletions

View File

@@ -1,25 +0,0 @@
package com.github.houbb.sensitive.word.api;
/**
* Context handed to a sensitive-word replacement strategy.
*
* @author binbin.hou
* @since 0.2.0
*/
public interface ISensitiveWordReplaceContext {
/**
* The sensitive word.
* @return the sensitive word
* @since 0.2.0
*/
String sensitiveWord();
/**
* The word length.
* @return the word length
* @since 0.2.0
*/
int wordLength();
}

View File

@@ -1,5 +1,6 @@
package com.github.houbb.sensitive.word.api;
import com.github.houbb.sensitive.word.api.context.InnerSensitiveContext;
import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum;
import com.github.houbb.sensitive.word.constant.enums.WordContainsTypeEnum;
@@ -23,12 +24,12 @@ public interface IWordMap {
/**
* 是否包含敏感词
* @param stringBuilder 缓冲
* @param context 上下文
* @param innerContext 上下文
* @return 是否包含
* @since 0.5.0
* @see ValidModeEnum#FAIL_FAST 建议使用快速返回模式
*/
WordContainsTypeEnum contains(final StringBuilder stringBuilder,
final IWordContext context);
final InnerSensitiveContext innerContext);
}

View File

@@ -0,0 +1,72 @@
package com.github.houbb.sensitive.word.api.context;
import com.github.houbb.sensitive.word.api.IWordContext;
import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum;
import java.util.Map;
/**
 * Mutable holder for the state that accompanies one sensitive-word scan.
 *
 * <p>It carries the text being scanned, the character format mapping, the
 * validation mode and the caller's word context. No field has an initializer,
 * so each accessor returns {@code null} until the matching setter is called.
 * Every setter returns {@code this} so calls can be chained.</p>
 *
 * @author binbin.hou
 * @since 0.6.0
 */
public class InnerSensitiveContext {

    /**
     * Creates an empty context.
     *
     * @return a new instance with every property unset
     */
    public static InnerSensitiveContext newInstance() {
        return new InnerSensitiveContext();
    }

    // ---------------------------------------------------------------- original text

    /** The original (unformatted) text. */
    private String originalText;

    /**
     * @return the original text
     */
    public String originalText() {
        return this.originalText;
    }

    /**
     * @param text the original text
     * @return this context, for chaining
     */
    public InnerSensitiveContext originalText(String text) {
        this.originalText = text;
        return this;
    }

    // ---------------------------------------------------------------- format mapping

    /** Characters after formatting, keyed by character. */
    private Map<Character, Character> formatCharMapping;

    /**
     * @return the character format mapping
     */
    public Map<Character, Character> formatCharMapping() {
        return this.formatCharMapping;
    }

    /**
     * @param formatCharMapping the character format mapping (stored as-is, not copied)
     * @return this context, for chaining
     */
    public InnerSensitiveContext formatCharMapping(Map<Character, Character> formatCharMapping) {
        this.formatCharMapping = formatCharMapping;
        return this;
    }

    // ---------------------------------------------------------------- validation mode

    /** The validation mode. */
    private ValidModeEnum modeEnum;

    /**
     * @return the validation mode
     */
    public ValidModeEnum modeEnum() {
        return this.modeEnum;
    }

    /**
     * @param modeEnum the validation mode
     * @return this context, for chaining
     */
    public InnerSensitiveContext modeEnum(ValidModeEnum modeEnum) {
        this.modeEnum = modeEnum;
        return this;
    }

    // ---------------------------------------------------------------- word context

    /** The original word context. */
    private IWordContext wordContext;

    /**
     * @return the original word context
     */
    public IWordContext wordContext() {
        return this.wordContext;
    }

    /**
     * @param context the original word context
     * @return this context, for chaining
     */
    public InnerSensitiveContext wordContext(IWordContext context) {
        this.wordContext = context;
        return this;
    }

}

View File

@@ -2,7 +2,10 @@ package com.github.houbb.sensitive.word.core;
import com.github.houbb.heaven.util.lang.StringUtil;
import com.github.houbb.heaven.util.util.CollectionUtil;
import com.github.houbb.sensitive.word.api.*;
import com.github.houbb.sensitive.word.api.ISensitiveWord;
import com.github.houbb.sensitive.word.api.ISensitiveWordReplace;
import com.github.houbb.sensitive.word.api.IWordContext;
import com.github.houbb.sensitive.word.api.IWordResult;
import java.util.Collections;
import java.util.List;

View File

@@ -1,19 +1,18 @@
package com.github.houbb.sensitive.word.core;
import com.github.houbb.heaven.util.guava.Guavas;
import com.github.houbb.heaven.util.io.FileUtil;
import com.github.houbb.sensitive.word.api.ISensitiveWord;
import com.github.houbb.sensitive.word.api.ISensitiveWordReplaceContext;
import com.github.houbb.sensitive.word.api.IWordContext;
import com.github.houbb.sensitive.word.api.IWordResult;
import com.github.houbb.sensitive.word.api.context.InnerSensitiveContext;
import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum;
import com.github.houbb.sensitive.word.support.check.ISensitiveCheck;
import com.github.houbb.sensitive.word.support.check.SensitiveCheckResult;
import com.github.houbb.sensitive.word.support.check.impl.SensitiveCheckUrl;
import com.github.houbb.sensitive.word.support.replace.SensitiveWordReplaceContext;
import com.github.houbb.sensitive.word.support.result.WordResult;
import com.github.houbb.sensitive.word.utils.InnerFormatUtils;
import java.util.List;
import java.util.Map;
/**
* 默认实现
@@ -54,8 +53,15 @@ public class SensitiveWord extends AbstractSensitiveWord {
//TODO: 这里拆分为2个部分从而保障性能。但是要注意处理下标的问题。
//1. 原始的敏感词部分
//2. email/url/num 的单独一次遍历处理。
final Map<Character, Character> characterCharacterMap = InnerFormatUtils.formatCharsMapping(text, context);
final InnerSensitiveContext checkContext = InnerSensitiveContext.newInstance()
.originalText(text)
.wordContext(context)
.modeEnum(ValidModeEnum.FAIL_OVER)
.formatCharMapping(characterCharacterMap);
for (int i = 0; i < text.length(); i++) {
SensitiveCheckResult checkResult = sensitiveCheck.sensitiveCheck(text, i, ValidModeEnum.FAIL_OVER, context);
SensitiveCheckResult checkResult = sensitiveCheck.sensitiveCheck(i, checkContext);
// 命中
int wordLength = checkResult.index();

View File

@@ -1,7 +1,6 @@
package com.github.houbb.sensitive.word.support.check;
import com.github.houbb.sensitive.word.api.IWordContext;
import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum;
import com.github.houbb.sensitive.word.api.context.InnerSensitiveContext;
/**
* 敏感信息监测接口
@@ -27,16 +26,12 @@ public interface ISensitiveCheck {
* 2. 敏感词的长度
* 3. 正常走过字段的长度(便于后期替换优化,避免不必要的循环重复)
*
* @param txt 文本信息
* @param beginIndex 开始下标
* @param validModeEnum 验证模式
* @param context 执行上下文
* @return 敏感信息对应的长度
* @since 0.0.5
*/
SensitiveCheckResult sensitiveCheck(final String txt,
final int beginIndex,
final ValidModeEnum validModeEnum,
final IWordContext context);
SensitiveCheckResult sensitiveCheck(final int beginIndex,
final InnerSensitiveContext context);
}

View File

@@ -2,9 +2,9 @@ package com.github.houbb.sensitive.word.support.check.impl;
import com.github.houbb.heaven.annotation.ThreadSafe;
import com.github.houbb.sensitive.word.api.IWordContext;
import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum;
import com.github.houbb.sensitive.word.support.check.ISensitiveCheck;
import com.github.houbb.sensitive.word.support.check.SensitiveCheckResult;
import com.github.houbb.sensitive.word.api.context.InnerSensitiveContext;
import java.util.Map;
/**
* 抽象实现策略
@@ -19,34 +19,29 @@ public abstract class AbstractConditionSensitiveCheck extends AbstractSensitiveC
* 当前字符串是否符合规范
* @param mappingChar 当前字符
* @param index 下标
* @param rawText 原始文本
* @param context 上下文
* @param checkContext 校验文本
* @return 结果
* @since 0.3.2
*/
protected abstract boolean isCharCondition(char mappingChar,
int index,
String rawText,
final IWordContext context);
protected abstract boolean isCharCondition(char mappingChar, int index, InnerSensitiveContext checkContext);
/**
* 这里指定一个阈值条件
* @param index 当前下标
* @param rawText 原始文本
* @param stringBuilder 缓存
* @param context 上下文
* @param checkContext 上下文
* @return 是否满足条件
* @since 0.3.2
*/
protected abstract boolean isStringCondition(int index,
String rawText,
final StringBuilder stringBuilder,
final IWordContext context);
final StringBuilder stringBuilder, InnerSensitiveContext checkContext);
@Override
protected int doGetActualLength(String txt, int beginIndex,
ValidModeEnum validModeEnum,
IWordContext context) {
protected int getActualLength(int beginIndex, InnerSensitiveContext checkContext) {
final String txt = checkContext.originalText();
final IWordContext context = checkContext.wordContext();
final Map<Character, Character> formatCharMapping = checkContext.formatCharMapping();
int actualLength = 0;
// 采用 ThreadLocal 应该可以提升性能,减少对象的创建。
@@ -55,12 +50,11 @@ public abstract class AbstractConditionSensitiveCheck extends AbstractSensitiveC
for(int i = beginIndex; i < txt.length(); i++) {
currentIx = i;
char currentChar = txt.charAt(i);
// 映射处理
char mappingChar = context.charFormat().format(currentChar, context);
char mappingChar = formatCharMapping.get(currentChar);
// 符合条件
boolean currentCondition = isCharCondition(mappingChar, i, txt, context);
boolean currentCondition = isCharCondition(mappingChar, i, checkContext);
//4 个场景
if(currentCondition) {
@@ -71,7 +65,7 @@ public abstract class AbstractConditionSensitiveCheck extends AbstractSensitiveC
}
// 匹配
if(isStringCondition(currentIx, txt, stringBuilder, context)) {
if(isStringCondition(currentIx, stringBuilder, checkContext)) {
actualLength = stringBuilder.length();
}

View File

@@ -2,8 +2,7 @@ package com.github.houbb.sensitive.word.support.check.impl;
import com.github.houbb.heaven.annotation.ThreadSafe;
import com.github.houbb.heaven.util.lang.StringUtil;
import com.github.houbb.sensitive.word.api.IWordContext;
import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum;
import com.github.houbb.sensitive.word.api.context.InnerSensitiveContext;
import com.github.houbb.sensitive.word.support.check.ISensitiveCheck;
import com.github.houbb.sensitive.word.support.check.SensitiveCheckResult;
@@ -25,45 +24,23 @@ public abstract class AbstractSensitiveCheck implements ISensitiveCheck {
/**
* 获取确切的长度
* @param txt 文本
* @param beginIndex 开始
* @param validModeEnum 校验枚举
* @param context 上下文
* @param checkContext 上下文
* @return 长度
* @since 0.4.0
*/
protected abstract int doGetActualLength(String txt, int beginIndex,
ValidModeEnum validModeEnum,
IWordContext context);
/**
* 获取确切的长度
* @param txt 文本
* @param beginIndex 开始
* @param validModeEnum 校验枚举
* @param context 上下文
* @return 长度
* @since 0.4.0
*/
protected int getActualLength(String txt, int beginIndex,
ValidModeEnum validModeEnum,
IWordContext context) {
if(StringUtil.isEmpty(txt)) {
return 0;
}
return doGetActualLength(txt, beginIndex, validModeEnum, context);
}
protected abstract int getActualLength(int beginIndex, final InnerSensitiveContext checkContext);
@Override
public SensitiveCheckResult sensitiveCheck(String txt, int beginIndex,
ValidModeEnum validModeEnum,
IWordContext context) {
public SensitiveCheckResult sensitiveCheck(int beginIndex,
final InnerSensitiveContext checkContext) {
Class<? extends ISensitiveCheck> clazz = getSensitiveCheckClass();
final String txt = checkContext.originalText();
if(StringUtil.isEmpty(txt)) {
return SensitiveCheckResult.of(0, clazz);
}
int actualLength = getActualLength(txt, beginIndex, validModeEnum, context);
int actualLength = getActualLength(beginIndex, checkContext);
return SensitiveCheckResult.of(actualLength, clazz);
}

View File

@@ -3,7 +3,7 @@ package com.github.houbb.sensitive.word.support.check.impl;
import com.github.houbb.heaven.annotation.ThreadSafe;
import com.github.houbb.heaven.util.lang.CharUtil;
import com.github.houbb.heaven.util.util.regex.RegexUtil;
import com.github.houbb.sensitive.word.api.IWordContext;
import com.github.houbb.sensitive.word.api.context.InnerSensitiveContext;
import com.github.houbb.sensitive.word.constant.AppConst;
import com.github.houbb.sensitive.word.support.check.ISensitiveCheck;
@@ -35,12 +35,17 @@ public class SensitiveCheckEmail extends AbstractConditionSensitiveCheck {
}
@Override
protected boolean isCharCondition(char mappingChar, int index, String rawText, IWordContext context) {
protected Class<? extends ISensitiveCheck> getSensitiveCheckClass() {
return SensitiveCheckEmail.class;
}
@Override
protected boolean isCharCondition(char mappingChar, int index, InnerSensitiveContext checkContext) {
return CharUtil.isEmilChar(mappingChar);
}
@Override
protected boolean isStringCondition(int index, String rawText, StringBuilder stringBuilder, IWordContext context) {
protected boolean isStringCondition(int index, StringBuilder stringBuilder, InnerSensitiveContext checkContext) {
int bufferLen = stringBuilder.length();
//x@a.cn
@@ -55,9 +60,4 @@ public class SensitiveCheckEmail extends AbstractConditionSensitiveCheck {
return RegexUtil.isEmail(string);
}
@Override
protected Class<? extends ISensitiveCheck> getSensitiveCheckClass() {
return SensitiveCheckEmail.class;
}
}

View File

@@ -2,8 +2,7 @@ package com.github.houbb.sensitive.word.support.check.impl;
import com.github.houbb.heaven.support.pipeline.Pipeline;
import com.github.houbb.heaven.support.pipeline.impl.DefaultPipeline;
import com.github.houbb.sensitive.word.api.IWordContext;
import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum;
import com.github.houbb.sensitive.word.api.context.InnerSensitiveContext;
import com.github.houbb.sensitive.word.support.check.ISensitiveCheck;
import com.github.houbb.sensitive.word.support.check.SensitiveCheckResult;
@@ -25,10 +24,8 @@ public abstract class SensitiveCheckInit implements ISensitiveCheck {
@Override
public SensitiveCheckResult sensitiveCheck(String txt,
int beginIndex,
ValidModeEnum validModeEnum,
IWordContext context) {
public SensitiveCheckResult sensitiveCheck(final int beginIndex,
final InnerSensitiveContext checkContext) {
Pipeline<ISensitiveCheck> pipeline = new DefaultPipeline<>();
this.init(pipeline);
@@ -36,7 +33,7 @@ public abstract class SensitiveCheckInit implements ISensitiveCheck {
// 循环调用
for(ISensitiveCheck sensitiveCheck : sensitiveChecks) {
SensitiveCheckResult result = sensitiveCheck.sensitiveCheck(txt, beginIndex, validModeEnum, context);
SensitiveCheckResult result = sensitiveCheck.sensitiveCheck(beginIndex, checkContext);
if(result.index() > 0) {
return result;

View File

@@ -1,8 +1,7 @@
package com.github.houbb.sensitive.word.support.check.impl;
import com.github.houbb.heaven.annotation.ThreadSafe;
import com.github.houbb.sensitive.word.api.IWordContext;
import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum;
import com.github.houbb.sensitive.word.api.context.InnerSensitiveContext;
import com.github.houbb.sensitive.word.support.check.ISensitiveCheck;
import com.github.houbb.sensitive.word.support.check.SensitiveCheckResult;
@@ -29,13 +28,12 @@ public class SensitiveCheckNone implements ISensitiveCheck {
*/
private static final SensitiveCheckResult NONE_RESULT = SensitiveCheckResult.of(0, SensitiveCheckNone.class);
@Override
public SensitiveCheckResult sensitiveCheck(String txt, int beginIndex, ValidModeEnum validModeEnum, IWordContext context) {
return NONE_RESULT;
}
/**
* Exposes the shared "no match" result constant.
*
* @return the {@code NONE_RESULT} constant
*/
public static SensitiveCheckResult getNoneResult() {
return NONE_RESULT;
}
/**
* Always reports "no match": both arguments are ignored and the shared
* {@code NONE_RESULT} constant is returned.
*
* @param beginIndex start index (unused)
* @param context inner context (unused)
* @return the {@code NONE_RESULT} constant
*/
@Override
public SensitiveCheckResult sensitiveCheck(int beginIndex, InnerSensitiveContext context) {
return NONE_RESULT;
}
}

View File

@@ -1,7 +1,7 @@
package com.github.houbb.sensitive.word.support.check.impl;
import com.github.houbb.heaven.annotation.ThreadSafe;
import com.github.houbb.sensitive.word.api.IWordContext;
import com.github.houbb.sensitive.word.api.context.InnerSensitiveContext;
import com.github.houbb.sensitive.word.support.check.ISensitiveCheck;
/**
@@ -23,21 +23,20 @@ public class SensitiveCheckNum extends AbstractConditionSensitiveCheck {
return INSTANCE;
}
@Override
protected boolean isCharCondition(char mappingChar, int index, String rawText, IWordContext context) {
return Character.isDigit(mappingChar);
}
@Override
protected boolean isStringCondition(int index, String rawText, StringBuilder stringBuilder, IWordContext context) {
int bufferLen = stringBuilder.length();
return bufferLen >= context.sensitiveCheckNumLen();
}
/**
* Identifies this check implementation.
*
* @return {@code SensitiveCheckNum.class}
*/
@Override
protected Class<? extends ISensitiveCheck> getSensitiveCheckClass() {
return SensitiveCheckNum.class;
}
/**
* Accepts a character when {@link Character#isDigit(char)} reports it as a digit.
*
* @param mappingChar the character to test
* @param index index of the character (unused here)
* @param checkContext inner context (unused here)
* @return {@code true} if {@code mappingChar} is a digit
*/
@Override
protected boolean isCharCondition(char mappingChar, int index, InnerSensitiveContext checkContext) {
return Character.isDigit(mappingChar);
}
/**
 * Compares the buffer length with the threshold configured on the word context.
 *
 * @param index current index (unused here)
 * @param stringBuilder buffer whose length is tested
 * @param checkContext inner context; its word context supplies {@code sensitiveCheckNumLen()}
 * @return {@code true} when the buffer length is at least {@code sensitiveCheckNumLen()}
 */
@Override
protected boolean isStringCondition(int index, StringBuilder stringBuilder, InnerSensitiveContext checkContext) {
    return stringBuilder.length() >= checkContext.wordContext().sensitiveCheckNumLen();
}
}

View File

@@ -3,7 +3,7 @@ package com.github.houbb.sensitive.word.support.check.impl;
import com.github.houbb.heaven.annotation.ThreadSafe;
import com.github.houbb.heaven.util.lang.CharUtil;
import com.github.houbb.heaven.util.util.regex.RegexUtil;
import com.github.houbb.sensitive.word.api.IWordContext;
import com.github.houbb.sensitive.word.api.context.InnerSensitiveContext;
import com.github.houbb.sensitive.word.constant.AppConst;
import com.github.houbb.sensitive.word.support.check.ISensitiveCheck;
@@ -32,12 +32,17 @@ public class SensitiveCheckUrl extends AbstractConditionSensitiveCheck {
}
@Override
protected boolean isCharCondition(char mappingChar, int index, String rawText, IWordContext context) {
protected Class<? extends ISensitiveCheck> getSensitiveCheckClass() {
return SensitiveCheckUrl.class;
}
@Override
protected boolean isCharCondition(char mappingChar, int index, InnerSensitiveContext checkContext) {
return CharUtil.isWebSiteChar(mappingChar);
}
@Override
protected boolean isStringCondition(int index, String rawText, StringBuilder stringBuilder, IWordContext context) {
protected boolean isStringCondition(int index, StringBuilder stringBuilder, InnerSensitiveContext checkContext) {
int bufferLen = stringBuilder.length();
//a.cn
if(bufferLen < 4) {
@@ -51,9 +56,4 @@ public class SensitiveCheckUrl extends AbstractConditionSensitiveCheck {
return RegexUtil.isWebSite(string);
}
@Override
protected Class<? extends ISensitiveCheck> getSensitiveCheckClass() {
return SensitiveCheckUrl.class;
}
}

View File

@@ -3,10 +3,13 @@ package com.github.houbb.sensitive.word.support.check.impl;
import com.github.houbb.heaven.annotation.ThreadSafe;
import com.github.houbb.sensitive.word.api.IWordContext;
import com.github.houbb.sensitive.word.api.IWordMap;
import com.github.houbb.sensitive.word.api.context.InnerSensitiveContext;
import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum;
import com.github.houbb.sensitive.word.constant.enums.WordContainsTypeEnum;
import com.github.houbb.sensitive.word.support.check.ISensitiveCheck;
import java.util.Map;
/**
* 敏感词监测实现
* @author binbin.hou
@@ -30,22 +33,27 @@ public class SensitiveCheckWord extends AbstractSensitiveCheck {
}
@Override
protected int doGetActualLength(String txt, int beginIndex, ValidModeEnum validModeEnum, IWordContext context) {
protected int getActualLength(int beginIndex, InnerSensitiveContext innerContext) {
final String txt = innerContext.originalText();
final Map<Character, Character> formatCharMapping = innerContext.formatCharMapping();
final ValidModeEnum validModeEnum = innerContext.modeEnum();
final IWordContext context = innerContext.wordContext();
// 采用 ThreadLocal 应该可以提升性能,减少对象的创建。
int actualLength = 0;
final IWordMap wordMap = context.wordMap();
// 前一个条件
StringBuilder stringBuilder = new StringBuilder();
for(int i = beginIndex; i < txt.length(); i++) {
char currentChar = txt.charAt(i);
char[] rawChars = txt.toCharArray();
for(int i = beginIndex; i < rawChars.length; i++) {
// 映射处理
char mappingChar = context.charFormat().format(currentChar, context);
final char currentChar = rawChars[i];
char mappingChar = formatCharMapping.get(currentChar);
stringBuilder.append(mappingChar);
// 判断是否存在
WordContainsTypeEnum wordContainsTypeEnum = wordMap.contains(stringBuilder, context);
WordContainsTypeEnum wordContainsTypeEnum = wordMap.contains(stringBuilder, innerContext);
if(WordContainsTypeEnum.CONTAINS_END.equals(wordContainsTypeEnum)) {
actualLength = stringBuilder.length();

View File

@@ -5,6 +5,7 @@ import com.github.houbb.heaven.util.lang.ObjectUtil;
import com.github.houbb.heaven.util.lang.StringUtil;
import com.github.houbb.sensitive.word.api.IWordContext;
import com.github.houbb.sensitive.word.api.IWordMap;
import com.github.houbb.sensitive.word.api.context.InnerSensitiveContext;
import com.github.houbb.sensitive.word.constant.AppConst;
import com.github.houbb.sensitive.word.constant.enums.WordContainsTypeEnum;
@@ -95,21 +96,23 @@ public class WordMap implements IWordMap {
* 2如果遇到则直接返回 true
*
* @param stringBuilder 字符串
* @param innerContext 内部上下文
* @return 是否包含
* @since 0.0.1
*/
@Override
public WordContainsTypeEnum contains(StringBuilder stringBuilder, final IWordContext context) {
public WordContainsTypeEnum contains(final StringBuilder stringBuilder,
final InnerSensitiveContext innerContext) {
if (stringBuilder == null
|| stringBuilder.length() <= 0) {
return WordContainsTypeEnum.NOT_FOUND;
}
return innerContainsSensitive(stringBuilder, context);
return innerContainsSensitive(stringBuilder, innerContext);
}
private WordContainsTypeEnum innerContainsSensitive(StringBuilder stringBuilder,
IWordContext context) {
final InnerSensitiveContext innerContext) {
// 初始化为当前的 map
Map nowMap = this.innerWordMap;
@@ -117,7 +120,7 @@ public class WordMap implements IWordMap {
final int len = stringBuilder.length();
for (int i = 0; i < len; i++) {
// 获取当前的 map 信息
nowMap = getNowMap(nowMap, context, stringBuilder, i);
nowMap = getNowMap(nowMap, i, stringBuilder, innerContext);
// 如果不为空,则判断是否为结尾。
if (ObjectUtil.isNull(nowMap)) {
@@ -156,18 +159,20 @@ public class WordMap implements IWordMap {
/**
* 获取当前的 Map
* @param nowMap 原始的当前 map
* @param context 上下文
* @param stringBuilder 文本缓存
* @param index 下标
* @param stringBuilder 文本缓存
* @param sensitiveContext 上下文
* @return 实际的当前 map
* @since 0.0.7
*/
private Map getNowMap(Map nowMap,
final IWordContext context,
final int index,
final StringBuilder stringBuilder,
final int index) {
char c = stringBuilder.charAt(index);
char mappingChar = context.charFormat().format(c, context);
final InnerSensitiveContext sensitiveContext) {
final IWordContext context = sensitiveContext.wordContext();
// 这里的 char 已经是统一格式化之后的,所以可以不用再次格式化。
char mappingChar = stringBuilder.charAt(index);
// 这里做一次重复词的处理
//TODO: 这里可以优化,是否获取一次。
@@ -175,8 +180,7 @@ public class WordMap implements IWordMap {
// 启用忽略重复&当前下标不是第一个
if(context.ignoreRepeat()
&& index > 0) {
char preChar = stringBuilder.charAt(index-1);
char preMappingChar = context.charFormat().format(preChar, context);
char preMappingChar = stringBuilder.charAt(index-1);
// 直接赋值为上一个 map
if(preMappingChar == mappingChar) {

View File

@@ -2,9 +2,7 @@ package com.github.houbb.sensitive.word.support.replace;
import com.github.houbb.heaven.annotation.ThreadSafe;
import com.github.houbb.heaven.constant.CharConst;
import com.github.houbb.heaven.util.lang.CharUtil;
import com.github.houbb.sensitive.word.api.ISensitiveWordReplace;
import com.github.houbb.sensitive.word.api.ISensitiveWordReplaceContext;
import com.github.houbb.sensitive.word.api.IWordContext;
import com.github.houbb.sensitive.word.api.IWordResult;

View File

@@ -1,57 +0,0 @@
package com.github.houbb.sensitive.word.support.replace;
import com.github.houbb.sensitive.word.api.ISensitiveWordReplaceContext;
/**
 * Mutable, fluent implementation of {@link ISensitiveWordReplaceContext}.
 *
 * <p>Both setters return {@code this} so that calls can be chained after
 * {@link #newInstance()}.</p>
 *
 * @author binbin.hou
 * @since 0.2.0
 */
public class SensitiveWordReplaceContext implements ISensitiveWordReplaceContext {

    /**
     * The sensitive word.
     * @since 0.2.0
     */
    private String sensitiveWord;

    /**
     * The word length.
     * @since 0.2.0
     */
    private int wordLength;

    /**
     * Creates an empty context; the word is {@code null} and the length is 0
     * until the setters are called.
     *
     * @return a new instance
     */
    public static SensitiveWordReplaceContext newInstance() {
        return new SensitiveWordReplaceContext();
    }

    // ---------------------------------------------------------------- getters

    @Override
    public String sensitiveWord() {
        return this.sensitiveWord;
    }

    @Override
    public int wordLength() {
        return this.wordLength;
    }

    // ---------------------------------------------------------------- fluent setters

    /**
     * @param sensitiveWord the sensitive word
     * @return this context, for chaining
     */
    public SensitiveWordReplaceContext sensitiveWord(String sensitiveWord) {
        this.sensitiveWord = sensitiveWord;
        return this;
    }

    /**
     * @param wordLength the word length
     * @return this context, for chaining
     */
    public SensitiveWordReplaceContext wordLength(int wordLength) {
        this.wordLength = wordLength;
        return this;
    }

    @Override
    public String toString() {
        // StringBuilder.append renders a null word as "null", exactly like string concatenation.
        final StringBuilder text = new StringBuilder("SensitiveWordReplaceContext{");
        text.append("sensitiveWord='").append(sensitiveWord).append('\'');
        text.append(", wordLength=").append(wordLength);
        text.append('}');
        return text.toString();
    }

}

View File

@@ -5,8 +5,7 @@ import com.github.houbb.heaven.util.util.CollectionUtil;
import com.github.houbb.sensitive.word.api.ICharFormat;
import com.github.houbb.sensitive.word.api.IWordContext;
import java.util.ArrayList;
import java.util.List;
import java.util.*;
/**
* 内部格式化工具类
@@ -16,6 +15,12 @@ public final class InnerFormatUtils {
private InnerFormatUtils(){}
/**
* 空字符数组
* @since 0.6.0
*/
private static final char[] EMPTY_CHARS = new char[0];
/**
* 格式化
* @param original 原始
@@ -39,6 +44,33 @@ public final class InnerFormatUtils {
return stringBuilder.toString();
}
/**
 * Builds a lookup table from each character of the text to its formatted form.
 *
 * <p>Every character is passed through {@code context.charFormat()}; a
 * character that occurs several times is formatted each time and simply
 * overwrites its earlier entry.</p>
 *
 * @param original the original text
 * @param context the word context that supplies the character formatter
 * @return an empty map when {@code StringUtil.isEmpty(original)} holds,
 *         otherwise a map with one entry per distinct character of the text
 * @since 0.6.0
 */
public static Map<Character, Character> formatCharsMapping(final String original, final IWordContext context) {
    if (StringUtil.isEmpty(original)) {
        return Collections.emptyMap();
    }

    // Sized by the text length, an upper bound on the number of distinct characters.
    final Map<Character, Character> mapping = new HashMap<>(original.length());
    final ICharFormat formatter = context.charFormat();
    for (final char rawChar : original.toCharArray()) {
        mapping.put(rawChar, formatter.format(rawChar, context));
    }
    return mapping;
}
/**
* 格式化列表
* @param list 列表

View File

@@ -6,7 +6,7 @@ import com.github.houbb.sensitive.word.core.SensitiveWordHelper;
import org.junit.Ignore;
import org.junit.Test;
//@Ignore
@Ignore
public class BenchmarkBasicTest {
/**
@@ -42,6 +42,8 @@ public class BenchmarkBasicTest {
* 12111 第一次优化
*
* 1133 只有单词校验
*
* V0.6.0 优化 replace mapping 之后621ms性能接近翻倍。
*/
@Test
public void costTimeOnlyWordTest() {

View File

@@ -0,0 +1,76 @@
package com.github.houbb.sensitive.word.benchmark;
import com.github.houbb.heaven.util.util.RandomUtil;
import com.github.houbb.sensitive.word.bs.SensitiveWordBs;
import com.github.houbb.sensitive.word.core.SensitiveWordHelper;
import org.junit.Ignore;
import org.junit.Test;
/**
 * Manual timing benchmarks for {@code SensitiveWordBs.findAll}.
 *
 * <p>The class is annotated with {@code @Ignore}. Each test prints the
 * elapsed wall-clock milliseconds to standard output.</p>
 */
@Ignore
public class BenchmarkTimesTest {

    /**
     * Baseline: a 100+ character string scanned 100,000 times with only the
     * word check enabled and every {@code ignore*} option switched off.
     *
     * <p>Recorded by the original author for V0.6.0: 1470ms, close to 72K QPS.</p>
     */
    @Test
    public void onlyWordAndNoReplaceTest() {
        final SensitiveWordBs wordBs = SensitiveWordBs.newInstance()
                .enableWordCheck(true)
                .enableNumCheck(false)
                .enableUrlCheck(false)
                .enableEmailCheck(false)
                .ignoreRepeat(false)
                .ignoreCase(false)
                .ignoreNumStyle(false)
                .ignoreChineseStyle(false)
                .ignoreEnglishStyle(false)
                .ignoreWidth(false)
                .init();

        timeFindAll(wordBs);
    }

    /**
     * Baseline: a 100+ character string scanned 100,000 times with only the
     * word check enabled and every {@code ignore*} option switched on.
     *
     * <p>Recorded by the original author for V0.6.0: 2744ms.</p>
     */
    @Test
    public void onlyWordAndWithReplaceTest() {
        final SensitiveWordBs wordBs = SensitiveWordBs.newInstance()
                .enableWordCheck(true)
                .enableNumCheck(false)
                .enableUrlCheck(false)
                .enableEmailCheck(false)
                .ignoreRepeat(true)
                .ignoreCase(true)
                .ignoreNumStyle(true)
                .ignoreChineseStyle(true)
                .ignoreEnglishStyle(true)
                .ignoreWidth(true)
                .init();

        timeFindAll(wordBs);
    }

    /**
     * Builds the sample text, calls {@code findAll} on it 100,000 times and
     * prints the elapsed milliseconds.
     *
     * @param wordBs the already initialised instance to measure
     */
    private static void timeFindAll(final SensitiveWordBs wordBs) {
        // Fixed prefix and suffix around a middle part produced by RandomUtil.randomString.
        final String sampleText = "你他妈的不要说脏话"
                + RandomUtil.randomString("1234567890bcdefghiJKLMNOPQRSTUVWXYZ", 100)
                + "我们他妈的从来不说脏说";

        final long begin = System.currentTimeMillis();
        int round = 0;
        while (round < 100_000) {
            wordBs.findAll(sampleText);
            round++;
        }
        final long cost = System.currentTimeMillis() - begin;

        System.out.println("------------------ COST: " + cost);
    }

}