mirror of
https://github.com/houbb/sensitive-word.git
synced 2026-03-22 08:27:36 +08:00
[Feature] add for new
This commit is contained in:
@@ -1,25 +0,0 @@
|
||||
package com.github.houbb.sensitive.word.api;
|
||||
|
||||
/**
|
||||
* 敏感词替换策略上下文
|
||||
*
|
||||
* @author binbin.hou
|
||||
* @since 0.2.0
|
||||
*/
|
||||
public interface ISensitiveWordReplaceContext {
|
||||
|
||||
/**
|
||||
* 敏感词
|
||||
* @return 敏感词
|
||||
* @since 0.2.0
|
||||
*/
|
||||
String sensitiveWord();
|
||||
|
||||
/**
|
||||
* 单词长度
|
||||
* @return 单词长度
|
||||
* @since 0.2.0
|
||||
*/
|
||||
int wordLength();
|
||||
|
||||
}
|
||||
@@ -1,5 +1,6 @@
|
||||
package com.github.houbb.sensitive.word.api;
|
||||
|
||||
import com.github.houbb.sensitive.word.api.context.InnerSensitiveContext;
|
||||
import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum;
|
||||
import com.github.houbb.sensitive.word.constant.enums.WordContainsTypeEnum;
|
||||
|
||||
@@ -23,12 +24,12 @@ public interface IWordMap {
|
||||
/**
|
||||
* 是否包含敏感词
|
||||
* @param stringBuilder 缓冲
|
||||
* @param context 上下文
|
||||
* @param innerContext 上下文
|
||||
* @return 是否包含
|
||||
* @since 0.5.0
|
||||
* @see ValidModeEnum#FAIL_FAST 建议使用快速返回模式
|
||||
*/
|
||||
WordContainsTypeEnum contains(final StringBuilder stringBuilder,
|
||||
final IWordContext context);
|
||||
final InnerSensitiveContext innerContext);
|
||||
|
||||
}
|
||||
|
||||
@@ -0,0 +1,72 @@
|
||||
package com.github.houbb.sensitive.word.api.context;
|
||||
|
||||
import com.github.houbb.sensitive.word.api.IWordContext;
|
||||
import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum;
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* 内部信息上下文
|
||||
*
|
||||
* @author binbin.hou
|
||||
* @since 0.6.0
|
||||
*/
|
||||
public class InnerSensitiveContext {
|
||||
|
||||
/**
|
||||
* 原始文本
|
||||
*/
|
||||
private String originalText;
|
||||
/**
|
||||
* 格式化后的字符
|
||||
*/
|
||||
private Map<Character, Character> formatCharMapping;
|
||||
/**
|
||||
* 校验模式
|
||||
*/
|
||||
private ValidModeEnum modeEnum;
|
||||
/**
|
||||
* 原始上下文
|
||||
*/
|
||||
private IWordContext wordContext;
|
||||
|
||||
public static InnerSensitiveContext newInstance() {
|
||||
return new InnerSensitiveContext();
|
||||
}
|
||||
|
||||
public String originalText() {
|
||||
return originalText;
|
||||
}
|
||||
|
||||
public InnerSensitiveContext originalText(String text) {
|
||||
this.originalText = text;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Map<Character, Character> formatCharMapping() {
|
||||
return formatCharMapping;
|
||||
}
|
||||
|
||||
public InnerSensitiveContext formatCharMapping(Map<Character, Character> formatCharMapping) {
|
||||
this.formatCharMapping = formatCharMapping;
|
||||
return this;
|
||||
}
|
||||
|
||||
public ValidModeEnum modeEnum() {
|
||||
return modeEnum;
|
||||
}
|
||||
|
||||
public InnerSensitiveContext modeEnum(ValidModeEnum modeEnum) {
|
||||
this.modeEnum = modeEnum;
|
||||
return this;
|
||||
}
|
||||
|
||||
public IWordContext wordContext() {
|
||||
return wordContext;
|
||||
}
|
||||
|
||||
public InnerSensitiveContext wordContext(IWordContext context) {
|
||||
this.wordContext = context;
|
||||
return this;
|
||||
}
|
||||
}
|
||||
@@ -2,7 +2,10 @@ package com.github.houbb.sensitive.word.core;
|
||||
|
||||
import com.github.houbb.heaven.util.lang.StringUtil;
|
||||
import com.github.houbb.heaven.util.util.CollectionUtil;
|
||||
import com.github.houbb.sensitive.word.api.*;
|
||||
import com.github.houbb.sensitive.word.api.ISensitiveWord;
|
||||
import com.github.houbb.sensitive.word.api.ISensitiveWordReplace;
|
||||
import com.github.houbb.sensitive.word.api.IWordContext;
|
||||
import com.github.houbb.sensitive.word.api.IWordResult;
|
||||
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
|
||||
@@ -1,19 +1,18 @@
|
||||
package com.github.houbb.sensitive.word.core;
|
||||
|
||||
import com.github.houbb.heaven.util.guava.Guavas;
|
||||
import com.github.houbb.heaven.util.io.FileUtil;
|
||||
import com.github.houbb.sensitive.word.api.ISensitiveWord;
|
||||
import com.github.houbb.sensitive.word.api.ISensitiveWordReplaceContext;
|
||||
import com.github.houbb.sensitive.word.api.IWordContext;
|
||||
import com.github.houbb.sensitive.word.api.IWordResult;
|
||||
import com.github.houbb.sensitive.word.api.context.InnerSensitiveContext;
|
||||
import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum;
|
||||
import com.github.houbb.sensitive.word.support.check.ISensitiveCheck;
|
||||
import com.github.houbb.sensitive.word.support.check.SensitiveCheckResult;
|
||||
import com.github.houbb.sensitive.word.support.check.impl.SensitiveCheckUrl;
|
||||
import com.github.houbb.sensitive.word.support.replace.SensitiveWordReplaceContext;
|
||||
import com.github.houbb.sensitive.word.support.result.WordResult;
|
||||
import com.github.houbb.sensitive.word.utils.InnerFormatUtils;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* 默认实现
|
||||
@@ -54,8 +53,15 @@ public class SensitiveWord extends AbstractSensitiveWord {
|
||||
//TODO: 这里拆分为2个部分,从而保障性能。但是要注意处理下标的问题。
|
||||
//1. 原始的敏感词部分
|
||||
//2. email/url/num 的单独一次遍历处理。
|
||||
final Map<Character, Character> characterCharacterMap = InnerFormatUtils.formatCharsMapping(text, context);
|
||||
final InnerSensitiveContext checkContext = InnerSensitiveContext.newInstance()
|
||||
.originalText(text)
|
||||
.wordContext(context)
|
||||
.modeEnum(ValidModeEnum.FAIL_OVER)
|
||||
.formatCharMapping(characterCharacterMap);
|
||||
|
||||
for (int i = 0; i < text.length(); i++) {
|
||||
SensitiveCheckResult checkResult = sensitiveCheck.sensitiveCheck(text, i, ValidModeEnum.FAIL_OVER, context);
|
||||
SensitiveCheckResult checkResult = sensitiveCheck.sensitiveCheck(i, checkContext);
|
||||
|
||||
// 命中
|
||||
int wordLength = checkResult.index();
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
package com.github.houbb.sensitive.word.support.check;
|
||||
|
||||
import com.github.houbb.sensitive.word.api.IWordContext;
|
||||
import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum;
|
||||
import com.github.houbb.sensitive.word.api.context.InnerSensitiveContext;
|
||||
|
||||
/**
|
||||
* 敏感信息监测接口
|
||||
@@ -27,16 +26,12 @@ public interface ISensitiveCheck {
|
||||
* 2. 敏感词的长度
|
||||
* 3. 正常走过字段的长度(便于后期替换优化,避免不必要的循环重复)
|
||||
*
|
||||
* @param txt 文本信息
|
||||
* @param beginIndex 开始下标
|
||||
* @param validModeEnum 验证模式
|
||||
* @param context 执行上下文
|
||||
* @return 敏感信息对应的长度
|
||||
* @since 0.0.5
|
||||
*/
|
||||
SensitiveCheckResult sensitiveCheck(final String txt,
|
||||
final int beginIndex,
|
||||
final ValidModeEnum validModeEnum,
|
||||
final IWordContext context);
|
||||
SensitiveCheckResult sensitiveCheck(final int beginIndex,
|
||||
final InnerSensitiveContext context);
|
||||
|
||||
}
|
||||
|
||||
@@ -2,9 +2,9 @@ package com.github.houbb.sensitive.word.support.check.impl;
|
||||
|
||||
import com.github.houbb.heaven.annotation.ThreadSafe;
|
||||
import com.github.houbb.sensitive.word.api.IWordContext;
|
||||
import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum;
|
||||
import com.github.houbb.sensitive.word.support.check.ISensitiveCheck;
|
||||
import com.github.houbb.sensitive.word.support.check.SensitiveCheckResult;
|
||||
import com.github.houbb.sensitive.word.api.context.InnerSensitiveContext;
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* 抽象实现策略
|
||||
@@ -19,34 +19,29 @@ public abstract class AbstractConditionSensitiveCheck extends AbstractSensitiveC
|
||||
* 当前字符串是否符合规范
|
||||
* @param mappingChar 当前字符
|
||||
* @param index 下标
|
||||
* @param rawText 原始文本
|
||||
* @param context 上下文
|
||||
* @param checkContext 校验文本
|
||||
* @return 结果
|
||||
* @since 0.3.2
|
||||
*/
|
||||
protected abstract boolean isCharCondition(char mappingChar,
|
||||
int index,
|
||||
String rawText,
|
||||
final IWordContext context);
|
||||
protected abstract boolean isCharCondition(char mappingChar, int index, InnerSensitiveContext checkContext);
|
||||
|
||||
/**
|
||||
* 这里指定一个阈值条件
|
||||
* @param index 当前下标
|
||||
* @param rawText 原始文本
|
||||
* @param stringBuilder 缓存
|
||||
* @param context 上下文
|
||||
* @param checkContext 上下文
|
||||
* @return 是否满足条件
|
||||
* @since 0.3.2
|
||||
*/
|
||||
protected abstract boolean isStringCondition(int index,
|
||||
String rawText,
|
||||
final StringBuilder stringBuilder,
|
||||
final IWordContext context);
|
||||
final StringBuilder stringBuilder, InnerSensitiveContext checkContext);
|
||||
|
||||
@Override
|
||||
protected int doGetActualLength(String txt, int beginIndex,
|
||||
ValidModeEnum validModeEnum,
|
||||
IWordContext context) {
|
||||
protected int getActualLength(int beginIndex, InnerSensitiveContext checkContext) {
|
||||
final String txt = checkContext.originalText();
|
||||
final IWordContext context = checkContext.wordContext();
|
||||
final Map<Character, Character> formatCharMapping = checkContext.formatCharMapping();
|
||||
|
||||
int actualLength = 0;
|
||||
|
||||
// 采用 ThreadLocal 应该可以提升性能,减少对象的创建。
|
||||
@@ -55,12 +50,11 @@ public abstract class AbstractConditionSensitiveCheck extends AbstractSensitiveC
|
||||
for(int i = beginIndex; i < txt.length(); i++) {
|
||||
currentIx = i;
|
||||
char currentChar = txt.charAt(i);
|
||||
|
||||
// 映射处理
|
||||
char mappingChar = context.charFormat().format(currentChar, context);
|
||||
char mappingChar = formatCharMapping.get(currentChar);
|
||||
|
||||
// 符合条件
|
||||
boolean currentCondition = isCharCondition(mappingChar, i, txt, context);
|
||||
boolean currentCondition = isCharCondition(mappingChar, i, checkContext);
|
||||
|
||||
//4 个场景
|
||||
if(currentCondition) {
|
||||
@@ -71,7 +65,7 @@ public abstract class AbstractConditionSensitiveCheck extends AbstractSensitiveC
|
||||
}
|
||||
|
||||
// 匹配
|
||||
if(isStringCondition(currentIx, txt, stringBuilder, context)) {
|
||||
if(isStringCondition(currentIx, stringBuilder, checkContext)) {
|
||||
actualLength = stringBuilder.length();
|
||||
}
|
||||
|
||||
|
||||
@@ -2,8 +2,7 @@ package com.github.houbb.sensitive.word.support.check.impl;
|
||||
|
||||
import com.github.houbb.heaven.annotation.ThreadSafe;
|
||||
import com.github.houbb.heaven.util.lang.StringUtil;
|
||||
import com.github.houbb.sensitive.word.api.IWordContext;
|
||||
import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum;
|
||||
import com.github.houbb.sensitive.word.api.context.InnerSensitiveContext;
|
||||
import com.github.houbb.sensitive.word.support.check.ISensitiveCheck;
|
||||
import com.github.houbb.sensitive.word.support.check.SensitiveCheckResult;
|
||||
|
||||
@@ -25,45 +24,23 @@ public abstract class AbstractSensitiveCheck implements ISensitiveCheck {
|
||||
|
||||
/**
|
||||
* 获取确切的长度
|
||||
* @param txt 文本
|
||||
* @param beginIndex 开始
|
||||
* @param validModeEnum 校验枚举
|
||||
* @param context 上下文
|
||||
* @param checkContext 上下文
|
||||
* @return 长度
|
||||
* @since 0.4.0
|
||||
*/
|
||||
protected abstract int doGetActualLength(String txt, int beginIndex,
|
||||
ValidModeEnum validModeEnum,
|
||||
IWordContext context);
|
||||
|
||||
/**
|
||||
* 获取确切的长度
|
||||
* @param txt 文本
|
||||
* @param beginIndex 开始
|
||||
* @param validModeEnum 校验枚举
|
||||
* @param context 上下文
|
||||
* @return 长度
|
||||
* @since 0.4.0
|
||||
*/
|
||||
protected int getActualLength(String txt, int beginIndex,
|
||||
ValidModeEnum validModeEnum,
|
||||
IWordContext context) {
|
||||
if(StringUtil.isEmpty(txt)) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
return doGetActualLength(txt, beginIndex, validModeEnum, context);
|
||||
}
|
||||
protected abstract int getActualLength(int beginIndex, final InnerSensitiveContext checkContext);
|
||||
|
||||
@Override
|
||||
public SensitiveCheckResult sensitiveCheck(String txt, int beginIndex,
|
||||
ValidModeEnum validModeEnum,
|
||||
IWordContext context) {
|
||||
public SensitiveCheckResult sensitiveCheck(int beginIndex,
|
||||
final InnerSensitiveContext checkContext) {
|
||||
Class<? extends ISensitiveCheck> clazz = getSensitiveCheckClass();
|
||||
final String txt = checkContext.originalText();
|
||||
if(StringUtil.isEmpty(txt)) {
|
||||
return SensitiveCheckResult.of(0, clazz);
|
||||
}
|
||||
int actualLength = getActualLength(txt, beginIndex, validModeEnum, context);
|
||||
|
||||
int actualLength = getActualLength(beginIndex, checkContext);
|
||||
|
||||
return SensitiveCheckResult.of(actualLength, clazz);
|
||||
}
|
||||
|
||||
@@ -3,7 +3,7 @@ package com.github.houbb.sensitive.word.support.check.impl;
|
||||
import com.github.houbb.heaven.annotation.ThreadSafe;
|
||||
import com.github.houbb.heaven.util.lang.CharUtil;
|
||||
import com.github.houbb.heaven.util.util.regex.RegexUtil;
|
||||
import com.github.houbb.sensitive.word.api.IWordContext;
|
||||
import com.github.houbb.sensitive.word.api.context.InnerSensitiveContext;
|
||||
import com.github.houbb.sensitive.word.constant.AppConst;
|
||||
import com.github.houbb.sensitive.word.support.check.ISensitiveCheck;
|
||||
|
||||
@@ -35,12 +35,17 @@ public class SensitiveCheckEmail extends AbstractConditionSensitiveCheck {
|
||||
}
|
||||
|
||||
@Override
|
||||
protected boolean isCharCondition(char mappingChar, int index, String rawText, IWordContext context) {
|
||||
protected Class<? extends ISensitiveCheck> getSensitiveCheckClass() {
|
||||
return SensitiveCheckEmail.class;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected boolean isCharCondition(char mappingChar, int index, InnerSensitiveContext checkContext) {
|
||||
return CharUtil.isEmilChar(mappingChar);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected boolean isStringCondition(int index, String rawText, StringBuilder stringBuilder, IWordContext context) {
|
||||
protected boolean isStringCondition(int index, StringBuilder stringBuilder, InnerSensitiveContext checkContext) {
|
||||
int bufferLen = stringBuilder.length();
|
||||
|
||||
//x@a.cn
|
||||
@@ -55,9 +60,4 @@ public class SensitiveCheckEmail extends AbstractConditionSensitiveCheck {
|
||||
return RegexUtil.isEmail(string);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Class<? extends ISensitiveCheck> getSensitiveCheckClass() {
|
||||
return SensitiveCheckEmail.class;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -2,8 +2,7 @@ package com.github.houbb.sensitive.word.support.check.impl;
|
||||
|
||||
import com.github.houbb.heaven.support.pipeline.Pipeline;
|
||||
import com.github.houbb.heaven.support.pipeline.impl.DefaultPipeline;
|
||||
import com.github.houbb.sensitive.word.api.IWordContext;
|
||||
import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum;
|
||||
import com.github.houbb.sensitive.word.api.context.InnerSensitiveContext;
|
||||
import com.github.houbb.sensitive.word.support.check.ISensitiveCheck;
|
||||
import com.github.houbb.sensitive.word.support.check.SensitiveCheckResult;
|
||||
|
||||
@@ -25,10 +24,8 @@ public abstract class SensitiveCheckInit implements ISensitiveCheck {
|
||||
|
||||
|
||||
@Override
|
||||
public SensitiveCheckResult sensitiveCheck(String txt,
|
||||
int beginIndex,
|
||||
ValidModeEnum validModeEnum,
|
||||
IWordContext context) {
|
||||
public SensitiveCheckResult sensitiveCheck(final int beginIndex,
|
||||
final InnerSensitiveContext checkContext) {
|
||||
|
||||
Pipeline<ISensitiveCheck> pipeline = new DefaultPipeline<>();
|
||||
this.init(pipeline);
|
||||
@@ -36,7 +33,7 @@ public abstract class SensitiveCheckInit implements ISensitiveCheck {
|
||||
|
||||
// 循环调用
|
||||
for(ISensitiveCheck sensitiveCheck : sensitiveChecks) {
|
||||
SensitiveCheckResult result = sensitiveCheck.sensitiveCheck(txt, beginIndex, validModeEnum, context);
|
||||
SensitiveCheckResult result = sensitiveCheck.sensitiveCheck(beginIndex, checkContext);
|
||||
|
||||
if(result.index() > 0) {
|
||||
return result;
|
||||
|
||||
@@ -1,8 +1,7 @@
|
||||
package com.github.houbb.sensitive.word.support.check.impl;
|
||||
|
||||
import com.github.houbb.heaven.annotation.ThreadSafe;
|
||||
import com.github.houbb.sensitive.word.api.IWordContext;
|
||||
import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum;
|
||||
import com.github.houbb.sensitive.word.api.context.InnerSensitiveContext;
|
||||
import com.github.houbb.sensitive.word.support.check.ISensitiveCheck;
|
||||
import com.github.houbb.sensitive.word.support.check.SensitiveCheckResult;
|
||||
|
||||
@@ -29,13 +28,12 @@ public class SensitiveCheckNone implements ISensitiveCheck {
|
||||
*/
|
||||
private static final SensitiveCheckResult NONE_RESULT = SensitiveCheckResult.of(0, SensitiveCheckNone.class);
|
||||
|
||||
@Override
|
||||
public SensitiveCheckResult sensitiveCheck(String txt, int beginIndex, ValidModeEnum validModeEnum, IWordContext context) {
|
||||
return NONE_RESULT;
|
||||
}
|
||||
|
||||
public static SensitiveCheckResult getNoneResult() {
|
||||
return NONE_RESULT;
|
||||
}
|
||||
|
||||
@Override
|
||||
public SensitiveCheckResult sensitiveCheck(int beginIndex, InnerSensitiveContext context) {
|
||||
return NONE_RESULT;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
package com.github.houbb.sensitive.word.support.check.impl;
|
||||
|
||||
import com.github.houbb.heaven.annotation.ThreadSafe;
|
||||
import com.github.houbb.sensitive.word.api.IWordContext;
|
||||
import com.github.houbb.sensitive.word.api.context.InnerSensitiveContext;
|
||||
import com.github.houbb.sensitive.word.support.check.ISensitiveCheck;
|
||||
|
||||
/**
|
||||
@@ -23,21 +23,20 @@ public class SensitiveCheckNum extends AbstractConditionSensitiveCheck {
|
||||
return INSTANCE;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected boolean isCharCondition(char mappingChar, int index, String rawText, IWordContext context) {
|
||||
return Character.isDigit(mappingChar);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected boolean isStringCondition(int index, String rawText, StringBuilder stringBuilder, IWordContext context) {
|
||||
int bufferLen = stringBuilder.length();
|
||||
|
||||
return bufferLen >= context.sensitiveCheckNumLen();
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Class<? extends ISensitiveCheck> getSensitiveCheckClass() {
|
||||
return SensitiveCheckNum.class;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected boolean isCharCondition(char mappingChar, int index, InnerSensitiveContext checkContext) {
|
||||
return Character.isDigit(mappingChar);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected boolean isStringCondition(int index, StringBuilder stringBuilder, InnerSensitiveContext checkContext) {
|
||||
int bufferLen = stringBuilder.length();
|
||||
return bufferLen >= checkContext.wordContext().sensitiveCheckNumLen();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -3,7 +3,7 @@ package com.github.houbb.sensitive.word.support.check.impl;
|
||||
import com.github.houbb.heaven.annotation.ThreadSafe;
|
||||
import com.github.houbb.heaven.util.lang.CharUtil;
|
||||
import com.github.houbb.heaven.util.util.regex.RegexUtil;
|
||||
import com.github.houbb.sensitive.word.api.IWordContext;
|
||||
import com.github.houbb.sensitive.word.api.context.InnerSensitiveContext;
|
||||
import com.github.houbb.sensitive.word.constant.AppConst;
|
||||
import com.github.houbb.sensitive.word.support.check.ISensitiveCheck;
|
||||
|
||||
@@ -32,12 +32,17 @@ public class SensitiveCheckUrl extends AbstractConditionSensitiveCheck {
|
||||
}
|
||||
|
||||
@Override
|
||||
protected boolean isCharCondition(char mappingChar, int index, String rawText, IWordContext context) {
|
||||
protected Class<? extends ISensitiveCheck> getSensitiveCheckClass() {
|
||||
return SensitiveCheckUrl.class;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected boolean isCharCondition(char mappingChar, int index, InnerSensitiveContext checkContext) {
|
||||
return CharUtil.isWebSiteChar(mappingChar);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected boolean isStringCondition(int index, String rawText, StringBuilder stringBuilder, IWordContext context) {
|
||||
protected boolean isStringCondition(int index, StringBuilder stringBuilder, InnerSensitiveContext checkContext) {
|
||||
int bufferLen = stringBuilder.length();
|
||||
//a.cn
|
||||
if(bufferLen < 4) {
|
||||
@@ -51,9 +56,4 @@ public class SensitiveCheckUrl extends AbstractConditionSensitiveCheck {
|
||||
return RegexUtil.isWebSite(string);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Class<? extends ISensitiveCheck> getSensitiveCheckClass() {
|
||||
return SensitiveCheckUrl.class;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -3,10 +3,13 @@ package com.github.houbb.sensitive.word.support.check.impl;
|
||||
import com.github.houbb.heaven.annotation.ThreadSafe;
|
||||
import com.github.houbb.sensitive.word.api.IWordContext;
|
||||
import com.github.houbb.sensitive.word.api.IWordMap;
|
||||
import com.github.houbb.sensitive.word.api.context.InnerSensitiveContext;
|
||||
import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum;
|
||||
import com.github.houbb.sensitive.word.constant.enums.WordContainsTypeEnum;
|
||||
import com.github.houbb.sensitive.word.support.check.ISensitiveCheck;
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* 敏感词监测实现
|
||||
* @author binbin.hou
|
||||
@@ -30,22 +33,27 @@ public class SensitiveCheckWord extends AbstractSensitiveCheck {
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int doGetActualLength(String txt, int beginIndex, ValidModeEnum validModeEnum, IWordContext context) {
|
||||
protected int getActualLength(int beginIndex, InnerSensitiveContext innerContext) {
|
||||
final String txt = innerContext.originalText();
|
||||
final Map<Character, Character> formatCharMapping = innerContext.formatCharMapping();
|
||||
final ValidModeEnum validModeEnum = innerContext.modeEnum();
|
||||
final IWordContext context = innerContext.wordContext();
|
||||
|
||||
// 采用 ThreadLocal 应该可以提升性能,减少对象的创建。
|
||||
int actualLength = 0;
|
||||
final IWordMap wordMap = context.wordMap();
|
||||
|
||||
// 前一个条件
|
||||
StringBuilder stringBuilder = new StringBuilder();
|
||||
for(int i = beginIndex; i < txt.length(); i++) {
|
||||
char currentChar = txt.charAt(i);
|
||||
|
||||
char[] rawChars = txt.toCharArray();
|
||||
for(int i = beginIndex; i < rawChars.length; i++) {
|
||||
// 映射处理
|
||||
char mappingChar = context.charFormat().format(currentChar, context);
|
||||
final char currentChar = rawChars[i];
|
||||
char mappingChar = formatCharMapping.get(currentChar);
|
||||
stringBuilder.append(mappingChar);
|
||||
|
||||
// 判断是否存在
|
||||
WordContainsTypeEnum wordContainsTypeEnum = wordMap.contains(stringBuilder, context);
|
||||
WordContainsTypeEnum wordContainsTypeEnum = wordMap.contains(stringBuilder, innerContext);
|
||||
if(WordContainsTypeEnum.CONTAINS_END.equals(wordContainsTypeEnum)) {
|
||||
actualLength = stringBuilder.length();
|
||||
|
||||
|
||||
@@ -5,6 +5,7 @@ import com.github.houbb.heaven.util.lang.ObjectUtil;
|
||||
import com.github.houbb.heaven.util.lang.StringUtil;
|
||||
import com.github.houbb.sensitive.word.api.IWordContext;
|
||||
import com.github.houbb.sensitive.word.api.IWordMap;
|
||||
import com.github.houbb.sensitive.word.api.context.InnerSensitiveContext;
|
||||
import com.github.houbb.sensitive.word.constant.AppConst;
|
||||
import com.github.houbb.sensitive.word.constant.enums.WordContainsTypeEnum;
|
||||
|
||||
@@ -95,21 +96,23 @@ public class WordMap implements IWordMap {
|
||||
* (2)如果遇到,则直接返回 true
|
||||
*
|
||||
* @param stringBuilder 字符串
|
||||
* @param innerContext 内部上下文
|
||||
* @return 是否包含
|
||||
* @since 0.0.1
|
||||
*/
|
||||
@Override
|
||||
public WordContainsTypeEnum contains(StringBuilder stringBuilder, final IWordContext context) {
|
||||
public WordContainsTypeEnum contains(final StringBuilder stringBuilder,
|
||||
final InnerSensitiveContext innerContext) {
|
||||
if (stringBuilder == null
|
||||
|| stringBuilder.length() <= 0) {
|
||||
return WordContainsTypeEnum.NOT_FOUND;
|
||||
}
|
||||
|
||||
return innerContainsSensitive(stringBuilder, context);
|
||||
return innerContainsSensitive(stringBuilder, innerContext);
|
||||
}
|
||||
|
||||
private WordContainsTypeEnum innerContainsSensitive(StringBuilder stringBuilder,
|
||||
IWordContext context) {
|
||||
final InnerSensitiveContext innerContext) {
|
||||
// 初始化为当前的 map
|
||||
Map nowMap = this.innerWordMap;
|
||||
|
||||
@@ -117,7 +120,7 @@ public class WordMap implements IWordMap {
|
||||
final int len = stringBuilder.length();
|
||||
for (int i = 0; i < len; i++) {
|
||||
// 获取当前的 map 信息
|
||||
nowMap = getNowMap(nowMap, context, stringBuilder, i);
|
||||
nowMap = getNowMap(nowMap, i, stringBuilder, innerContext);
|
||||
|
||||
// 如果不为空,则判断是否为结尾。
|
||||
if (ObjectUtil.isNull(nowMap)) {
|
||||
@@ -156,18 +159,20 @@ public class WordMap implements IWordMap {
|
||||
/**
|
||||
* 获取当前的 Map
|
||||
* @param nowMap 原始的当前 map
|
||||
* @param context 上下文
|
||||
* @param stringBuilder 文本缓存
|
||||
* @param index 下标
|
||||
* @param stringBuilder 文本缓存
|
||||
* @param sensitiveContext 上下文
|
||||
* @return 实际的当前 map
|
||||
* @since 0.0.7
|
||||
*/
|
||||
private Map getNowMap(Map nowMap,
|
||||
final IWordContext context,
|
||||
final int index,
|
||||
final StringBuilder stringBuilder,
|
||||
final int index) {
|
||||
char c = stringBuilder.charAt(index);
|
||||
char mappingChar = context.charFormat().format(c, context);
|
||||
final InnerSensitiveContext sensitiveContext) {
|
||||
final IWordContext context = sensitiveContext.wordContext();
|
||||
|
||||
// 这里的 char 已经是统一格式化之后的,所以可以不用再次格式化。
|
||||
char mappingChar = stringBuilder.charAt(index);
|
||||
|
||||
// 这里做一次重复词的处理
|
||||
//TODO: 这里可以优化,是否获取一次。
|
||||
@@ -175,8 +180,7 @@ public class WordMap implements IWordMap {
|
||||
// 启用忽略重复&当前下标不是第一个
|
||||
if(context.ignoreRepeat()
|
||||
&& index > 0) {
|
||||
char preChar = stringBuilder.charAt(index-1);
|
||||
char preMappingChar = context.charFormat().format(preChar, context);
|
||||
char preMappingChar = stringBuilder.charAt(index-1);
|
||||
|
||||
// 直接赋值为上一个 map
|
||||
if(preMappingChar == mappingChar) {
|
||||
|
||||
@@ -2,9 +2,7 @@ package com.github.houbb.sensitive.word.support.replace;
|
||||
|
||||
import com.github.houbb.heaven.annotation.ThreadSafe;
|
||||
import com.github.houbb.heaven.constant.CharConst;
|
||||
import com.github.houbb.heaven.util.lang.CharUtil;
|
||||
import com.github.houbb.sensitive.word.api.ISensitiveWordReplace;
|
||||
import com.github.houbb.sensitive.word.api.ISensitiveWordReplaceContext;
|
||||
import com.github.houbb.sensitive.word.api.IWordContext;
|
||||
import com.github.houbb.sensitive.word.api.IWordResult;
|
||||
|
||||
|
||||
@@ -1,57 +0,0 @@
|
||||
package com.github.houbb.sensitive.word.support.replace;
|
||||
|
||||
import com.github.houbb.sensitive.word.api.ISensitiveWordReplaceContext;
|
||||
|
||||
/**
|
||||
* 敏感词替换上下文
|
||||
*
|
||||
* @author binbin.hou
|
||||
* @since 0.2.0
|
||||
*/
|
||||
public class SensitiveWordReplaceContext implements ISensitiveWordReplaceContext {
|
||||
|
||||
public static SensitiveWordReplaceContext newInstance() {
|
||||
return new SensitiveWordReplaceContext();
|
||||
}
|
||||
|
||||
/**
|
||||
* 敏感词
|
||||
* @since 0.2.0
|
||||
*/
|
||||
private String sensitiveWord;
|
||||
|
||||
/**
|
||||
* 单词长度
|
||||
* @since 0.2.0
|
||||
*/
|
||||
private int wordLength;
|
||||
|
||||
@Override
|
||||
public String sensitiveWord() {
|
||||
return sensitiveWord;
|
||||
}
|
||||
|
||||
public SensitiveWordReplaceContext sensitiveWord(String sensitiveWord) {
|
||||
this.sensitiveWord = sensitiveWord;
|
||||
return this;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int wordLength() {
|
||||
return wordLength;
|
||||
}
|
||||
|
||||
public SensitiveWordReplaceContext wordLength(int wordLength) {
|
||||
this.wordLength = wordLength;
|
||||
return this;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "SensitiveWordReplaceContext{" +
|
||||
"sensitiveWord='" + sensitiveWord + '\'' +
|
||||
", wordLength=" + wordLength +
|
||||
'}';
|
||||
}
|
||||
|
||||
}
|
||||
@@ -5,8 +5,7 @@ import com.github.houbb.heaven.util.util.CollectionUtil;
|
||||
import com.github.houbb.sensitive.word.api.ICharFormat;
|
||||
import com.github.houbb.sensitive.word.api.IWordContext;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* 内部格式化工具类
|
||||
@@ -16,6 +15,12 @@ public final class InnerFormatUtils {
|
||||
|
||||
private InnerFormatUtils(){}
|
||||
|
||||
/**
|
||||
* 空字符数组
|
||||
* @since 0.6.0
|
||||
*/
|
||||
private static final char[] EMPTY_CHARS = new char[0];
|
||||
|
||||
/**
|
||||
* 格式化
|
||||
* @param original 原始
|
||||
@@ -39,6 +44,33 @@ public final class InnerFormatUtils {
|
||||
return stringBuilder.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* 字符串统一的格式化处理
|
||||
* @param original 原始文本
|
||||
* @param context 上下文
|
||||
* @return 结果
|
||||
* @since 0.6.0
|
||||
*/
|
||||
public static Map<Character, Character> formatCharsMapping(final String original, final IWordContext context) {
|
||||
if(StringUtil.isEmpty(original)) {
|
||||
return Collections.emptyMap();
|
||||
}
|
||||
|
||||
final int len = original.length();
|
||||
|
||||
char[] rawChars = original.toCharArray();
|
||||
Map<Character, Character> map = new HashMap<>(rawChars.length);
|
||||
|
||||
ICharFormat charFormat = context.charFormat();
|
||||
for(int i = 0; i < len; i++) {
|
||||
final char currentChar = rawChars[i];
|
||||
char formatChar = charFormat.format(currentChar, context);
|
||||
map.put(currentChar, formatChar);
|
||||
}
|
||||
|
||||
return map;
|
||||
}
|
||||
|
||||
/**
|
||||
* 格式化列表
|
||||
* @param list 列表
|
||||
|
||||
@@ -6,7 +6,7 @@ import com.github.houbb.sensitive.word.core.SensitiveWordHelper;
|
||||
import org.junit.Ignore;
|
||||
import org.junit.Test;
|
||||
|
||||
//@Ignore
|
||||
@Ignore
|
||||
public class BenchmarkBasicTest {
|
||||
|
||||
/**
|
||||
@@ -42,6 +42,8 @@ public class BenchmarkBasicTest {
|
||||
* 12111 第一次优化
|
||||
*
|
||||
* 1133 只有单词校验
|
||||
*
|
||||
* V0.6.0 优化 replace mapping 之后:621ms,性能接近翻倍。
|
||||
*/
|
||||
@Test
|
||||
public void costTimeOnlyWordTest() {
|
||||
|
||||
@@ -0,0 +1,76 @@
|
||||
package com.github.houbb.sensitive.word.benchmark;
|
||||
|
||||
import com.github.houbb.heaven.util.util.RandomUtil;
|
||||
import com.github.houbb.sensitive.word.bs.SensitiveWordBs;
|
||||
import com.github.houbb.sensitive.word.core.SensitiveWordHelper;
|
||||
import org.junit.Ignore;
|
||||
import org.junit.Test;
|
||||
|
||||
@Ignore
|
||||
public class BenchmarkTimesTest {
|
||||
|
||||
/**
|
||||
* 测试基准:100+字符串 * 10W次
|
||||
*
|
||||
* V0.6.0: 1470ms,接近 7.2W QPS
|
||||
*/
|
||||
@Test
|
||||
public void onlyWordAndNoReplaceTest() {
|
||||
// 1W 次
|
||||
SensitiveWordBs sensitiveWordBs = SensitiveWordBs.newInstance()
|
||||
.enableWordCheck(true)
|
||||
.enableNumCheck(false)
|
||||
.enableUrlCheck(false)
|
||||
.enableEmailCheck(false)
|
||||
.ignoreRepeat(false)
|
||||
.ignoreCase(false)
|
||||
.ignoreNumStyle(false)
|
||||
.ignoreChineseStyle(false)
|
||||
.ignoreEnglishStyle(false)
|
||||
.ignoreWidth(false)
|
||||
.init();
|
||||
|
||||
String randomText = "你他妈的不要说脏话"+ RandomUtil.randomString("1234567890bcdefghiJKLMNOPQRSTUVWXYZ", 100)
|
||||
+ "我们他妈的从来不说脏说";
|
||||
|
||||
long start = System.currentTimeMillis();
|
||||
for(int i = 0; i < 100_000; i++) {
|
||||
sensitiveWordBs.findAll(randomText);
|
||||
}
|
||||
long end = System.currentTimeMillis();
|
||||
System.out.println("------------------ COST: " + (end-start));
|
||||
}
|
||||
|
||||
/**
|
||||
* 测试基准:100+字符串 * 10W次
|
||||
*
|
||||
* V0.6.0: 2744ms
|
||||
*/
|
||||
@Test
|
||||
public void onlyWordAndWithReplaceTest() {
|
||||
// 1W 次
|
||||
SensitiveWordBs sensitiveWordBs = SensitiveWordBs.newInstance()
|
||||
.enableWordCheck(true)
|
||||
.enableNumCheck(false)
|
||||
.enableUrlCheck(false)
|
||||
.enableEmailCheck(false)
|
||||
.ignoreRepeat(true)
|
||||
.ignoreCase(true)
|
||||
.ignoreNumStyle(true)
|
||||
.ignoreChineseStyle(true)
|
||||
.ignoreEnglishStyle(true)
|
||||
.ignoreWidth(true)
|
||||
.init();
|
||||
|
||||
String randomText = "你他妈的不要说脏话"+ RandomUtil.randomString("1234567890bcdefghiJKLMNOPQRSTUVWXYZ", 100)
|
||||
+ "我们他妈的从来不说脏说";
|
||||
|
||||
long start = System.currentTimeMillis();
|
||||
for(int i = 0; i < 100_000; i++) {
|
||||
sensitiveWordBs.findAll(randomText);
|
||||
}
|
||||
long end = System.currentTimeMillis();
|
||||
System.out.println("------------------ COST: " + (end-start));
|
||||
}
|
||||
|
||||
}
|
||||
Reference in New Issue
Block a user