release branch 0.4.0

This commit is contained in:
binbin.hou
2023-06-08 14:53:57 +08:00
parent 10dfb3acd2
commit 678686df0b
25 changed files with 413 additions and 209 deletions

View File

@@ -163,3 +163,10 @@
| 1 | O | 中文繁简体样式 | 2023-06-07 23:51:58 | 调整实现策略 |
| 2 | A | 代码结构优化 | 2023-06-07 23:51:58 | 调整实现策略 |
# release_0.4.0
| 序号 | 变更类型 | 说明 | 时间 | 备注 |
|:---|:-----|:------------|:--------------------|:-------|
| 1 | O | 优化单词校验逻辑 | 2023-06-08 23:51:58 | |
| 2 | A | 新增是否单词校验的开关 | 2023-06-08 23:51:58 | |

View File

@@ -366,8 +366,9 @@ Assert.assertTrue(wordBs.contains(text));
| 7 | enableNumCheck | 是否启用数字检测。 | true |
| 8 | enableEmailCheck | 是否启用邮箱检测 | true |
| 9 | enableUrlCheck | 是否启用链接检测 | true |
| 10 | numCheckLen | 数字检测,自定义指定长度。 | 8 |
| 11 | sensitiveWordReplace | 敏感词替换策略 | `*` 替换 |
| 10 | enableWordCheck | 是否启用敏感单词检测 | true |
| 11 | numCheckLen | 数字检测,自定义指定长度。 | 8 |
| 12 | sensitiveWordReplace | 敏感词替换策略 | `*` 替换 |
# 动态加载(用户自定义)
@@ -617,7 +618,11 @@ public class SensitiveWordService {
# 后期 road-map
- [ ] wordMap 的抽象,便于拓展
- [x] wordMap 的抽象,便于拓展
- [ ] word 的统一性能优化,移除 string 的生成
- [ ] word 策略的优化,统一遍历+转换
- 同音字处理

View File

@@ -6,7 +6,7 @@
<groupId>com.github.houbb</groupId>
<artifactId>sensitive-word</artifactId>
<version>0.3.2</version>
<version>0.4.0</version>
<properties>
<!--============================== All Plugins START ==============================-->

View File

@@ -10,9 +10,9 @@ ECHO "============================= RELEASE START..."
:: 版本号信息(需要手动指定)
:::: 旧版本名称
SET version=0.3.2
SET version=0.4.0
:::: 新版本名称
SET newVersion=0.3.3
SET newVersion=0.5.0
:::: 组织名称
SET groupName=com.github.houbb
:::: 项目名称

View File

@@ -10,10 +10,15 @@ public interface ISensitiveWordReplace {
/**
* 替换
* @param context 上下文
* @return 结果
* @since 0.2.0
* <p>
* 说明:废弃以前的字符串返回,减少对象创建,提升性能。
*
* @param stringBuilder 字符串连接器
* @param rawChars 原始字符串
* @param wordResult 当前的敏感词结果
* @param wordContext 上下文
* @since 0.4.0
*/
String replace(ISensitiveWordReplaceContext context);
void replace(final StringBuilder stringBuilder, final char[] rawChars, final IWordResult wordResult, final IWordContext wordContext);
}

View File

@@ -1,10 +1,7 @@
package com.github.houbb.sensitive.word.api;
import com.github.houbb.sensitive.word.bs.SensitiveWordContext;
import com.github.houbb.sensitive.word.support.check.ISensitiveCheck;
import java.util.Map;
/**
* @author binbin.hou
* @since 0.0.4
@@ -72,64 +69,62 @@ public interface IWordContext {
IWordContext ignoreChineseStyle(final boolean ignoreChineseStyle);
/**
* 获取敏感词信息
* @return 敏感词
* @since 0.0.5
* 是否启用单词
* @return 是否
*/
Map sensitiveWordMap();
boolean enableWordCheck();
/**
* 敏感词信息
* @param map map 信息
* @return this
* @since 0.0.5
* 设置是否启用单词
* @param enableWordCheck 是否
* @return 结果
*/
IWordContext sensitiveWordMap(final Map map);
IWordContext enableWordCheck(boolean enableWordCheck);
/**
* 敏感数字检测
* @return 数字检测
* @since 0.0.5
*/
boolean sensitiveCheckNum();
boolean enableNumCheck();
/**
* 设置敏感数字检测
* @param sensitiveCheckNum 数字格式检测
* @param enableNumCheck 数字格式检测
* @return this
* @since 0.0.5
*/
IWordContext sensitiveCheckNum(final boolean sensitiveCheckNum);
IWordContext enableNumCheck(final boolean enableNumCheck);
/**
* 是否进行邮箱检测
* @return this
* @since 0.0.9
*/
boolean sensitiveCheckEmail();
boolean enableEmailCheck();
/**
* 设置敏感邮箱检测
* @param sensitiveCheckEmail 是否检测
* @param enableEmailCheck 是否检测
* @return this
* @since 0.0.9
*/
IWordContext sensitiveCheckEmail(final boolean sensitiveCheckEmail);
IWordContext enableEmailCheck(final boolean enableEmailCheck);
/**
* 敏感链接检测
* @return 是否启用
* @since 0.0.12
*/
boolean sensitiveCheckUrl();
boolean enableUrlCheck();
/**
* 设置敏感链接检测
* @param sensitiveCheckUrl 是否检测
* @param enableUrlCheck 是否检测
* @return this
* @since 0.0.9
*/
IWordContext sensitiveCheckUrl(final boolean sensitiveCheckUrl);
IWordContext enableUrlCheck(final boolean enableUrlCheck);
/**
* 忽略英文的写法

View File

@@ -1,6 +1,7 @@
package com.github.houbb.sensitive.word.api;
import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum;
import com.github.houbb.sensitive.word.constant.enums.WordContainsTypeEnum;
import com.github.houbb.sensitive.word.support.check.ISensitiveCheck;
import java.util.Collection;
@@ -29,7 +30,7 @@ public interface IWordMap {
* @since 0.0.1
* @see ValidModeEnum#FAIL_FAST 建议使用快速返回模式
*/
boolean contains(final String string,
final IWordContext context);
WordContainsTypeEnum contains(final String string,
final IWordContext context);
}

View File

@@ -65,21 +65,27 @@ public class SensitiveWordBs {
/**
* 启用数字检测
*/
private boolean sensitiveCheckNum = true;
private boolean enableNumCheck = true;
/**
* 启用邮箱检测
*/
private boolean sensitiveCheckEmail = true;
private boolean enableEmailCheck = true;
/**
* 启用 URL 检测
*/
private boolean sensitiveCheckUrl = true;
private boolean enableUrlCheck = true;
/**
* 单词校验
* @since 0.4.0
*/
private boolean enableWordCheck = true;
// 额外配置
/**
* 检测数字时的长度
*/
private int sensitiveCheckNumLen = 8;
private int numCheckLen = 8;
//------------------------------------------------------------- 基本属性 END
/**
@@ -177,12 +183,13 @@ public class SensitiveWordBs {
context.ignoreRepeat(ignoreRepeat);
// 开启校验
context.sensitiveCheckNum(sensitiveCheckNum);
context.sensitiveCheckEmail(sensitiveCheckEmail);
context.sensitiveCheckUrl(sensitiveCheckUrl);
context.enableNumCheck(enableNumCheck);
context.enableEmailCheck(enableEmailCheck);
context.enableUrlCheck(enableUrlCheck);
context.enableWordCheck(enableWordCheck);
// 额外配置
context.sensitiveCheckNumLen(sensitiveCheckNumLen);
context.sensitiveCheckNumLen(numCheckLen);
context.sensitiveWordReplace(sensitiveWordReplace);
context.wordMap(wordMap);
@@ -247,6 +254,18 @@ public class SensitiveWordBs {
return this;
}
/**
* Sets whether the sensitive-word check is enabled.
* The value is stored on this builder and handed to the context
* through {@code context.enableWordCheck(...)} when the context is built.
*
* @param enableWordCheck {@code true} to enable the word check, {@code false} to disable it
* @since 0.4.0
* @return this
*/
public SensitiveWordBs enableWordCheck(boolean enableWordCheck) {
this.enableWordCheck = enableWordCheck;
return this;
}
/**
* 设置是否启动数字检测
*
@@ -255,7 +274,7 @@ public class SensitiveWordBs {
* @return this
*/
public SensitiveWordBs enableNumCheck(boolean enableNumCheck) {
this.sensitiveCheckNum = enableNumCheck;
this.enableNumCheck = enableNumCheck;
return this;
}
@@ -266,7 +285,7 @@ public class SensitiveWordBs {
* @since 0.2.1
*/
public SensitiveWordBs numCheckLen(int numCheckLen) {
this.sensitiveCheckNumLen = numCheckLen;
this.numCheckLen = numCheckLen;
return this;
}
@@ -278,7 +297,7 @@ public class SensitiveWordBs {
* @return this
*/
public SensitiveWordBs enableEmailCheck(boolean enableEmailCheck) {
this.sensitiveCheckEmail = enableEmailCheck;
this.enableEmailCheck = enableEmailCheck;
return this;
}
@@ -290,7 +309,7 @@ public class SensitiveWordBs {
* @return this
*/
public SensitiveWordBs enableUrlCheck(boolean enableUrlCheck) {
this.sensitiveCheckUrl = enableUrlCheck;
this.enableUrlCheck = enableUrlCheck;
return this;
}

View File

@@ -1,10 +1,11 @@
package com.github.houbb.sensitive.word.bs;
import com.github.houbb.sensitive.word.api.*;
import com.github.houbb.sensitive.word.api.ICharFormat;
import com.github.houbb.sensitive.word.api.ISensitiveWordReplace;
import com.github.houbb.sensitive.word.api.IWordContext;
import com.github.houbb.sensitive.word.api.IWordMap;
import com.github.houbb.sensitive.word.support.check.ISensitiveCheck;
import java.util.Map;
/**
* 上下文
* @author binbin.hou
@@ -31,17 +32,16 @@ public class SensitiveWordContext implements IWordContext {
private boolean ignoreNumStyle;
/**
* 敏感词信息
* @since 0.0.5
* 启动单词校验
* @since 0.4.0
*/
@Deprecated
private Map sensitiveWordMap;
private boolean enableWordCheck;
/**
* 是否进行敏感数字检测
* @since 0.0.6
*/
private boolean sensitiveCheckNum;
private boolean enableNumCheck;
/**
* 是否忽略中文繁简体
@@ -65,13 +65,13 @@ public class SensitiveWordContext implements IWordContext {
* 是否进行邮箱测试
* @since 0.0.9
*/
private boolean sensitiveCheckEmail;
private boolean enableEmailCheck;
/**
* 是否进行 url 测试
* @since 0.0.12
*/
private boolean sensitiveCheckUrl;
private boolean enableUrlCheck;
/**
* 敏感数字检测对应的长度限制
@@ -182,25 +182,23 @@ public class SensitiveWordContext implements IWordContext {
return this;
}
@Override
public Map sensitiveWordMap() {
return sensitiveWordMap;
public boolean enableWordCheck() {
return enableWordCheck;
}
@Override
public SensitiveWordContext sensitiveWordMap(Map sensitiveWordMap) {
this.sensitiveWordMap = sensitiveWordMap;
public SensitiveWordContext enableWordCheck(boolean enableWordCheck) {
this.enableWordCheck = enableWordCheck;
return this;
}
@Override
public boolean sensitiveCheckNum() {
return sensitiveCheckNum;
public boolean enableNumCheck() {
return enableNumCheck;
}
@Override
public SensitiveWordContext sensitiveCheckNum(boolean sensitiveCheckNum) {
this.sensitiveCheckNum = sensitiveCheckNum;
public SensitiveWordContext enableNumCheck(boolean enableNumCheck) {
this.enableNumCheck = enableNumCheck;
return this;
}
@@ -238,24 +236,24 @@ public class SensitiveWordContext implements IWordContext {
}
@Override
public boolean sensitiveCheckEmail() {
return sensitiveCheckEmail;
public boolean enableEmailCheck() {
return enableEmailCheck;
}
@Override
public SensitiveWordContext sensitiveCheckEmail(boolean sensitiveCheckEmail) {
this.sensitiveCheckEmail = sensitiveCheckEmail;
public SensitiveWordContext enableEmailCheck(boolean enableEmailCheck) {
this.enableEmailCheck = enableEmailCheck;
return this;
}
@Override
public boolean sensitiveCheckUrl() {
return sensitiveCheckUrl;
public boolean enableUrlCheck() {
return enableUrlCheck;
}
@Override
public SensitiveWordContext sensitiveCheckUrl(boolean sensitiveCheckUrl) {
this.sensitiveCheckUrl = sensitiveCheckUrl;
public SensitiveWordContext enableUrlCheck(boolean enableUrlCheck) {
this.enableUrlCheck = enableUrlCheck;
return this;
}

View File

@@ -19,34 +19,16 @@ public final class AppConst {
*/
public static final String IS_END = "ED";
/**
* 字典的大小
* @since 0.0.1
*/
public static final int DICT_SIZE = 65275;
/**
* 英语词典的大小
* @since 0.0.4
*/
public static final int DICT_EN_SIZE = 12;
/**
* 拒绝的词语
* @since 0.0.8
*/
public static final String SENSITIVE_WORD_DENY_PATH = "/sensitive_word_deny.txt";
/**
* 用户允许的词语
* @since 0.0.8
*/
public static final String SENSITIVE_WORD_ALLOW_PATH = "/sensitive_word_allow.txt";
/**
* 最长的网址长度
* @since 0.3.0
*/
public static final int MAX_WEB_SITE_LEN = 70;
/**
* 最大邮箱地址
* @since 0.4.0
*/
public static final int MAX_EMAIL_LEN = 64;
}

View File

@@ -0,0 +1,24 @@
package com.github.houbb.sensitive.word.constant.enums;
/**
 * Result category of looking a string up in the word map.
 *
 * @since 0.4.0
 */
public enum WordContainsTypeEnum {

    /** The string is contained, but only as a prefix (it is not the end of a word). */
    CONTAINS_PREFIX,

    /** The string is contained and is the end of a word. */
    CONTAINS_END,

    /** The string is not contained at all. */
    NOT_FOUND
}

View File

@@ -3,11 +3,9 @@ package com.github.houbb.sensitive.word.core;
import com.github.houbb.heaven.util.lang.StringUtil;
import com.github.houbb.heaven.util.util.CollectionUtil;
import com.github.houbb.sensitive.word.api.*;
import com.github.houbb.sensitive.word.support.replace.SensitiveWordReplaceContext;
import java.util.Collections;
import java.util.List;
import java.util.Map;
/**
* 抽象实现
@@ -42,7 +40,7 @@ public abstract class AbstractSensitiveWord implements ISensitiveWord {
// 注意边界
int startIndex = 0;
char[] chars = target.toCharArray();
char[] rawChars = target.toCharArray();
for(IWordResult wordResult : allList) {
final int itemStartIx = wordResult.startIndex();
@@ -50,24 +48,19 @@ public abstract class AbstractSensitiveWord implements ISensitiveWord {
// 脱敏的左边
if(startIndex < itemStartIx) {
stringBuilder.append(chars, startIndex, itemStartIx-startIndex);
stringBuilder.append(rawChars, startIndex, itemStartIx-startIndex);
}
// 脱敏部分
String word = wordResult.word();
ISensitiveWordReplaceContext replaceContext = SensitiveWordReplaceContext.newInstance()
.sensitiveWord(word)
.wordLength(word.length());
String replacedText = replace.replace(replaceContext);
stringBuilder.append(replacedText);
replace.replace(stringBuilder, rawChars, wordResult, context);
// 更新结尾
startIndex = Math.max(startIndex, itemEndIx);
}
// 最后部分
if (startIndex < chars.length) {
stringBuilder.append(chars, startIndex, chars.length-startIndex);
if (startIndex < rawChars.length) {
stringBuilder.append(rawChars, startIndex, rawChars.length-startIndex);
}
return stringBuilder.toString();

View File

@@ -57,6 +57,7 @@ public class SensitiveWord extends AbstractSensitiveWord {
int wordLength = checkResult.index();
if (wordLength > 0) {
// 保存敏感词
// TODO: 这其实是一个比较消耗的操作,后续可以考虑简化掉。
String sensitiveWord = text.substring(i, i + wordLength);
// 添加去重

View File

@@ -0,0 +1,83 @@
package com.github.houbb.sensitive.word.support.check.impl;
import com.github.houbb.heaven.annotation.ThreadSafe;
import com.github.houbb.sensitive.word.api.IWordContext;
import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum;
import com.github.houbb.sensitive.word.support.check.ISensitiveCheck;
import com.github.houbb.sensitive.word.support.check.SensitiveCheckResult;
/**
 * Base class for checks built from two conditions: a per-character condition
 * that decides how far the candidate run extends, and a whole-candidate
 * condition that decides whether the collected run counts as a match.
 *
 * @author binbin.hou
 * @since 0.4.0
 */
@ThreadSafe
public abstract class AbstractConditionSensitiveCheck extends AbstractSensitiveCheck {

    /**
     * Decides whether a single character may be part of the candidate run.
     *
     * @param mappingChar the character after {@code context.charFormat()} has been applied
     * @param index       index of the character in {@code rawText}
     * @param rawText     the original text
     * @param context     the word context
     * @return {@code true} if the character satisfies the condition
     * @since 0.3.2
     */
    protected abstract boolean isCharCondition(char mappingChar,
                                               int index,
                                               String rawText,
                                               final IWordContext context);

    /**
     * Decides whether the collected run as a whole satisfies the check
     * (for example a length threshold or a format test).
     *
     * @param index         index of the last character that was examined: either the
     *                      character that failed {@link #isCharCondition} or the last
     *                      character of the text; {@code 0} if no character was examined
     * @param rawText       the original text
     * @param stringBuilder buffer holding the collected (unmapped) characters
     * @param context       the word context
     * @return {@code true} if the collected run satisfies the condition
     * @since 0.3.2
     */
    protected abstract boolean isStringCondition(int index,
                                                 String rawText,
                                                 final StringBuilder stringBuilder,
                                                 final IWordContext context);

    /**
     * Collects characters from {@code beginIndex} while {@link #isCharCondition}
     * holds, then asks {@link #isStringCondition} once about the collected run.
     * {@code validModeEnum} is not consulted by this implementation.
     *
     * @param txt           the text to scan
     * @param beginIndex    index at which scanning starts
     * @param validModeEnum validation mode (unused here)
     * @param context       the word context
     * @return length of the collected run if it satisfies the string condition, otherwise {@code 0}
     */
    @Override
    protected int doGetActualLength(String txt, int beginIndex,
                                    ValidModeEnum validModeEnum,
                                    IWordContext context) {
        // Idea from the original author: a ThreadLocal buffer could reduce object creation here.
        StringBuilder stringBuilder = new StringBuilder();

        int currentIx = 0;
        for (int i = beginIndex; i < txt.length(); i++) {
            currentIx = i;
            char currentChar = txt.charAt(i);

            // The condition is evaluated on the formatted character ...
            char mappingChar = context.charFormat().format(currentChar, context);
            if (!isCharCondition(mappingChar, i, txt, context)) {
                break;
            }

            // ... but the buffer keeps the raw character.
            stringBuilder.append(currentChar);
        }

        // Single whole-run decision once the run has ended.
        return isStringCondition(currentIx, txt, stringBuilder, context)
                ? stringBuilder.length()
                : 0;
    }

}

View File

@@ -1,6 +1,7 @@
package com.github.houbb.sensitive.word.support.check.impl;
import com.github.houbb.heaven.annotation.ThreadSafe;
import com.github.houbb.heaven.util.lang.StringUtil;
import com.github.houbb.sensitive.word.api.IWordContext;
import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum;
import com.github.houbb.sensitive.word.support.check.ISensitiveCheck;
@@ -10,39 +11,11 @@ import com.github.houbb.sensitive.word.support.check.SensitiveCheckResult;
* 抽象实现策略
*
* @author binbin.hou
* @since 0.3.2
* @since 0.4.0
*/
@ThreadSafe
public abstract class AbstractSensitiveCheck implements ISensitiveCheck {
/**
* 当前字符串是否符合规范
* @param mappingChar 当前字符
* @param index 下标
* @param rawText 原始文本
* @param context 上下文
* @return 结果
* @since 0.3.2
*/
protected abstract boolean isCharCondition(char mappingChar,
int index,
String rawText,
final IWordContext context);
/**
* 这里指定一个阈值条件
* @param index 当前下标
* @param rawText 原始文本
* @param stringBuilder 缓存
* @param context 上下文
* @return 是否满足条件
* @since 0.3.2
*/
protected abstract boolean isStringCondition(int index,
String rawText,
final StringBuilder stringBuilder,
final IWordContext context);
/**
* 获取校验类
* @return 类
@@ -50,41 +23,49 @@ public abstract class AbstractSensitiveCheck implements ISensitiveCheck {
*/
protected abstract Class<? extends ISensitiveCheck> getSensitiveCheckClass();
/**
* 获取确切的长度
* @param txt 文本
* @param beginIndex 开始
* @param validModeEnum 校验枚举
* @param context 上下文
* @return 长度
* @since 0.4.0
*/
protected abstract int doGetActualLength(String txt, int beginIndex,
ValidModeEnum validModeEnum,
IWordContext context);
/**
* 获取确切的长度
* @param txt 文本
* @param beginIndex 开始
* @param validModeEnum 校验枚举
* @param context 上下文
* @return 长度
* @since 0.4.0
*/
protected int getActualLength(String txt, int beginIndex,
ValidModeEnum validModeEnum,
IWordContext context) {
if(StringUtil.isEmpty(txt)) {
return 0;
}
return doGetActualLength(txt, beginIndex, validModeEnum, context);
}
@Override
public SensitiveCheckResult sensitiveCheck(String txt, int beginIndex,
ValidModeEnum validModeEnum,
IWordContext context) {
// 采用 ThreadLocal 应该可以提升性能,减少对象的创建。
StringBuilder stringBuilder = new StringBuilder();
int actualLength = 0;
// 前一个条件
for(int i = beginIndex; i < txt.length(); i++) {
char currentChar = txt.charAt(i);
// 映射处理
char mappingChar = context.charFormat().format(currentChar, context);
// 符合条件
boolean currentCondition = isCharCondition(mappingChar, i, txt, context);
if(currentCondition) {
stringBuilder.append(currentChar);
// 匹配
if(isStringCondition(i, txt, stringBuilder, context)) {
actualLength = stringBuilder.length();
// 是否遍历全部匹配的模式
if(ValidModeEnum.FAIL_FAST.equals(validModeEnum)) {
break;
}
}
} else {
break;
}
Class<? extends ISensitiveCheck> clazz = getSensitiveCheckClass();
if(StringUtil.isEmpty(txt)) {
return SensitiveCheckResult.of(0, clazz);
}
int actualLength = getActualLength(txt, beginIndex, validModeEnum, context);
// 处理结果
return SensitiveCheckResult.of(actualLength, getSensitiveCheckClass());
return SensitiveCheckResult.of(actualLength, clazz);
}
}

View File

@@ -4,9 +4,8 @@ import com.github.houbb.heaven.annotation.ThreadSafe;
import com.github.houbb.heaven.util.lang.CharUtil;
import com.github.houbb.heaven.util.util.regex.RegexUtil;
import com.github.houbb.sensitive.word.api.IWordContext;
import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum;
import com.github.houbb.sensitive.word.constant.AppConst;
import com.github.houbb.sensitive.word.support.check.ISensitiveCheck;
import com.github.houbb.sensitive.word.support.check.SensitiveCheckResult;
/**
* email 正则表达式检测实现。
@@ -24,7 +23,7 @@ import com.github.houbb.sensitive.word.support.check.SensitiveCheckResult;
* @since 0.0.9
*/
@ThreadSafe
public class SensitiveCheckEmail extends AbstractSensitiveCheck {
public class SensitiveCheckEmail extends AbstractConditionSensitiveCheck {
/**
* @since 0.3.0
@@ -42,6 +41,16 @@ public class SensitiveCheckEmail extends AbstractSensitiveCheck {
@Override
protected boolean isStringCondition(int index, String rawText, StringBuilder stringBuilder, IWordContext context) {
int bufferLen = stringBuilder.length();
//x@a.cn
if(bufferLen < 6) {
return false;
}
if(bufferLen > AppConst.MAX_EMAIL_LEN) {
return false;
}
String string = stringBuilder.toString();
return RegexUtil.isEmail(string);
}

View File

@@ -12,7 +12,7 @@ import com.github.houbb.sensitive.word.support.check.ISensitiveCheck;
* @since 0.0.5
*/
@ThreadSafe
public class SensitiveCheckNum extends AbstractSensitiveCheck {
public class SensitiveCheckNum extends AbstractConditionSensitiveCheck {
/**
* @since 0.3.0

View File

@@ -20,7 +20,7 @@ import com.github.houbb.sensitive.word.support.check.ISensitiveCheck;
* @since 0.0.9
*/
@ThreadSafe
public class SensitiveCheckUrl extends AbstractSensitiveCheck {
public class SensitiveCheckUrl extends AbstractConditionSensitiveCheck {
/**
* @since 0.3.0
@@ -39,6 +39,10 @@ public class SensitiveCheckUrl extends AbstractSensitiveCheck {
@Override
protected boolean isStringCondition(int index, String rawText, StringBuilder stringBuilder, IWordContext context) {
int bufferLen = stringBuilder.length();
//a.cn
if(bufferLen < 4) {
return false;
}
if(bufferLen > AppConst.MAX_WEB_SITE_LEN) {
return false;
}

View File

@@ -2,6 +2,9 @@ package com.github.houbb.sensitive.word.support.check.impl;
import com.github.houbb.heaven.annotation.ThreadSafe;
import com.github.houbb.sensitive.word.api.IWordContext;
import com.github.houbb.sensitive.word.api.IWordMap;
import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum;
import com.github.houbb.sensitive.word.constant.enums.WordContainsTypeEnum;
import com.github.houbb.sensitive.word.support.check.ISensitiveCheck;
/**
@@ -21,19 +24,44 @@ public class SensitiveCheckWord extends AbstractSensitiveCheck {
return INSTANCE;
}
@Override
protected boolean isCharCondition(char mappingChar, int index, String rawText, IWordContext context) {
return true;
}
@Override
protected boolean isStringCondition(int index, String rawText, StringBuilder stringBuilder, IWordContext context) {
return context.wordMap().contains(stringBuilder.toString(), context);
}
@Override
protected Class<? extends ISensitiveCheck> getSensitiveCheckClass() {
return SensitiveCheckWord.class;
}
/**
 * Grows a candidate one formatted character at a time from {@code beginIndex}
 * and asks the word map about it after every step.
 *
 * @param txt           the text to scan
 * @param beginIndex    index at which scanning starts
 * @param validModeEnum when {@link ValidModeEnum#FAIL_FAST}, scanning stops at the first complete word
 * @param context       the word context supplying the word map and the char format
 * @return length of the last candidate reported as {@link WordContainsTypeEnum#CONTAINS_END},
 *         or {@code 0} if there was none
 */
@Override
protected int doGetActualLength(String txt, int beginIndex, ValidModeEnum validModeEnum, IWordContext context) {
    final IWordMap dictionary = context.wordMap();
    final boolean stopAtFirstHit = ValidModeEnum.FAIL_FAST.equals(validModeEnum);
    final StringBuilder candidate = new StringBuilder();
    final int textLength = txt.length();

    int matchedLength = 0;
    for (int pos = beginIndex; pos < textLength; pos++) {
        // The candidate is built from formatted characters.
        candidate.append(context.charFormat().format(txt.charAt(pos), context));

        final WordContainsTypeEnum lookup = dictionary.contains(candidate.toString(), context);

        // Not contained at all: extending the candidate further is pointless.
        if (lookup == WordContainsTypeEnum.NOT_FOUND) {
            break;
        }

        if (lookup == WordContainsTypeEnum.CONTAINS_END) {
            matchedLength = candidate.length();
            // Otherwise keep scanning so a later, longer word can overwrite the length.
            if (stopAtFirstHit) {
                break;
            }
        }
    }
    return matchedLength;
}
}

View File

@@ -28,16 +28,16 @@ public final class SensitiveChecks {
public static ISensitiveCheck initSensitiveCheck(final IWordContext context) {
List<ISensitiveCheck> sensitiveCheckList = new ArrayList<>();
// 默认添加敏感词校验
sensitiveCheckList.add(SensitiveChecks.word());
if(context.sensitiveCheckNum()) {
if(context.enableWordCheck()) {
sensitiveCheckList.add(SensitiveChecks.word());
}
if(context.enableNumCheck()) {
sensitiveCheckList.add(SensitiveChecks.num());
}
if(context.sensitiveCheckEmail()) {
if(context.enableEmailCheck()) {
sensitiveCheckList.add(SensitiveChecks.email());
}
if(context.sensitiveCheckUrl()) {
if(context.enableUrlCheck()) {
sensitiveCheckList.add(SensitiveChecks.url());
}

View File

@@ -6,6 +6,7 @@ import com.github.houbb.heaven.util.lang.StringUtil;
import com.github.houbb.sensitive.word.api.IWordContext;
import com.github.houbb.sensitive.word.api.IWordMap;
import com.github.houbb.sensitive.word.constant.AppConst;
import com.github.houbb.sensitive.word.constant.enums.WordContainsTypeEnum;
import java.util.Collection;
import java.util.HashMap;
@@ -98,15 +99,15 @@ public class WordMap implements IWordMap {
* @since 0.0.1
*/
@Override
public boolean contains(String string, final IWordContext context) {
public WordContainsTypeEnum contains(String string, final IWordContext context) {
if (StringUtil.isEmpty(string)) {
return false;
return WordContainsTypeEnum.NOT_FOUND;
}
return innerContainsSensitive(string, context);
}
private boolean innerContainsSensitive(String txt,
private WordContainsTypeEnum innerContainsSensitive(String txt,
IWordContext context) {
// 初始化为当前的 map
Map nowMap = this.innerWordMap;
@@ -118,11 +119,17 @@ public class WordMap implements IWordMap {
// 如果不为空,则判断是否为结尾。
if (ObjectUtil.isNull(nowMap)) {
return false;
return WordContainsTypeEnum.NOT_FOUND;
}
}
return isEnd(nowMap);
// 是否为结尾,便于快速失败
boolean isEnd = isEnd(nowMap);
if(isEnd) {
return WordContainsTypeEnum.CONTAINS_END;
}
return WordContainsTypeEnum.CONTAINS_PREFIX;
}
/**

View File

@@ -5,6 +5,8 @@ import com.github.houbb.heaven.constant.CharConst;
import com.github.houbb.heaven.util.lang.CharUtil;
import com.github.houbb.sensitive.word.api.ISensitiveWordReplace;
import com.github.houbb.sensitive.word.api.ISensitiveWordReplaceContext;
import com.github.houbb.sensitive.word.api.IWordContext;
import com.github.houbb.sensitive.word.api.IWordResult;
/**
* 指定字符的替换策略
@@ -29,10 +31,11 @@ public class SensitiveWordReplaceChar implements ISensitiveWordReplace {
}
@Override
public String replace(ISensitiveWordReplaceContext context) {
int wordLength = context.wordLength();
return CharUtil.repeat(replaceChar, wordLength);
public void replace(StringBuilder stringBuilder, final char[] rawChars, IWordResult wordResult, IWordContext wordContext) {
    // Append one replacement character for each of the (endIndex - startIndex) matched characters.
    int remaining = wordResult.endIndex() - wordResult.startIndex();
    while (remaining-- > 0) {
        stringBuilder.append(replaceChar);
    }
}
}

View File

@@ -0,0 +1,56 @@
package com.github.houbb.sensitive.word.benchmark;
import com.github.houbb.heaven.util.util.RandomUtil;
import com.github.houbb.sensitive.word.bs.SensitiveWordBs;
import com.github.houbb.sensitive.word.core.SensitiveWordHelper;
import org.junit.Ignore;
import org.junit.Test;
// Manual timing benchmarks; the whole class is disabled through @Ignore.
@Ignore
public class BasicTest {
/**
* Times 10,000 calls of {@code SensitiveWordHelper.findAll} on one text and prints the elapsed milliseconds.
*
* Author's recorded timings:
* 100*100: 926 ms, poor performance.
*
* 100*100000 characters: 12942 ms, first optimization.
*/
@Test
public void costTimeTest() {
String randomText = "你他妈的不要说脏话"+ RandomUtil.randomString("1234567890bcdefghiJKLMNOPQRSTUVWXYZ", 100)
+ "我们他妈的从来不说脏说";
// 10,000 iterations
long start = System.currentTimeMillis();
for(int i = 0; i < 10000; i++) {
SensitiveWordHelper.findAll(randomText);
}
long end = System.currentTimeMillis();
System.out.println("------------------ COST: " + (end-start));
}
/**
* Times 10,000 calls of {@code findAll} on a {@code SensitiveWordBs} built with the word check switched off.
*
* Author's recorded timing:
* 100*100000 characters: 12440 ms
*
* NOTE(review): the method name says "only word", but the builder passes
* {@code enableWordCheck(false)} - confirm which one is intended.
*/
@Test
public void costTimeOnlyWordTest() {
String randomText = "你他妈的不要说脏话"+ RandomUtil.randomString("1234567890bcdefghiJKLMNOPQRSTUVWXYZ", 100)
+ "我们他妈的从来不说脏说";
// 10,000 iterations; the timer starts before the instance is built, so init() is included in the measurement
long start = System.currentTimeMillis();
SensitiveWordBs sensitiveWordBs = SensitiveWordBs.newInstance()
.enableWordCheck(false)
.init();
for(int i = 0; i < 10000; i++) {
sensitiveWordBs.findAll(randomText);
}
long end = System.currentTimeMillis();
System.out.println("------------------ COST: " + (end-start));
}
}

View File

@@ -1,5 +1,6 @@
package com.github.houbb.sensitive.word.data;
import org.junit.Ignore;
import org.junit.Test;
import java.util.Arrays;
@@ -9,6 +10,7 @@ import java.util.List;
* @author binbin.hou
* @since 0.0.11
*/
@Ignore
public class NumUtilTest {
@Test

View File

@@ -1,8 +1,8 @@
package com.github.houbb.sensitive.word.replace;
import com.github.houbb.heaven.util.lang.CharUtil;
import com.github.houbb.sensitive.word.api.ISensitiveWordReplace;
import com.github.houbb.sensitive.word.api.ISensitiveWordReplaceContext;
import com.github.houbb.sensitive.word.api.IWordContext;
import com.github.houbb.sensitive.word.api.IWordResult;
/**
* 自定义敏感词替换策略
@@ -13,19 +13,20 @@ import com.github.houbb.sensitive.word.api.ISensitiveWordReplaceContext;
public class MySensitiveWordReplace implements ISensitiveWordReplace {
@Override
public String replace(ISensitiveWordReplaceContext context) {
String sensitiveWord = context.sensitiveWord();
public void replace(StringBuilder stringBuilder, final char[] rawChars, IWordResult wordResult, IWordContext wordContext) {
String sensitiveWord = wordResult.word();
// 自定义不同的敏感词替换策略,可以从数据库等地方读取
if("五星红旗".equals(sensitiveWord)) {
return "国家旗帜";
stringBuilder.append("国家旗帜");
} else if("毛主席".equals(sensitiveWord)) {
stringBuilder.append("教员");
} else {
// 其他默认使用 * 代替
int wordLength = wordResult.endIndex() - wordResult.startIndex();
for(int i = 0; i < wordLength; i++) {
stringBuilder.append('*');
}
}
if("毛主席".equals(sensitiveWord)) {
return "教员";
}
// 其他默认使用 * 代替
int wordLength = context.wordLength();
return CharUtil.repeat('*', wordLength);
}
}