mirror of
https://github.com/houbb/sensitive-word.git
synced 2026-03-22 08:27:36 +08:00
release branch 0.4.0
This commit is contained in:
@@ -163,3 +163,10 @@
|
||||
| 1 | O | 中文繁简体样式 | 2023-06-07 23:51:58 | 调整实现策略 |
|
||||
| 2 | A | 代码结构优化 | 2023-06-07 23:51:58 | 调整实现策略 |
|
||||
|
||||
# release_0.4.0
|
||||
|
||||
| 序号 | 变更类型 | 说明 | 时间 | 备注 |
|
||||
|:---|:-----|:------------|:--------------------|:-------|
|
||||
| 1 | O | 优化单词校验逻辑 | 2023-06-08 23:51:58 | |
|
||||
| 2 | A | 新增是否单词校验的开关 | 2023-06-08 23:51:58 | |
|
||||
|
||||
|
||||
11
README.md
11
README.md
@@ -366,8 +366,9 @@ Assert.assertTrue(wordBs.contains(text));
|
||||
| 7 | enableNumCheck | 是否启用数字检测。 | true |
|
||||
| 8 | enableEmailCheck | 是否启用邮箱检测 | true |
|
||||
| 9 | enableUrlCheck | 是否启用链接检测 | true |
|
||||
| 10 | numCheckLen | 数字检测,自定义指定长度。 | 8 |
|
||||
| 11 | sensitiveWordReplace | 敏感词替换策略 | `*` 替换 |
|
||||
| 10 | enableWordCheck | 是否启用敏感单词检测 | true |
|
||||
| 11 | numCheckLen | 数字检测,自定义指定长度。 | 8 |
|
||||
| 12 | sensitiveWordReplace | 敏感词替换策略 | `*` 替换 |
|
||||
|
||||
# 动态加载(用户自定义)
|
||||
|
||||
@@ -617,7 +618,11 @@ public class SensitiveWordService {
|
||||
|
||||
# 后期 road-map
|
||||
|
||||
- [ ] wordMap 的抽象,便于拓展
|
||||
- [x] wordMap 的抽象,便于拓展
|
||||
|
||||
- [ ] word 的统一性能优化,移除 string 的生成
|
||||
|
||||
- [ ] word 策略的优化,统一遍历+转换
|
||||
|
||||
- 同音字处理
|
||||
|
||||
|
||||
2
pom.xml
2
pom.xml
@@ -6,7 +6,7 @@
|
||||
|
||||
<groupId>com.github.houbb</groupId>
|
||||
<artifactId>sensitive-word</artifactId>
|
||||
<version>0.3.2</version>
|
||||
<version>0.4.0</version>
|
||||
|
||||
<properties>
|
||||
<!--============================== All Plugins START ==============================-->
|
||||
|
||||
@@ -10,9 +10,9 @@ ECHO "============================= RELEASE START..."
|
||||
|
||||
:: 版本号信息(需要手动指定)
|
||||
:::: 旧版本名称
|
||||
SET version=0.3.2
|
||||
SET version=0.4.0
|
||||
:::: 新版本名称
|
||||
SET newVersion=0.3.3
|
||||
SET newVersion=0.5.0
|
||||
:::: 组织名称
|
||||
SET groupName=com.github.houbb
|
||||
:::: 项目名称
|
||||
|
||||
@@ -10,10 +10,15 @@ public interface ISensitiveWordReplace {
|
||||
|
||||
/**
|
||||
* 替换
|
||||
* @param context 上下文
|
||||
* @return 结果
|
||||
* @since 0.2.0
|
||||
* <p>
|
||||
* 说明:废弃以前的字符串返回,减少对象创建,提升性能。
|
||||
*
|
||||
* @param stringBuilder 字符串连接器
|
||||
* @param rawChars 原始字符串
|
||||
* @param wordResult 当前的敏感词结果
|
||||
* @param wordContext 上下文
|
||||
* @since 0.4.0
|
||||
*/
|
||||
String replace(ISensitiveWordReplaceContext context);
|
||||
void replace(final StringBuilder stringBuilder, final char[] rawChars, final IWordResult wordResult, final IWordContext wordContext);
|
||||
|
||||
}
|
||||
|
||||
@@ -1,10 +1,7 @@
|
||||
package com.github.houbb.sensitive.word.api;
|
||||
|
||||
import com.github.houbb.sensitive.word.bs.SensitiveWordContext;
|
||||
import com.github.houbb.sensitive.word.support.check.ISensitiveCheck;
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* @author binbin.hou
|
||||
* @since 0.0.4
|
||||
@@ -72,64 +69,62 @@ public interface IWordContext {
|
||||
IWordContext ignoreChineseStyle(final boolean ignoreChineseStyle);
|
||||
|
||||
/**
|
||||
* 获取敏感词信息
|
||||
* @return 敏感词
|
||||
* @since 0.0.5
|
||||
* 是否启用单词校验
|
||||
* @return 是否
|
||||
*/
|
||||
Map sensitiveWordMap();
|
||||
boolean enableWordCheck();
|
||||
|
||||
/**
|
||||
* 敏感词信息
|
||||
* @param map map 信息
|
||||
* @return this
|
||||
* @since 0.0.5
|
||||
* 设置是否启用单词校验
|
||||
* @param enableWordCheck 是否
|
||||
* @return 结果
|
||||
*/
|
||||
IWordContext sensitiveWordMap(final Map map);
|
||||
IWordContext enableWordCheck(boolean enableWordCheck);
|
||||
|
||||
/**
|
||||
* 敏感数字检测
|
||||
* @return 数字检测
|
||||
* @since 0.0.5
|
||||
*/
|
||||
boolean sensitiveCheckNum();
|
||||
boolean enableNumCheck();
|
||||
|
||||
/**
|
||||
* 设置敏感数字检测
|
||||
* @param sensitiveCheckNum 数字格式检测
|
||||
* @param enableNumCheck 数字格式检测
|
||||
* @return this
|
||||
* @since 0.0.5
|
||||
*/
|
||||
IWordContext sensitiveCheckNum(final boolean sensitiveCheckNum);
|
||||
IWordContext enableNumCheck(final boolean enableNumCheck);
|
||||
|
||||
/**
|
||||
* 是否进行邮箱检测
|
||||
* @return this
|
||||
* @since 0.0.9
|
||||
*/
|
||||
boolean sensitiveCheckEmail();
|
||||
boolean enableEmailCheck();
|
||||
|
||||
/**
|
||||
* 设置敏感邮箱检测
|
||||
* @param sensitiveCheckEmail 是否检测
|
||||
* @param enableEmailCheck 是否检测
|
||||
* @return this
|
||||
* @since 0.0.9
|
||||
*/
|
||||
IWordContext sensitiveCheckEmail(final boolean sensitiveCheckEmail);
|
||||
IWordContext enableEmailCheck(final boolean enableEmailCheck);
|
||||
|
||||
/**
|
||||
* 敏感链接检测
|
||||
* @return 是否启用
|
||||
* @since 0.
|
||||
*/
|
||||
boolean sensitiveCheckUrl();
|
||||
boolean enableUrlCheck();
|
||||
|
||||
/**
|
||||
* 设置敏感链接检测
|
||||
* @param sensitiveCheckUrl 是否检测
|
||||
* @param enableUrlCheck 是否检测
|
||||
* @return this
|
||||
* @since 0.0.9
|
||||
*/
|
||||
IWordContext sensitiveCheckUrl(final boolean sensitiveCheckUrl);
|
||||
IWordContext enableUrlCheck(final boolean enableUrlCheck);
|
||||
|
||||
/**
|
||||
* 忽略英文的写法
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
package com.github.houbb.sensitive.word.api;
|
||||
|
||||
import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum;
|
||||
import com.github.houbb.sensitive.word.constant.enums.WordContainsTypeEnum;
|
||||
import com.github.houbb.sensitive.word.support.check.ISensitiveCheck;
|
||||
|
||||
import java.util.Collection;
|
||||
@@ -29,7 +30,7 @@ public interface IWordMap {
|
||||
* @since 0.0.1
|
||||
* @see ValidModeEnum#FAIL_FAST 建议使用快速返回模式
|
||||
*/
|
||||
boolean contains(final String string,
|
||||
final IWordContext context);
|
||||
WordContainsTypeEnum contains(final String string,
|
||||
final IWordContext context);
|
||||
|
||||
}
|
||||
|
||||
@@ -65,21 +65,27 @@ public class SensitiveWordBs {
|
||||
/**
|
||||
* 启用数字检测
|
||||
*/
|
||||
private boolean sensitiveCheckNum = true;
|
||||
private boolean enableNumCheck = true;
|
||||
/**
|
||||
* 启用邮箱检测
|
||||
*/
|
||||
private boolean sensitiveCheckEmail = true;
|
||||
private boolean enableEmailCheck = true;
|
||||
/**
|
||||
* 启用 URL 检测
|
||||
*/
|
||||
private boolean sensitiveCheckUrl = true;
|
||||
private boolean enableUrlCheck = true;
|
||||
|
||||
/**
|
||||
* 单词校验
|
||||
* @since 0.4.0
|
||||
*/
|
||||
private boolean enableWordCheck = true;
|
||||
|
||||
// 额外配置
|
||||
/**
|
||||
* 检测数字时的长度
|
||||
*/
|
||||
private int sensitiveCheckNumLen = 8;
|
||||
private int numCheckLen = 8;
|
||||
|
||||
//------------------------------------------------------------- 基本属性 END
|
||||
/**
|
||||
@@ -177,12 +183,13 @@ public class SensitiveWordBs {
|
||||
context.ignoreRepeat(ignoreRepeat);
|
||||
|
||||
// 开启校验
|
||||
context.sensitiveCheckNum(sensitiveCheckNum);
|
||||
context.sensitiveCheckEmail(sensitiveCheckEmail);
|
||||
context.sensitiveCheckUrl(sensitiveCheckUrl);
|
||||
context.enableNumCheck(enableNumCheck);
|
||||
context.enableEmailCheck(enableEmailCheck);
|
||||
context.enableUrlCheck(enableUrlCheck);
|
||||
context.enableWordCheck(enableWordCheck);
|
||||
|
||||
// 额外配置
|
||||
context.sensitiveCheckNumLen(sensitiveCheckNumLen);
|
||||
context.sensitiveCheckNumLen(numCheckLen);
|
||||
context.sensitiveWordReplace(sensitiveWordReplace);
|
||||
context.wordMap(wordMap);
|
||||
|
||||
@@ -247,6 +254,18 @@ public class SensitiveWordBs {
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* 设置是否启用单词校验
|
||||
*
|
||||
* @param enableWordCheck 是否启用单词校验
|
||||
* @since 0.4.0
|
||||
* @return this
|
||||
*/
|
||||
public SensitiveWordBs enableWordCheck(boolean enableWordCheck) {
|
||||
this.enableWordCheck = enableWordCheck;
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* 设置是否启动数字检测
|
||||
*
|
||||
@@ -255,7 +274,7 @@ public class SensitiveWordBs {
|
||||
* @return this
|
||||
*/
|
||||
public SensitiveWordBs enableNumCheck(boolean enableNumCheck) {
|
||||
this.sensitiveCheckNum = enableNumCheck;
|
||||
this.enableNumCheck = enableNumCheck;
|
||||
return this;
|
||||
}
|
||||
|
||||
@@ -266,7 +285,7 @@ public class SensitiveWordBs {
|
||||
* @since 0.2.1
|
||||
*/
|
||||
public SensitiveWordBs numCheckLen(int numCheckLen) {
|
||||
this.sensitiveCheckNumLen = numCheckLen;
|
||||
this.numCheckLen = numCheckLen;
|
||||
return this;
|
||||
}
|
||||
|
||||
@@ -278,7 +297,7 @@ public class SensitiveWordBs {
|
||||
* @return this
|
||||
*/
|
||||
public SensitiveWordBs enableEmailCheck(boolean enableEmailCheck) {
|
||||
this.sensitiveCheckEmail = enableEmailCheck;
|
||||
this.enableEmailCheck = enableEmailCheck;
|
||||
return this;
|
||||
}
|
||||
|
||||
@@ -290,7 +309,7 @@ public class SensitiveWordBs {
|
||||
* @return this
|
||||
*/
|
||||
public SensitiveWordBs enableUrlCheck(boolean enableUrlCheck) {
|
||||
this.sensitiveCheckUrl = enableUrlCheck;
|
||||
this.enableUrlCheck = enableUrlCheck;
|
||||
return this;
|
||||
}
|
||||
|
||||
|
||||
@@ -1,10 +1,11 @@
|
||||
package com.github.houbb.sensitive.word.bs;
|
||||
|
||||
import com.github.houbb.sensitive.word.api.*;
|
||||
import com.github.houbb.sensitive.word.api.ICharFormat;
|
||||
import com.github.houbb.sensitive.word.api.ISensitiveWordReplace;
|
||||
import com.github.houbb.sensitive.word.api.IWordContext;
|
||||
import com.github.houbb.sensitive.word.api.IWordMap;
|
||||
import com.github.houbb.sensitive.word.support.check.ISensitiveCheck;
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* 上下文
|
||||
* @author binbin.hou
|
||||
@@ -31,17 +32,16 @@ public class SensitiveWordContext implements IWordContext {
|
||||
private boolean ignoreNumStyle;
|
||||
|
||||
/**
|
||||
* 敏感词信息
|
||||
* @since 0.0.5
|
||||
* 启动单词校验
|
||||
* @since 0.4.0
|
||||
*/
|
||||
@Deprecated
|
||||
private Map sensitiveWordMap;
|
||||
private boolean enableWordCheck;
|
||||
|
||||
/**
|
||||
* 是否进行敏感数字检测
|
||||
* @since 0.0.6
|
||||
*/
|
||||
private boolean sensitiveCheckNum;
|
||||
private boolean enableNumCheck;
|
||||
|
||||
/**
|
||||
* 是否忽略中文繁简体
|
||||
@@ -65,13 +65,13 @@ public class SensitiveWordContext implements IWordContext {
|
||||
* 是否进行邮箱测试
|
||||
* @since 0.0.9
|
||||
*/
|
||||
private boolean sensitiveCheckEmail;
|
||||
private boolean enableEmailCheck;
|
||||
|
||||
/**
|
||||
* 是否进行 url 测试
|
||||
* @since 0.0.12
|
||||
*/
|
||||
private boolean sensitiveCheckUrl;
|
||||
private boolean enableUrlCheck;
|
||||
|
||||
/**
|
||||
* 敏感数字检测对应的长度限制
|
||||
@@ -182,25 +182,23 @@ public class SensitiveWordContext implements IWordContext {
|
||||
return this;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Map sensitiveWordMap() {
|
||||
return sensitiveWordMap;
|
||||
public boolean enableWordCheck() {
|
||||
return enableWordCheck;
|
||||
}
|
||||
|
||||
@Override
|
||||
public SensitiveWordContext sensitiveWordMap(Map sensitiveWordMap) {
|
||||
this.sensitiveWordMap = sensitiveWordMap;
|
||||
public SensitiveWordContext enableWordCheck(boolean enableWordCheck) {
|
||||
this.enableWordCheck = enableWordCheck;
|
||||
return this;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean sensitiveCheckNum() {
|
||||
return sensitiveCheckNum;
|
||||
public boolean enableNumCheck() {
|
||||
return enableNumCheck;
|
||||
}
|
||||
|
||||
@Override
|
||||
public SensitiveWordContext sensitiveCheckNum(boolean sensitiveCheckNum) {
|
||||
this.sensitiveCheckNum = sensitiveCheckNum;
|
||||
public SensitiveWordContext enableNumCheck(boolean enableNumCheck) {
|
||||
this.enableNumCheck = enableNumCheck;
|
||||
return this;
|
||||
}
|
||||
|
||||
@@ -238,24 +236,24 @@ public class SensitiveWordContext implements IWordContext {
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean sensitiveCheckEmail() {
|
||||
return sensitiveCheckEmail;
|
||||
public boolean enableEmailCheck() {
|
||||
return enableEmailCheck;
|
||||
}
|
||||
|
||||
@Override
|
||||
public SensitiveWordContext sensitiveCheckEmail(boolean sensitiveCheckEmail) {
|
||||
this.sensitiveCheckEmail = sensitiveCheckEmail;
|
||||
public SensitiveWordContext enableEmailCheck(boolean enableEmailCheck) {
|
||||
this.enableEmailCheck = enableEmailCheck;
|
||||
return this;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean sensitiveCheckUrl() {
|
||||
return sensitiveCheckUrl;
|
||||
public boolean enableUrlCheck() {
|
||||
return enableUrlCheck;
|
||||
}
|
||||
|
||||
@Override
|
||||
public SensitiveWordContext sensitiveCheckUrl(boolean sensitiveCheckUrl) {
|
||||
this.sensitiveCheckUrl = sensitiveCheckUrl;
|
||||
public SensitiveWordContext enableUrlCheck(boolean enableUrlCheck) {
|
||||
this.enableUrlCheck = enableUrlCheck;
|
||||
return this;
|
||||
}
|
||||
|
||||
|
||||
@@ -19,34 +19,16 @@ public final class AppConst {
|
||||
*/
|
||||
public static final String IS_END = "ED";
|
||||
|
||||
/**
|
||||
* 字典的大小
|
||||
* @since 0.0.1
|
||||
*/
|
||||
public static final int DICT_SIZE = 65275;
|
||||
|
||||
/**
|
||||
* 英语词典的大小
|
||||
* @since 0.0.4
|
||||
*/
|
||||
public static final int DICT_EN_SIZE = 12;
|
||||
|
||||
/**
|
||||
* 拒绝的词语
|
||||
* @since 0.0.8
|
||||
*/
|
||||
public static final String SENSITIVE_WORD_DENY_PATH = "/sensitive_word_deny.txt";
|
||||
|
||||
/**
|
||||
* 用户允许的词语
|
||||
* @since 0.0.8
|
||||
*/
|
||||
public static final String SENSITIVE_WORD_ALLOW_PATH = "/sensitive_word_allow.txt";
|
||||
|
||||
/**
|
||||
* 最长的网址长度
|
||||
* @since 0.3.0
|
||||
*/
|
||||
public static final int MAX_WEB_SITE_LEN = 70;
|
||||
|
||||
/**
|
||||
* 最大邮箱地址
|
||||
* @since 0.4.0
|
||||
*/
|
||||
public static final int MAX_EMAIL_LEN = 64;
|
||||
|
||||
}
|
||||
|
||||
@@ -0,0 +1,24 @@
|
||||
package com.github.houbb.sensitive.word.constant.enums;
|
||||
|
||||
/**
|
||||
* 单词包含类别
|
||||
* @since 0.4.0
|
||||
*/
|
||||
public enum WordContainsTypeEnum {
|
||||
|
||||
/**
|
||||
* 包含+前缀
|
||||
*/
|
||||
CONTAINS_PREFIX,
|
||||
|
||||
/**
|
||||
* 包含+且是结尾
|
||||
*/
|
||||
CONTAINS_END,
|
||||
|
||||
/**
|
||||
* 不存在
|
||||
*/
|
||||
NOT_FOUND,
|
||||
|
||||
}
|
||||
@@ -3,11 +3,9 @@ package com.github.houbb.sensitive.word.core;
|
||||
import com.github.houbb.heaven.util.lang.StringUtil;
|
||||
import com.github.houbb.heaven.util.util.CollectionUtil;
|
||||
import com.github.houbb.sensitive.word.api.*;
|
||||
import com.github.houbb.sensitive.word.support.replace.SensitiveWordReplaceContext;
|
||||
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* 抽象实现
|
||||
@@ -42,7 +40,7 @@ public abstract class AbstractSensitiveWord implements ISensitiveWord {
|
||||
|
||||
// 注意边界
|
||||
int startIndex = 0;
|
||||
char[] chars = target.toCharArray();
|
||||
char[] rawChars = target.toCharArray();
|
||||
|
||||
for(IWordResult wordResult : allList) {
|
||||
final int itemStartIx = wordResult.startIndex();
|
||||
@@ -50,24 +48,19 @@ public abstract class AbstractSensitiveWord implements ISensitiveWord {
|
||||
|
||||
// 脱敏的左边
|
||||
if(startIndex < itemStartIx) {
|
||||
stringBuilder.append(chars, startIndex, itemStartIx-startIndex);
|
||||
stringBuilder.append(rawChars, startIndex, itemStartIx-startIndex);
|
||||
}
|
||||
|
||||
// 脱敏部分
|
||||
String word = wordResult.word();
|
||||
ISensitiveWordReplaceContext replaceContext = SensitiveWordReplaceContext.newInstance()
|
||||
.sensitiveWord(word)
|
||||
.wordLength(word.length());
|
||||
String replacedText = replace.replace(replaceContext);
|
||||
stringBuilder.append(replacedText);
|
||||
replace.replace(stringBuilder, rawChars, wordResult, context);
|
||||
|
||||
// 更新结尾
|
||||
startIndex = Math.max(startIndex, itemEndIx);
|
||||
}
|
||||
|
||||
// 最后部分
|
||||
if (startIndex < chars.length) {
|
||||
stringBuilder.append(chars, startIndex, chars.length-startIndex);
|
||||
if (startIndex < rawChars.length) {
|
||||
stringBuilder.append(rawChars, startIndex, rawChars.length-startIndex);
|
||||
}
|
||||
|
||||
return stringBuilder.toString();
|
||||
|
||||
@@ -57,6 +57,7 @@ public class SensitiveWord extends AbstractSensitiveWord {
|
||||
int wordLength = checkResult.index();
|
||||
if (wordLength > 0) {
|
||||
// 保存敏感词
|
||||
// TODO: 这其实是一个比较消耗的操作,后续可以考虑简化掉。
|
||||
String sensitiveWord = text.substring(i, i + wordLength);
|
||||
|
||||
// 添加去重
|
||||
|
||||
@@ -0,0 +1,83 @@
|
||||
package com.github.houbb.sensitive.word.support.check.impl;
|
||||
|
||||
import com.github.houbb.heaven.annotation.ThreadSafe;
|
||||
import com.github.houbb.sensitive.word.api.IWordContext;
|
||||
import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum;
|
||||
import com.github.houbb.sensitive.word.support.check.ISensitiveCheck;
|
||||
import com.github.houbb.sensitive.word.support.check.SensitiveCheckResult;
|
||||
|
||||
/**
|
||||
* 抽象实现策略
|
||||
*
|
||||
* @author binbin.hou
|
||||
* @since 0.3.2
|
||||
*/
|
||||
@ThreadSafe
|
||||
public abstract class AbstractConditionSensitiveCheck extends AbstractSensitiveCheck {
|
||||
|
||||
/**
|
||||
* 当前字符串是否符合规范
|
||||
* @param mappingChar 当前字符
|
||||
* @param index 下标
|
||||
* @param rawText 原始文本
|
||||
* @param context 上下文
|
||||
* @return 结果
|
||||
* @since 0.3.2
|
||||
*/
|
||||
protected abstract boolean isCharCondition(char mappingChar,
|
||||
int index,
|
||||
String rawText,
|
||||
final IWordContext context);
|
||||
|
||||
/**
|
||||
* 这里指定一个阈值条件
|
||||
* @param index 当前下标
|
||||
* @param rawText 原始文本
|
||||
* @param stringBuilder 缓存
|
||||
* @param context 上下文
|
||||
* @return 是否满足条件
|
||||
* @since 0.3.2
|
||||
*/
|
||||
protected abstract boolean isStringCondition(int index,
|
||||
String rawText,
|
||||
final StringBuilder stringBuilder,
|
||||
final IWordContext context);
|
||||
|
||||
@Override
|
||||
protected int doGetActualLength(String txt, int beginIndex,
|
||||
ValidModeEnum validModeEnum,
|
||||
IWordContext context) {
|
||||
int actualLength = 0;
|
||||
|
||||
// 采用 ThreadLocal 应该可以提升性能,减少对象的创建。
|
||||
StringBuilder stringBuilder = new StringBuilder();
|
||||
// 前一个条件
|
||||
boolean preCondition = false;
|
||||
int currentIx = 0;
|
||||
for(int i = beginIndex; i < txt.length(); i++) {
|
||||
currentIx = i;
|
||||
char currentChar = txt.charAt(i);
|
||||
|
||||
// 映射处理
|
||||
char mappingChar = context.charFormat().format(currentChar, context);
|
||||
|
||||
// 符合条件
|
||||
boolean currentCondition = isCharCondition(mappingChar, i, txt, context);
|
||||
|
||||
//4 个场景
|
||||
if(currentCondition) {
|
||||
stringBuilder.append(currentChar);
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// 匹配
|
||||
if(isStringCondition(currentIx, txt, stringBuilder, context)) {
|
||||
actualLength = stringBuilder.length();
|
||||
}
|
||||
|
||||
return actualLength;
|
||||
}
|
||||
|
||||
}
|
||||
@@ -1,6 +1,7 @@
|
||||
package com.github.houbb.sensitive.word.support.check.impl;
|
||||
|
||||
import com.github.houbb.heaven.annotation.ThreadSafe;
|
||||
import com.github.houbb.heaven.util.lang.StringUtil;
|
||||
import com.github.houbb.sensitive.word.api.IWordContext;
|
||||
import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum;
|
||||
import com.github.houbb.sensitive.word.support.check.ISensitiveCheck;
|
||||
@@ -10,39 +11,11 @@ import com.github.houbb.sensitive.word.support.check.SensitiveCheckResult;
|
||||
* 抽象实现策略
|
||||
*
|
||||
* @author binbin.hou
|
||||
* @since 0.3.2
|
||||
* @since 0.4.0
|
||||
*/
|
||||
@ThreadSafe
|
||||
public abstract class AbstractSensitiveCheck implements ISensitiveCheck {
|
||||
|
||||
/**
|
||||
* 当前字符串是否符合规范
|
||||
* @param mappingChar 当前字符
|
||||
* @param index 下标
|
||||
* @param rawText 原始文本
|
||||
* @param context 上下文
|
||||
* @return 结果
|
||||
* @since 0.3.2
|
||||
*/
|
||||
protected abstract boolean isCharCondition(char mappingChar,
|
||||
int index,
|
||||
String rawText,
|
||||
final IWordContext context);
|
||||
|
||||
/**
|
||||
* 这里指定一个阈值条件
|
||||
* @param index 当前下标
|
||||
* @param rawText 原始文本
|
||||
* @param stringBuilder 缓存
|
||||
* @param context 上下文
|
||||
* @return 是否满足条件
|
||||
* @since 0.3.2
|
||||
*/
|
||||
protected abstract boolean isStringCondition(int index,
|
||||
String rawText,
|
||||
final StringBuilder stringBuilder,
|
||||
final IWordContext context);
|
||||
|
||||
/**
|
||||
* 获取校验类
|
||||
* @return 类
|
||||
@@ -50,41 +23,49 @@ public abstract class AbstractSensitiveCheck implements ISensitiveCheck {
|
||||
*/
|
||||
protected abstract Class<? extends ISensitiveCheck> getSensitiveCheckClass();
|
||||
|
||||
/**
|
||||
* 获取确切的长度
|
||||
* @param txt 文本
|
||||
* @param beginIndex 开始
|
||||
* @param validModeEnum 校验枚举
|
||||
* @param context 上下文
|
||||
* @return 长度
|
||||
* @since 0.4.0
|
||||
*/
|
||||
protected abstract int doGetActualLength(String txt, int beginIndex,
|
||||
ValidModeEnum validModeEnum,
|
||||
IWordContext context);
|
||||
|
||||
/**
|
||||
* 获取确切的长度
|
||||
* @param txt 文本
|
||||
* @param beginIndex 开始
|
||||
* @param validModeEnum 校验枚举
|
||||
* @param context 上下文
|
||||
* @return 长度
|
||||
* @since 0.4.0
|
||||
*/
|
||||
protected int getActualLength(String txt, int beginIndex,
|
||||
ValidModeEnum validModeEnum,
|
||||
IWordContext context) {
|
||||
if(StringUtil.isEmpty(txt)) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
return doGetActualLength(txt, beginIndex, validModeEnum, context);
|
||||
}
|
||||
|
||||
@Override
|
||||
public SensitiveCheckResult sensitiveCheck(String txt, int beginIndex,
|
||||
ValidModeEnum validModeEnum,
|
||||
IWordContext context) {
|
||||
// 采用 ThreadLocal 应该可以提升性能,减少对象的创建。
|
||||
StringBuilder stringBuilder = new StringBuilder();
|
||||
int actualLength = 0;
|
||||
// 前一个条件
|
||||
for(int i = beginIndex; i < txt.length(); i++) {
|
||||
char currentChar = txt.charAt(i);
|
||||
|
||||
// 映射处理
|
||||
char mappingChar = context.charFormat().format(currentChar, context);
|
||||
|
||||
// 符合条件
|
||||
boolean currentCondition = isCharCondition(mappingChar, i, txt, context);
|
||||
if(currentCondition) {
|
||||
stringBuilder.append(currentChar);
|
||||
|
||||
// 匹配
|
||||
if(isStringCondition(i, txt, stringBuilder, context)) {
|
||||
actualLength = stringBuilder.length();
|
||||
|
||||
// 是否遍历全部匹配的模式
|
||||
if(ValidModeEnum.FAIL_FAST.equals(validModeEnum)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
Class<? extends ISensitiveCheck> clazz = getSensitiveCheckClass();
|
||||
if(StringUtil.isEmpty(txt)) {
|
||||
return SensitiveCheckResult.of(0, clazz);
|
||||
}
|
||||
int actualLength = getActualLength(txt, beginIndex, validModeEnum, context);
|
||||
|
||||
// 处理结果
|
||||
return SensitiveCheckResult.of(actualLength, getSensitiveCheckClass());
|
||||
return SensitiveCheckResult.of(actualLength, clazz);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -4,9 +4,8 @@ import com.github.houbb.heaven.annotation.ThreadSafe;
|
||||
import com.github.houbb.heaven.util.lang.CharUtil;
|
||||
import com.github.houbb.heaven.util.util.regex.RegexUtil;
|
||||
import com.github.houbb.sensitive.word.api.IWordContext;
|
||||
import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum;
|
||||
import com.github.houbb.sensitive.word.constant.AppConst;
|
||||
import com.github.houbb.sensitive.word.support.check.ISensitiveCheck;
|
||||
import com.github.houbb.sensitive.word.support.check.SensitiveCheckResult;
|
||||
|
||||
/**
|
||||
* email 正则表达式检测实现。
|
||||
@@ -24,7 +23,7 @@ import com.github.houbb.sensitive.word.support.check.SensitiveCheckResult;
|
||||
* @since 0.0.9
|
||||
*/
|
||||
@ThreadSafe
|
||||
public class SensitiveCheckEmail extends AbstractSensitiveCheck {
|
||||
public class SensitiveCheckEmail extends AbstractConditionSensitiveCheck {
|
||||
|
||||
/**
|
||||
* @since 0.3.0
|
||||
@@ -42,6 +41,16 @@ public class SensitiveCheckEmail extends AbstractSensitiveCheck {
|
||||
|
||||
@Override
|
||||
protected boolean isStringCondition(int index, String rawText, StringBuilder stringBuilder, IWordContext context) {
|
||||
int bufferLen = stringBuilder.length();
|
||||
|
||||
//x@a.cn
|
||||
if(bufferLen < 6) {
|
||||
return false;
|
||||
}
|
||||
if(bufferLen > AppConst.MAX_EMAIL_LEN) {
|
||||
return false;
|
||||
}
|
||||
|
||||
String string = stringBuilder.toString();
|
||||
return RegexUtil.isEmail(string);
|
||||
}
|
||||
|
||||
@@ -12,7 +12,7 @@ import com.github.houbb.sensitive.word.support.check.ISensitiveCheck;
|
||||
* @since 0.0.5
|
||||
*/
|
||||
@ThreadSafe
|
||||
public class SensitiveCheckNum extends AbstractSensitiveCheck {
|
||||
public class SensitiveCheckNum extends AbstractConditionSensitiveCheck {
|
||||
|
||||
/**
|
||||
* @since 0.3.0
|
||||
|
||||
@@ -20,7 +20,7 @@ import com.github.houbb.sensitive.word.support.check.ISensitiveCheck;
|
||||
* @since 0.0.9
|
||||
*/
|
||||
@ThreadSafe
|
||||
public class SensitiveCheckUrl extends AbstractSensitiveCheck {
|
||||
public class SensitiveCheckUrl extends AbstractConditionSensitiveCheck {
|
||||
|
||||
/**
|
||||
* @since 0.3.0
|
||||
@@ -39,6 +39,10 @@ public class SensitiveCheckUrl extends AbstractSensitiveCheck {
|
||||
@Override
|
||||
protected boolean isStringCondition(int index, String rawText, StringBuilder stringBuilder, IWordContext context) {
|
||||
int bufferLen = stringBuilder.length();
|
||||
//a.cn
|
||||
if(bufferLen < 4) {
|
||||
return false;
|
||||
}
|
||||
if(bufferLen > AppConst.MAX_WEB_SITE_LEN) {
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -2,6 +2,9 @@ package com.github.houbb.sensitive.word.support.check.impl;
|
||||
|
||||
import com.github.houbb.heaven.annotation.ThreadSafe;
|
||||
import com.github.houbb.sensitive.word.api.IWordContext;
|
||||
import com.github.houbb.sensitive.word.api.IWordMap;
|
||||
import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum;
|
||||
import com.github.houbb.sensitive.word.constant.enums.WordContainsTypeEnum;
|
||||
import com.github.houbb.sensitive.word.support.check.ISensitiveCheck;
|
||||
|
||||
/**
|
||||
@@ -21,19 +24,44 @@ public class SensitiveCheckWord extends AbstractSensitiveCheck {
|
||||
return INSTANCE;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected boolean isCharCondition(char mappingChar, int index, String rawText, IWordContext context) {
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected boolean isStringCondition(int index, String rawText, StringBuilder stringBuilder, IWordContext context) {
|
||||
return context.wordMap().contains(stringBuilder.toString(), context);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Class<? extends ISensitiveCheck> getSensitiveCheckClass() {
|
||||
return SensitiveCheckWord.class;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int doGetActualLength(String txt, int beginIndex, ValidModeEnum validModeEnum, IWordContext context) {
|
||||
// 采用 ThreadLocal 应该可以提升性能,减少对象的创建。
|
||||
int actualLength = 0;
|
||||
final IWordMap wordMap = context.wordMap();
|
||||
|
||||
// 前一个条件
|
||||
StringBuilder stringBuilder = new StringBuilder();
|
||||
for(int i = beginIndex; i < txt.length(); i++) {
|
||||
char currentChar = txt.charAt(i);
|
||||
|
||||
// 映射处理
|
||||
char mappingChar = context.charFormat().format(currentChar, context);
|
||||
stringBuilder.append(mappingChar);
|
||||
|
||||
// 判断是否存在
|
||||
WordContainsTypeEnum wordContainsTypeEnum = wordMap.contains(stringBuilder.toString(), context);
|
||||
if(WordContainsTypeEnum.CONTAINS_END.equals(wordContainsTypeEnum)) {
|
||||
actualLength = stringBuilder.length();
|
||||
|
||||
// 是否遍历全部匹配的模式
|
||||
if(ValidModeEnum.FAIL_FAST.equals(validModeEnum)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// 如果不包含,则直接返回。后续遍历无意义
|
||||
if(WordContainsTypeEnum.NOT_FOUND.equals(wordContainsTypeEnum)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return actualLength;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -28,16 +28,16 @@ public final class SensitiveChecks {
|
||||
public static ISensitiveCheck initSensitiveCheck(final IWordContext context) {
|
||||
List<ISensitiveCheck> sensitiveCheckList = new ArrayList<>();
|
||||
|
||||
// 默认添加敏感词校验
|
||||
sensitiveCheckList.add(SensitiveChecks.word());
|
||||
|
||||
if(context.sensitiveCheckNum()) {
|
||||
if(context.enableWordCheck()) {
|
||||
sensitiveCheckList.add(SensitiveChecks.word());
|
||||
}
|
||||
if(context.enableNumCheck()) {
|
||||
sensitiveCheckList.add(SensitiveChecks.num());
|
||||
}
|
||||
if(context.sensitiveCheckEmail()) {
|
||||
if(context.enableEmailCheck()) {
|
||||
sensitiveCheckList.add(SensitiveChecks.email());
|
||||
}
|
||||
if(context.sensitiveCheckUrl()) {
|
||||
if(context.enableUrlCheck()) {
|
||||
sensitiveCheckList.add(SensitiveChecks.url());
|
||||
}
|
||||
|
||||
|
||||
@@ -6,6 +6,7 @@ import com.github.houbb.heaven.util.lang.StringUtil;
|
||||
import com.github.houbb.sensitive.word.api.IWordContext;
|
||||
import com.github.houbb.sensitive.word.api.IWordMap;
|
||||
import com.github.houbb.sensitive.word.constant.AppConst;
|
||||
import com.github.houbb.sensitive.word.constant.enums.WordContainsTypeEnum;
|
||||
|
||||
import java.util.Collection;
|
||||
import java.util.HashMap;
|
||||
@@ -98,15 +99,15 @@ public class WordMap implements IWordMap {
|
||||
* @since 0.0.1
|
||||
*/
|
||||
@Override
|
||||
public boolean contains(String string, final IWordContext context) {
|
||||
public WordContainsTypeEnum contains(String string, final IWordContext context) {
|
||||
if (StringUtil.isEmpty(string)) {
|
||||
return false;
|
||||
return WordContainsTypeEnum.NOT_FOUND;
|
||||
}
|
||||
|
||||
return innerContainsSensitive(string, context);
|
||||
}
|
||||
|
||||
private boolean innerContainsSensitive(String txt,
|
||||
private WordContainsTypeEnum innerContainsSensitive(String txt,
|
||||
IWordContext context) {
|
||||
// 初始化为当前的 map
|
||||
Map nowMap = this.innerWordMap;
|
||||
@@ -118,11 +119,17 @@ public class WordMap implements IWordMap {
|
||||
|
||||
// 如果不为空,则判断是否为结尾。
|
||||
if (ObjectUtil.isNull(nowMap)) {
|
||||
return false;
|
||||
return WordContainsTypeEnum.NOT_FOUND;
|
||||
}
|
||||
}
|
||||
|
||||
return isEnd(nowMap);
|
||||
// 是否为结尾,便于快速失败
|
||||
boolean isEnd = isEnd(nowMap);
|
||||
if(isEnd) {
|
||||
return WordContainsTypeEnum.CONTAINS_END;
|
||||
}
|
||||
|
||||
return WordContainsTypeEnum.CONTAINS_PREFIX;
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -5,6 +5,8 @@ import com.github.houbb.heaven.constant.CharConst;
|
||||
import com.github.houbb.heaven.util.lang.CharUtil;
|
||||
import com.github.houbb.sensitive.word.api.ISensitiveWordReplace;
|
||||
import com.github.houbb.sensitive.word.api.ISensitiveWordReplaceContext;
|
||||
import com.github.houbb.sensitive.word.api.IWordContext;
|
||||
import com.github.houbb.sensitive.word.api.IWordResult;
|
||||
|
||||
/**
|
||||
* 指定字符的替换策略
|
||||
@@ -29,10 +31,11 @@ public class SensitiveWordReplaceChar implements ISensitiveWordReplace {
|
||||
}
|
||||
|
||||
@Override
|
||||
public String replace(ISensitiveWordReplaceContext context) {
|
||||
int wordLength = context.wordLength();
|
||||
|
||||
return CharUtil.repeat(replaceChar, wordLength);
|
||||
public void replace(StringBuilder stringBuilder, final char[] rawChars, IWordResult wordResult, IWordContext wordContext) {
|
||||
int wordLen = wordResult.endIndex() - wordResult.startIndex();
|
||||
for(int i = 0; i < wordLen; i++) {
|
||||
stringBuilder.append(replaceChar);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -0,0 +1,56 @@
|
||||
package com.github.houbb.sensitive.word.benchmark;
|
||||
|
||||
import com.github.houbb.heaven.util.util.RandomUtil;
|
||||
import com.github.houbb.sensitive.word.bs.SensitiveWordBs;
|
||||
import com.github.houbb.sensitive.word.core.SensitiveWordHelper;
|
||||
import org.junit.Ignore;
|
||||
import org.junit.Test;
|
||||
|
||||
@Ignore
|
||||
public class BasicTest {
|
||||
|
||||
/**
|
||||
*
|
||||
*
|
||||
* 100*100 耗时:926ms,性能较差。
|
||||
*
|
||||
* 100*100000 的字符:12942ms 第一次优化。
|
||||
*/
|
||||
@Test
|
||||
public void costTimeTest() {
|
||||
String randomText = "你他妈的不要说脏话"+ RandomUtil.randomString("1234567890bcdefghiJKLMNOPQRSTUVWXYZ", 100)
|
||||
+ "我们他妈的从来不说脏说";
|
||||
|
||||
|
||||
// 1W 次
|
||||
long start = System.currentTimeMillis();
|
||||
for(int i = 0; i < 10000; i++) {
|
||||
SensitiveWordHelper.findAll(randomText);
|
||||
}
|
||||
long end = System.currentTimeMillis();
|
||||
System.out.println("------------------ COST: " + (end-start));
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* 100*100000 的字符:12440ms
|
||||
*/
|
||||
@Test
|
||||
public void costTimeOnlyWordTest() {
|
||||
String randomText = "你他妈的不要说脏话"+ RandomUtil.randomString("1234567890bcdefghiJKLMNOPQRSTUVWXYZ", 100)
|
||||
+ "我们他妈的从来不说脏说";
|
||||
|
||||
// 1W 次
|
||||
long start = System.currentTimeMillis();
|
||||
SensitiveWordBs sensitiveWordBs = SensitiveWordBs.newInstance()
|
||||
.enableWordCheck(false)
|
||||
.init();
|
||||
|
||||
for(int i = 0; i < 10000; i++) {
|
||||
sensitiveWordBs.findAll(randomText);
|
||||
}
|
||||
long end = System.currentTimeMillis();
|
||||
System.out.println("------------------ COST: " + (end-start));
|
||||
}
|
||||
|
||||
}
|
||||
@@ -1,5 +1,6 @@
|
||||
package com.github.houbb.sensitive.word.data;
|
||||
|
||||
import org.junit.Ignore;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.util.Arrays;
|
||||
@@ -9,6 +10,7 @@ import java.util.List;
|
||||
* @author binbin.hou
|
||||
* @since 0.0.11
|
||||
*/
|
||||
@Ignore
|
||||
public class NumUtilTest {
|
||||
|
||||
@Test
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
package com.github.houbb.sensitive.word.replace;
|
||||
|
||||
import com.github.houbb.heaven.util.lang.CharUtil;
|
||||
import com.github.houbb.sensitive.word.api.ISensitiveWordReplace;
|
||||
import com.github.houbb.sensitive.word.api.ISensitiveWordReplaceContext;
|
||||
import com.github.houbb.sensitive.word.api.IWordContext;
|
||||
import com.github.houbb.sensitive.word.api.IWordResult;
|
||||
|
||||
/**
|
||||
* 自定义敏感词替换策略
|
||||
@@ -13,19 +13,20 @@ import com.github.houbb.sensitive.word.api.ISensitiveWordReplaceContext;
|
||||
public class MySensitiveWordReplace implements ISensitiveWordReplace {
|
||||
|
||||
@Override
|
||||
public String replace(ISensitiveWordReplaceContext context) {
|
||||
String sensitiveWord = context.sensitiveWord();
|
||||
public void replace(StringBuilder stringBuilder, final char[] rawChars, IWordResult wordResult, IWordContext wordContext) {
|
||||
String sensitiveWord = wordResult.word();
|
||||
// 自定义不同的敏感词替换策略,可以从数据库等地方读取
|
||||
if("五星红旗".equals(sensitiveWord)) {
|
||||
return "国家旗帜";
|
||||
stringBuilder.append("国家旗帜");
|
||||
} else if("毛主席".equals(sensitiveWord)) {
|
||||
stringBuilder.append("教员");
|
||||
} else {
|
||||
// 其他默认使用 * 代替
|
||||
int wordLength = wordResult.endIndex() - wordResult.startIndex();
|
||||
for(int i = 0; i < wordLength; i++) {
|
||||
stringBuilder.append('*');
|
||||
}
|
||||
}
|
||||
if("毛主席".equals(sensitiveWord)) {
|
||||
return "教员";
|
||||
}
|
||||
|
||||
// 其他默认使用 * 代替
|
||||
int wordLength = context.wordLength();
|
||||
return CharUtil.repeat('*', wordLength);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user