mirror of
https://github.com/houbb/sensitive-word.git
synced 2026-03-22 08:27:36 +08:00
release branch 0.0.12
This commit is contained in:
@@ -87,3 +87,9 @@
|
||||
|:---|:---|:---|:---|:--|
|
||||
| 1 | A | 添加对于数字过滤的可配置性 | 2020-1-14 22:48:12 | |
|
||||
| 2 | A | 添加部分敏感词 | 2020-1-14 22:48:12 | |
|
||||
|
||||
# release_0.0.12
|
||||
|
||||
| 序号 | 变更类型 | 说明 | 时间 | 备注 |
|
||||
|:---|:---|:---|:---|:--|
|
||||
| 1 | A | 添加对于网址的过滤 | 2020-1-16 20:51:58 | |
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
# 是否为邮箱 check
|
||||
|
||||
|
||||
暂时先使用基本的正则表达式,
|
||||
|
||||
==================
|
||||
|
||||
网址等等
|
||||
@@ -13,6 +15,10 @@ Image-URL 检测,避免替换错误。
|
||||
|
||||
针对不同的信息脱敏,则需要知道对应的检测代码是什么。
|
||||
|
||||
jpg
|
||||
png
|
||||
jpeg
|
||||
gif
|
||||
|
||||
## 是否脱敏的配置
|
||||
|
||||
@@ -26,4 +32,11 @@ Image-URL 检测,避免替换错误。
|
||||
|
||||
可以直接开辟另一道验证方式。
|
||||
|
||||
直接 regex+全文检索实现。
|
||||
直接 regex+全文检索实现。
|
||||
|
||||
# 前提
|
||||
|
||||
首先实现 Regex
|
||||
|
||||
这里也可以支持 allow_regex/deny_regex
|
||||
|
||||
17
pom.xml
17
pom.xml
@@ -6,7 +6,7 @@
|
||||
|
||||
<groupId>com.github.houbb</groupId>
|
||||
<artifactId>sensitive-word</artifactId>
|
||||
<version>0.0.12-SNAPSHOT</version>
|
||||
<version>0.0.12</version>
|
||||
|
||||
<properties>
|
||||
<!--============================== All Plugins START ==============================-->
|
||||
@@ -25,7 +25,7 @@
|
||||
<project.compiler.level>1.7</project.compiler.level>
|
||||
|
||||
<!--============================== INTER ==============================-->
|
||||
<heaven.version>0.1.72</heaven.version>
|
||||
<heaven.version>0.1.73</heaven.version>
|
||||
<opencc4j.version>1.2.0</opencc4j.version>
|
||||
|
||||
<!--============================== OTHER ==============================-->
|
||||
@@ -37,13 +37,6 @@
|
||||
<!--============================== SELF ==============================-->
|
||||
|
||||
<!--============================== INTER ==============================-->
|
||||
<dependency>
|
||||
<groupId>com.github.houbb</groupId>
|
||||
<artifactId>heaven</artifactId>
|
||||
<version>${heaven.version}</version>
|
||||
<optional>true</optional>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>com.github.houbb</groupId>
|
||||
<artifactId>opencc4j</artifactId>
|
||||
@@ -59,6 +52,12 @@
|
||||
</exclusion>
|
||||
</exclusions>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>com.github.houbb</groupId>
|
||||
<artifactId>heaven</artifactId>
|
||||
<version>${heaven.version}</version>
|
||||
</dependency>
|
||||
<!--============================== OTHER ==============================-->
|
||||
|
||||
<dependency>
|
||||
|
||||
@@ -10,9 +10,9 @@ ECHO "============================= RELEASE START..."
|
||||
|
||||
:: 版本号信息(需要手动指定)
|
||||
:::: 旧版本名称
|
||||
SET version=0.0.11
|
||||
SET version=0.0.12
|
||||
:::: 新版本名称
|
||||
SET newVersion=0.0.12
|
||||
SET newVersion=0.0.13
|
||||
:::: 组织名称
|
||||
SET groupName=com.github.houbb
|
||||
:::: 项目名称
|
||||
|
||||
@@ -88,30 +88,45 @@ public interface IWordContext {
|
||||
* @return 数字检测
|
||||
* @since 0.0.5
|
||||
*/
|
||||
boolean sensitiveNumCheck();
|
||||
boolean sensitiveCheckNum();
|
||||
|
||||
/**
|
||||
* 设置敏感数字检测
|
||||
* @param sensitiveNumCheck 数字格式检测
|
||||
* @param sensitiveCheckNum 数字格式检测
|
||||
* @return this
|
||||
* @since 0.0.5
|
||||
*/
|
||||
IWordContext sensitiveNumCheck(final boolean sensitiveNumCheck);
|
||||
IWordContext sensitiveCheckNum(final boolean sensitiveCheckNum);
|
||||
|
||||
/**
|
||||
* 是否进行邮箱检测
|
||||
* @return this
|
||||
* @since 0.0.9
|
||||
*/
|
||||
boolean sensitiveEmailCheck();
|
||||
boolean sensitiveCheckEmail();
|
||||
|
||||
/**
|
||||
* 设置敏感邮箱检测
|
||||
* @param sensitiveEmailCheck 是否检测
|
||||
* @param sensitiveCheckEmail 是否检测
|
||||
* @return this
|
||||
* @since 0.0.9
|
||||
*/
|
||||
IWordContext sensitiveEmailCheck(final boolean sensitiveEmailCheck);
|
||||
IWordContext sensitiveCheckEmail(final boolean sensitiveCheckEmail);
|
||||
|
||||
/**
|
||||
* 敏感链接检测
|
||||
* @return 是否启用
|
||||
* @since 0.0.12
|
||||
*/
|
||||
boolean sensitiveCheckUrl();
|
||||
|
||||
/**
|
||||
* 设置敏感链接检测
|
||||
* @param sensitiveCheckUrl 是否检测
|
||||
* @return this
|
||||
* @since 0.0.12
|
||||
*/
|
||||
IWordContext sensitiveCheckUrl(final boolean sensitiveCheckUrl);
|
||||
|
||||
/**
|
||||
* 忽略英文的写法
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
package com.github.houbb.sensitive.word.api;
|
||||
|
||||
import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum;
|
||||
import com.github.houbb.sensitive.word.support.check.ISensitiveCheck;
|
||||
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
|
||||
@@ -11,6 +11,7 @@ import java.util.List;
|
||||
|
||||
/**
|
||||
* 敏感词引导类
|
||||
*
|
||||
* @author binbin.hou
|
||||
* @since 0.0.1
|
||||
*/
|
||||
@@ -18,37 +19,36 @@ public class SensitiveWordBs {
|
||||
|
||||
/**
|
||||
* 私有化构造器
|
||||
*
|
||||
* @since 0.0.1
|
||||
*/
|
||||
private SensitiveWordBs(){}
|
||||
private SensitiveWordBs() {
|
||||
}
|
||||
|
||||
/**
|
||||
* 敏感词 map
|
||||
*
|
||||
* @since 0.0.1
|
||||
*/
|
||||
private static volatile IWordMap sensitiveWordMap;
|
||||
|
||||
/**
|
||||
* 默认的执行上下文
|
||||
*
|
||||
* @since 0.0.4
|
||||
*/
|
||||
private volatile IWordContext context;
|
||||
|
||||
/**
|
||||
* 是否启用数字校验
|
||||
* @since 0.0.11
|
||||
*/
|
||||
private boolean enableNumCheck = true;
|
||||
|
||||
/**
|
||||
* DCL 初始化 wordMap 信息
|
||||
*
|
||||
* @return 初始化后的结果
|
||||
* @since 0.0.4
|
||||
*/
|
||||
private static IWordMap initWordMap() {
|
||||
if(sensitiveWordMap == null) {
|
||||
if (sensitiveWordMap == null) {
|
||||
synchronized (IWordMap.class) {
|
||||
if(sensitiveWordMap == null) {
|
||||
if (sensitiveWordMap == null) {
|
||||
// 加载配置信息
|
||||
IWordData wordData = new SensitiveWordData();
|
||||
List<String> lines = wordData.getWordData();
|
||||
@@ -65,8 +65,9 @@ public class SensitiveWordBs {
|
||||
|
||||
/**
|
||||
* 新建验证实例
|
||||
*
|
||||
* <p>
|
||||
* double-lock
|
||||
*
|
||||
* @return this
|
||||
* @since 0.0.1
|
||||
*/
|
||||
@@ -81,16 +82,40 @@ public class SensitiveWordBs {
|
||||
|
||||
/**
|
||||
* 设置是否启动数字检测
|
||||
*
|
||||
* @param enableNumCheck 数字检测
|
||||
* @since 0.0.11
|
||||
*/
|
||||
public SensitiveWordBs enableNumCheck(boolean enableNumCheck) {
|
||||
this.context.sensitiveNumCheck(enableNumCheck);
|
||||
this.context.sensitiveCheckNum(enableNumCheck);
|
||||
return this;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 设置是否启动 email 检测
|
||||
*
|
||||
* @param enableEmailCheck email 检测
|
||||
* @since 0.0.11
|
||||
*/
|
||||
public SensitiveWordBs enableEmailCheck(boolean enableEmailCheck) {
|
||||
this.context.sensitiveCheckEmail(enableEmailCheck);
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* 设置是否启动 url 检测
|
||||
*
|
||||
* @param enableUrlCheck url 检测
|
||||
* @since 0.0.12
|
||||
*/
|
||||
public SensitiveWordBs enableUrlCheck(boolean enableUrlCheck) {
|
||||
this.context.sensitiveCheckUrl(enableUrlCheck);
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* 构建默认的上下文
|
||||
*
|
||||
* @return 结果
|
||||
* @since 0.0.4
|
||||
*/
|
||||
@@ -105,13 +130,16 @@ public class SensitiveWordBs {
|
||||
wordContext.ignoreRepeat(true);
|
||||
|
||||
// 开启校验
|
||||
wordContext.sensitiveNumCheck(true);
|
||||
wordContext.sensitiveEmailCheck(true);
|
||||
wordContext.sensitiveCheckNum(true);
|
||||
wordContext.sensitiveCheckEmail(true);
|
||||
wordContext.sensitiveCheckUrl(true);
|
||||
|
||||
return wordContext;
|
||||
}
|
||||
|
||||
/**
|
||||
* 是否包含敏感词
|
||||
*
|
||||
* @param target 目标字符串
|
||||
* @return 是否
|
||||
* @since 0.0.1
|
||||
@@ -124,6 +152,7 @@ public class SensitiveWordBs {
|
||||
* 返回所有的敏感词
|
||||
* 1. 这里是默认去重的,且是有序的。
|
||||
* 2. 如果不存在,返回空列表
|
||||
*
|
||||
* @param target 目标字符串
|
||||
* @return 敏感词列表
|
||||
* @since 0.0.1
|
||||
@@ -135,6 +164,7 @@ public class SensitiveWordBs {
|
||||
/**
|
||||
* 返回第一个敏感词
|
||||
* (1)如果不存在,则返回 {@code null}
|
||||
*
|
||||
* @param target 目标字符串
|
||||
* @return 敏感词
|
||||
* @since 0.0.1
|
||||
@@ -145,7 +175,8 @@ public class SensitiveWordBs {
|
||||
|
||||
/**
|
||||
* 替换所有内容
|
||||
* @param target 目标字符串
|
||||
*
|
||||
* @param target 目标字符串
|
||||
* @param replaceChar 替换为的 char
|
||||
* @return 替换后结果
|
||||
* @since 0.0.2
|
||||
@@ -157,12 +188,13 @@ public class SensitiveWordBs {
|
||||
/**
|
||||
* 替换所有内容
|
||||
* 1. 默认使用空格替换,避免星号改变 md 的格式。
|
||||
*
|
||||
* @param target 目标字符串
|
||||
* @return 替换后结果
|
||||
* @since 0.0.2
|
||||
*/
|
||||
public String replace(final String target) {
|
||||
return this.replace(target, CharConst.BLANK);
|
||||
return this.replace(target, CharConst.STAR);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -39,7 +39,7 @@ public class SensitiveWordContext implements IWordContext {
|
||||
* 是否进行敏感数字检测
|
||||
* @since 0.0.6
|
||||
*/
|
||||
private boolean sensitiveNumCheck;
|
||||
private boolean sensitiveCheckNum;
|
||||
|
||||
/**
|
||||
* 是否忽略中文繁简体
|
||||
@@ -63,7 +63,13 @@ public class SensitiveWordContext implements IWordContext {
|
||||
* 是否进行邮箱测试
|
||||
* @since 0.0.9
|
||||
*/
|
||||
private boolean sensitiveEmailCheck;
|
||||
private boolean sensitiveCheckEmail;
|
||||
|
||||
/**
|
||||
* 是否进行 url 测试
|
||||
* @since 0.0.12
|
||||
*/
|
||||
private boolean sensitiveCheckUrl;
|
||||
|
||||
/**
|
||||
* 私有化构造器
|
||||
@@ -126,13 +132,13 @@ public class SensitiveWordContext implements IWordContext {
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean sensitiveNumCheck() {
|
||||
return sensitiveNumCheck;
|
||||
public boolean sensitiveCheckNum() {
|
||||
return sensitiveCheckNum;
|
||||
}
|
||||
|
||||
@Override
|
||||
public SensitiveWordContext sensitiveNumCheck(boolean sensitiveNumCheck) {
|
||||
this.sensitiveNumCheck = sensitiveNumCheck;
|
||||
public SensitiveWordContext sensitiveCheckNum(boolean sensitiveCheckNum) {
|
||||
this.sensitiveCheckNum = sensitiveCheckNum;
|
||||
return this;
|
||||
}
|
||||
|
||||
@@ -170,14 +176,24 @@ public class SensitiveWordContext implements IWordContext {
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean sensitiveEmailCheck() {
|
||||
return sensitiveEmailCheck;
|
||||
public boolean sensitiveCheckEmail() {
|
||||
return sensitiveCheckEmail;
|
||||
}
|
||||
|
||||
@Override
|
||||
public SensitiveWordContext sensitiveEmailCheck(boolean sensitiveEmailCheck) {
|
||||
this.sensitiveEmailCheck = sensitiveEmailCheck;
|
||||
public SensitiveWordContext sensitiveCheckEmail(boolean sensitiveCheckEmail) {
|
||||
this.sensitiveCheckEmail = sensitiveCheckEmail;
|
||||
return this;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean sensitiveCheckUrl() {
|
||||
return sensitiveCheckUrl;
|
||||
}
|
||||
|
||||
@Override
|
||||
public SensitiveWordContext sensitiveCheckUrl(boolean sensitiveCheckUrl) {
|
||||
this.sensitiveCheckUrl = sensitiveCheckUrl;
|
||||
return this;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
package com.github.houbb.sensitive.word.api;
|
||||
package com.github.houbb.sensitive.word.support.check;
|
||||
|
||||
import com.github.houbb.sensitive.word.api.IWordContext;
|
||||
import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum;
|
||||
|
||||
/**
|
||||
@@ -33,9 +34,9 @@ public interface ISensitiveCheck {
|
||||
* @return 敏感信息对应的长度
|
||||
* @since 0.0.5
|
||||
*/
|
||||
int checkSensitive(final String txt,
|
||||
final int beginIndex,
|
||||
final ValidModeEnum validModeEnum,
|
||||
final IWordContext context);
|
||||
SensitiveCheckResult sensitiveCheck(final String txt,
|
||||
final int beginIndex,
|
||||
final ValidModeEnum validModeEnum,
|
||||
final IWordContext context);
|
||||
|
||||
}
|
||||
@@ -1,53 +0,0 @@
|
||||
package com.github.houbb.sensitive.word.support.check;
|
||||
|
||||
import com.github.houbb.heaven.annotation.ThreadSafe;
|
||||
import com.github.houbb.heaven.support.instance.impl.Instances;
|
||||
import com.github.houbb.heaven.util.guava.Guavas;
|
||||
import com.github.houbb.sensitive.word.api.ISensitiveCheck;
|
||||
import com.github.houbb.sensitive.word.api.IWordContext;
|
||||
import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* 敏感词检测责任链模式
|
||||
*
|
||||
* 这里可以提供一个公共的父类。
|
||||
*
|
||||
*
|
||||
* DFA 算法的优化可以参考论文:
|
||||
* 【DFA 算法】各种论文。
|
||||
*
|
||||
* @author binbin.hou
|
||||
* @since 0.0.5
|
||||
*/
|
||||
@ThreadSafe
|
||||
public class SensitiveCheckChain implements ISensitiveCheck {
|
||||
|
||||
@Override
|
||||
public int checkSensitive(String txt, int beginIndex, ValidModeEnum validModeEnum, IWordContext context) {
|
||||
// 初始化责任链
|
||||
List<ISensitiveCheck> sensitiveChecks = Guavas.newArrayList();
|
||||
// 默认添加敏感词校验
|
||||
sensitiveChecks.add(Instances.singleton(SensitiveWordCheck.class));
|
||||
if(context.sensitiveNumCheck()) {
|
||||
sensitiveChecks.add(Instances.singleton(SensitiveNumCheck.class));
|
||||
}
|
||||
if(context.sensitiveEmailCheck()) {
|
||||
sensitiveChecks.add(Instances.singleton(SensitiveEmailCheck.class));
|
||||
}
|
||||
|
||||
// 循环调用
|
||||
for(ISensitiveCheck sensitiveCheck : sensitiveChecks) {
|
||||
int result = sensitiveCheck.checkSensitive(txt, beginIndex, validModeEnum, context);
|
||||
|
||||
if(result > 0) {
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
// 默认返回 0
|
||||
return 0;
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,64 @@
|
||||
package com.github.houbb.sensitive.word.support.check;
|
||||
|
||||
/**
|
||||
* 敏感信息监测接口结果
|
||||
*
|
||||
* 可以使用责任链的模式,循环调用。
|
||||
* @author binbin.hou
|
||||
* @since 0.0.12
|
||||
*/
|
||||
public class SensitiveCheckResult {
|
||||
|
||||
/**
|
||||
* 下标
|
||||
* @since 0.0.12
|
||||
*/
|
||||
private int index;
|
||||
|
||||
/**
|
||||
* 检测类
|
||||
* @since 0.0.12
|
||||
*/
|
||||
private Class<? extends ISensitiveCheck> checkClass;
|
||||
|
||||
/**
|
||||
* 实例化
|
||||
* @param index 返回索引
|
||||
* @param checkClass 验证类
|
||||
* @return 结果
|
||||
* @since 0.0.12
|
||||
*/
|
||||
public static SensitiveCheckResult of(final int index,
|
||||
final Class<? extends ISensitiveCheck> checkClass) {
|
||||
SensitiveCheckResult result = new SensitiveCheckResult();
|
||||
result.index(index).checkClass(checkClass);
|
||||
return result;
|
||||
}
|
||||
|
||||
public int index() {
|
||||
return index;
|
||||
}
|
||||
|
||||
public SensitiveCheckResult index(int index) {
|
||||
this.index = index;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Class<? extends ISensitiveCheck> checkClass() {
|
||||
return checkClass;
|
||||
}
|
||||
|
||||
public SensitiveCheckResult checkClass(Class<? extends ISensitiveCheck> checkClass) {
|
||||
this.checkClass = checkClass;
|
||||
return this;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "SensitiveCheckResult{" +
|
||||
"index=" + index +
|
||||
", checkClass=" + checkClass +
|
||||
'}';
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,58 @@
|
||||
package com.github.houbb.sensitive.word.support.check.impl;
|
||||
|
||||
import com.github.houbb.heaven.annotation.ThreadSafe;
|
||||
import com.github.houbb.heaven.support.instance.impl.Instances;
|
||||
import com.github.houbb.heaven.util.guava.Guavas;
|
||||
import com.github.houbb.sensitive.word.api.IWordContext;
|
||||
import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum;
|
||||
import com.github.houbb.sensitive.word.support.check.ISensitiveCheck;
|
||||
import com.github.houbb.sensitive.word.support.check.SensitiveCheckResult;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* 敏感词检测责任链模式
|
||||
*
|
||||
* 这里可以提供一个公共的父类。
|
||||
*
|
||||
*
|
||||
* DFA 算法的优化可以参考论文:
|
||||
* 【DFA 算法】各种论文。
|
||||
*
|
||||
* @author binbin.hou
|
||||
* @since 0.0.5
|
||||
*/
|
||||
@ThreadSafe
|
||||
public class SensitiveCheckChain implements ISensitiveCheck {
|
||||
|
||||
@Override
|
||||
public SensitiveCheckResult sensitiveCheck(String txt, int beginIndex, ValidModeEnum validModeEnum, IWordContext context) {
|
||||
// 初始化责任链
|
||||
List<ISensitiveCheck> sensitiveChecks = Guavas.newArrayList();
|
||||
// 默认添加敏感词校验
|
||||
sensitiveChecks.add(Instances.singleton(SensitiveCheckWord.class));
|
||||
if(context.sensitiveCheckNum()) {
|
||||
sensitiveChecks.add(Instances.singleton(SensitiveCheckNum.class));
|
||||
}
|
||||
if(context.sensitiveCheckEmail()) {
|
||||
sensitiveChecks.add(Instances.singleton(SensitiveCheckEmail.class));
|
||||
}
|
||||
if(context.sensitiveCheckUrl()) {
|
||||
sensitiveChecks.add(Instances.singleton(SensitiveCheckUrl.class));
|
||||
}
|
||||
|
||||
// 循环调用
|
||||
for(ISensitiveCheck sensitiveCheck : sensitiveChecks) {
|
||||
SensitiveCheckResult result = sensitiveCheck.sensitiveCheck(txt, beginIndex, validModeEnum, context);
|
||||
|
||||
if(result.index() > 0) {
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
// 这里直接进行正则表达式相关的调用。
|
||||
// 默认返回 0
|
||||
return SensitiveCheckResult.of(0, SensitiveCheckChain.class);
|
||||
}
|
||||
|
||||
}
|
||||
@@ -1,12 +1,13 @@
|
||||
package com.github.houbb.sensitive.word.support.check;
|
||||
package com.github.houbb.sensitive.word.support.check.impl;
|
||||
|
||||
import com.github.houbb.heaven.annotation.ThreadSafe;
|
||||
import com.github.houbb.heaven.support.instance.impl.Instances;
|
||||
import com.github.houbb.heaven.util.lang.CharUtil;
|
||||
import com.github.houbb.heaven.util.util.regex.RegexUtil;
|
||||
import com.github.houbb.sensitive.word.api.ISensitiveCheck;
|
||||
import com.github.houbb.sensitive.word.api.IWordContext;
|
||||
import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum;
|
||||
import com.github.houbb.sensitive.word.support.check.ISensitiveCheck;
|
||||
import com.github.houbb.sensitive.word.support.check.SensitiveCheckResult;
|
||||
import com.github.houbb.sensitive.word.support.format.CharFormatChain;
|
||||
|
||||
/**
|
||||
@@ -25,10 +26,10 @@ import com.github.houbb.sensitive.word.support.format.CharFormatChain;
|
||||
* @since 0.0.9
|
||||
*/
|
||||
@ThreadSafe
|
||||
public class SensitiveEmailCheck implements ISensitiveCheck {
|
||||
public class SensitiveCheckEmail implements ISensitiveCheck {
|
||||
|
||||
@Override
|
||||
public int checkSensitive(String txt, int beginIndex, ValidModeEnum validModeEnum, IWordContext context) {
|
||||
public SensitiveCheckResult sensitiveCheck(String txt, int beginIndex, ValidModeEnum validModeEnum, IWordContext context) {
|
||||
// 记录敏感词的长度
|
||||
int lengthCount = 0;
|
||||
int actualLength = 0;
|
||||
@@ -59,7 +60,7 @@ public class SensitiveEmailCheck implements ISensitiveCheck {
|
||||
}
|
||||
}
|
||||
|
||||
return actualLength;
|
||||
return SensitiveCheckResult.of(actualLength, SensitiveCheckEmail.class);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -1,17 +1,12 @@
|
||||
package com.github.houbb.sensitive.word.support.check;
|
||||
package com.github.houbb.sensitive.word.support.check.impl;
|
||||
|
||||
import com.github.houbb.heaven.annotation.ThreadSafe;
|
||||
import com.github.houbb.heaven.support.instance.impl.Instances;
|
||||
import com.github.houbb.heaven.util.io.FileUtil;
|
||||
import com.github.houbb.heaven.util.lang.NumUtil;
|
||||
import com.github.houbb.heaven.util.lang.StringUtil;
|
||||
import com.github.houbb.sensitive.word.api.ISensitiveCheck;
|
||||
import com.github.houbb.sensitive.word.api.IWordContext;
|
||||
import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum;
|
||||
import com.github.houbb.sensitive.word.support.check.ISensitiveCheck;
|
||||
import com.github.houbb.sensitive.word.support.check.SensitiveCheckResult;
|
||||
import com.github.houbb.sensitive.word.support.format.CharFormatChain;
|
||||
import com.github.houbb.sensitive.word.support.format.IgnoreNumStyleCharFormat;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* 敏感词监测实现
|
||||
@@ -21,10 +16,10 @@ import java.util.List;
|
||||
* @since 0.0.5
|
||||
*/
|
||||
@ThreadSafe
|
||||
public class SensitiveNumCheck implements ISensitiveCheck {
|
||||
public class SensitiveCheckNum implements ISensitiveCheck {
|
||||
|
||||
@Override
|
||||
public int checkSensitive(String txt, int beginIndex, ValidModeEnum validModeEnum, IWordContext context) {
|
||||
public SensitiveCheckResult sensitiveCheck(String txt, int beginIndex, ValidModeEnum validModeEnum, IWordContext context) {
|
||||
// 记录敏感词的长度
|
||||
int lengthCount = 0;
|
||||
int actualLength = 0;
|
||||
@@ -55,7 +50,7 @@ public class SensitiveNumCheck implements ISensitiveCheck {
|
||||
}
|
||||
}
|
||||
|
||||
return actualLength;
|
||||
return SensitiveCheckResult.of(actualLength, SensitiveCheckNum.class);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -0,0 +1,82 @@
|
||||
package com.github.houbb.sensitive.word.support.check.impl;
|
||||
|
||||
import com.github.houbb.heaven.annotation.CommonEager;
|
||||
import com.github.houbb.heaven.annotation.ThreadSafe;
|
||||
import com.github.houbb.heaven.support.instance.impl.Instances;
|
||||
import com.github.houbb.heaven.util.lang.CharUtil;
|
||||
import com.github.houbb.heaven.util.util.regex.RegexUtil;
|
||||
import com.github.houbb.sensitive.word.api.IWordContext;
|
||||
import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum;
|
||||
import com.github.houbb.sensitive.word.support.check.ISensitiveCheck;
|
||||
import com.github.houbb.sensitive.word.support.check.SensitiveCheckResult;
|
||||
import com.github.houbb.sensitive.word.support.format.CharFormatChain;
|
||||
|
||||
/**
|
||||
* URL 正则表达式检测实现。
|
||||
*
|
||||
* 也可以严格的保留下来。
|
||||
*
|
||||
* (1)暂时先粗略的处理 web-site
|
||||
* (2)如果网址的最后为图片类型,则跳过。
|
||||
* (3)长度超过 70,直接结束。
|
||||
*
|
||||
* @author binbin.hou
|
||||
* @since 0.0.9
|
||||
*/
|
||||
@ThreadSafe
|
||||
public class SensitiveCheckUrl implements ISensitiveCheck {
|
||||
|
||||
/**
|
||||
* 最长的网址长度
|
||||
* @since 0.0.12
|
||||
*/
|
||||
private static final int MAX_WEB_SITE_LEN = 70;
|
||||
|
||||
@Override
|
||||
public SensitiveCheckResult sensitiveCheck(String txt, int beginIndex, ValidModeEnum validModeEnum, IWordContext context) {
|
||||
// 记录敏感词的长度
|
||||
int lengthCount = 0;
|
||||
int actualLength = 0;
|
||||
|
||||
StringBuilder stringBuilder = new StringBuilder();
|
||||
// 这里偷懒直接使用 String 拼接,然后结合正则表达式。
|
||||
// DFA 本质就可以做正则表达式,这样实现不免性能会差一些。
|
||||
// 后期如果有想法,对 DFA 进一步深入学习后,将进行优化。
|
||||
for(int i = beginIndex; i < txt.length(); i++) {
|
||||
char currentChar = txt.charAt(i);
|
||||
char mappingChar = Instances.singleton(CharFormatChain.class)
|
||||
.format(currentChar, context);
|
||||
|
||||
if(CharUtil.isWebSiteChar(mappingChar)
|
||||
&& lengthCount <= MAX_WEB_SITE_LEN) {
|
||||
lengthCount++;
|
||||
stringBuilder.append(currentChar);
|
||||
|
||||
if(isCondition(stringBuilder.toString())) {
|
||||
actualLength = lengthCount;
|
||||
|
||||
// 是否遍历全部匹配的模式
|
||||
if(ValidModeEnum.FAIL_FAST.equals(validModeEnum)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return SensitiveCheckResult.of(actualLength, SensitiveCheckUrl.class);
|
||||
}
|
||||
|
||||
/**
|
||||
* 这里指定一个阈值条件
|
||||
* @param string 长度
|
||||
* @return 是否满足条件
|
||||
* @since 0.0.12
|
||||
*/
|
||||
private boolean isCondition(final String string) {
|
||||
return RegexUtil.isWebSite(string);
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
@@ -1,12 +1,13 @@
|
||||
package com.github.houbb.sensitive.word.support.check;
|
||||
package com.github.houbb.sensitive.word.support.check.impl;
|
||||
|
||||
import com.github.houbb.heaven.annotation.ThreadSafe;
|
||||
import com.github.houbb.heaven.support.instance.impl.Instances;
|
||||
import com.github.houbb.heaven.util.lang.ObjectUtil;
|
||||
import com.github.houbb.sensitive.word.api.ISensitiveCheck;
|
||||
import com.github.houbb.sensitive.word.api.IWordContext;
|
||||
import com.github.houbb.sensitive.word.constant.AppConst;
|
||||
import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum;
|
||||
import com.github.houbb.sensitive.word.support.check.ISensitiveCheck;
|
||||
import com.github.houbb.sensitive.word.support.check.SensitiveCheckResult;
|
||||
import com.github.houbb.sensitive.word.support.format.CharFormatChain;
|
||||
|
||||
import java.util.Map;
|
||||
@@ -17,10 +18,10 @@ import java.util.Map;
|
||||
* @since 0.0.5
|
||||
*/
|
||||
@ThreadSafe
|
||||
public class SensitiveWordCheck implements ISensitiveCheck {
|
||||
public class SensitiveCheckWord implements ISensitiveCheck {
|
||||
|
||||
@Override
|
||||
public int checkSensitive(String txt, int beginIndex, ValidModeEnum validModeEnum, IWordContext context) {
|
||||
public SensitiveCheckResult sensitiveCheck(String txt, int beginIndex, ValidModeEnum validModeEnum, IWordContext context) {
|
||||
Map nowMap = context.sensitiveWordMap();
|
||||
|
||||
// 记录敏感词的长度
|
||||
@@ -53,7 +54,7 @@ public class SensitiveWordCheck implements ISensitiveCheck {
|
||||
}
|
||||
}
|
||||
|
||||
return actualLength;
|
||||
return SensitiveCheckResult.of(actualLength, SensitiveCheckWord.class);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -3,6 +3,7 @@ package com.github.houbb.sensitive.word.support.map;
|
||||
import com.github.houbb.heaven.annotation.ThreadSafe;
|
||||
import com.github.houbb.heaven.support.instance.impl.Instances;
|
||||
import com.github.houbb.heaven.util.guava.Guavas;
|
||||
import com.github.houbb.heaven.util.io.FileUtil;
|
||||
import com.github.houbb.heaven.util.lang.CharUtil;
|
||||
import com.github.houbb.heaven.util.lang.ObjectUtil;
|
||||
import com.github.houbb.heaven.util.lang.StringUtil;
|
||||
@@ -12,7 +13,9 @@ import com.github.houbb.sensitive.word.api.IWordContext;
|
||||
import com.github.houbb.sensitive.word.api.IWordMap;
|
||||
import com.github.houbb.sensitive.word.constant.AppConst;
|
||||
import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum;
|
||||
import com.github.houbb.sensitive.word.support.check.SensitiveCheckChain;
|
||||
import com.github.houbb.sensitive.word.support.check.SensitiveCheckResult;
|
||||
import com.github.houbb.sensitive.word.support.check.impl.SensitiveCheckChain;
|
||||
import com.github.houbb.sensitive.word.support.check.impl.SensitiveCheckUrl;
|
||||
|
||||
import java.util.Collection;
|
||||
import java.util.HashMap;
|
||||
@@ -118,9 +121,9 @@ public class SensitiveWordMap implements IWordMap {
|
||||
}
|
||||
|
||||
for (int i = 0; i < string.length(); i++) {
|
||||
int checkResult = checkSensitive(string, i, ValidModeEnum.FAIL_FAST, context);
|
||||
SensitiveCheckResult checkResult = sensitiveCheck(string, i, ValidModeEnum.FAIL_FAST, context);
|
||||
// 快速返回
|
||||
if (checkResult > 0) {
|
||||
if (checkResult.index() > 0) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
@@ -178,9 +181,9 @@ public class SensitiveWordMap implements IWordMap {
|
||||
|
||||
List<String> resultList = Guavas.newArrayList();
|
||||
for (int i = 0; i < text.length(); i++) {
|
||||
int wordLength = checkSensitive(text, i, ValidModeEnum.FAIL_OVER, context);
|
||||
|
||||
SensitiveCheckResult checkResult = sensitiveCheck(text, i, ValidModeEnum.FAIL_OVER, context);
|
||||
// 命中
|
||||
int wordLength = checkResult.index();
|
||||
if (wordLength > 0) {
|
||||
// 保存敏感词
|
||||
String sensitiveWord = text.substring(i, i + wordLength);
|
||||
@@ -223,12 +226,22 @@ public class SensitiveWordMap implements IWordMap {
|
||||
for (int i = 0; i < target.length(); i++) {
|
||||
char currentChar = target.charAt(i);
|
||||
// 内层直接从 i 开始往后遍历,这个算法的,获取第一个匹配的单词
|
||||
int wordLength = checkSensitive(target, i, ValidModeEnum.FAIL_OVER, context);
|
||||
SensitiveCheckResult checkResult = sensitiveCheck(target, i, ValidModeEnum.FAIL_OVER, context);
|
||||
|
||||
// 敏感词
|
||||
int wordLength = checkResult.index();
|
||||
if(wordLength > 0) {
|
||||
String replaceStr = CharUtil.repeat(replaceChar, wordLength);
|
||||
resultBuilder.append(replaceStr);
|
||||
// 是否执行替换
|
||||
Class checkClass = checkResult.checkClass();
|
||||
String string = target.substring(i, i+wordLength);
|
||||
if(SensitiveCheckUrl.class.equals(checkClass)
|
||||
&& FileUtil.isImage(string)) {
|
||||
// 直接使用原始内容,避免 markdown 图片转换失败
|
||||
resultBuilder.append(string);
|
||||
} else {
|
||||
String replaceStr = CharUtil.repeat(replaceChar, wordLength);
|
||||
resultBuilder.append(replaceStr);
|
||||
}
|
||||
|
||||
// 直接跳过敏感词的长度
|
||||
i += wordLength-1;
|
||||
@@ -242,13 +255,13 @@ public class SensitiveWordMap implements IWordMap {
|
||||
}
|
||||
|
||||
@Override
|
||||
public int checkSensitive(String txt, int beginIndex, ValidModeEnum validModeEnum, IWordContext context) {
|
||||
public SensitiveCheckResult sensitiveCheck(String txt, int beginIndex, ValidModeEnum validModeEnum, IWordContext context) {
|
||||
// 默认执行敏感词操作
|
||||
context.sensitiveWordMap(innerWordMap);
|
||||
|
||||
// 责任链模式调用
|
||||
return Instances.singleton(SensitiveCheckChain.class)
|
||||
.checkSensitive(txt, beginIndex, validModeEnum, context);
|
||||
.sensitiveCheck(txt, beginIndex, validModeEnum, context);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -23,7 +23,7 @@ public class SensitiveWordBsChineseTest {
|
||||
final String text = "我爱我的祖国和五星紅旗。";
|
||||
|
||||
List<String> wordList = SensitiveWordBs.newInstance().findAll(text);
|
||||
Assert.assertEquals("[五星紅旗]", wordList.toString());
|
||||
Assert.assertEquals("[祖国, 五星紅旗]", wordList.toString());
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -35,7 +35,7 @@ public class SensitiveWordBsEmailTest {
|
||||
final String text = "楼主好人,邮箱 123456789@xx.com";
|
||||
|
||||
List<String> wordList = SensitiveWordBs.newInstance().findAll(text);
|
||||
Assert.assertEquals("[邮箱, 123456789]", wordList.toString());
|
||||
Assert.assertEquals("[邮箱, 123456789, xx.com]", wordList.toString());
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -0,0 +1,50 @@
|
||||
package com.github.houbb.sensitive.word.bs;
|
||||
|
||||
import org.junit.Assert;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* <p> project: sensitive-word-SensitiveWordBsTest </p>
|
||||
* <p> create on 2020/1/7 23:43 </p>
|
||||
*
|
||||
* @author Administrator
|
||||
* @since 0.0.12
|
||||
*/
|
||||
public class SensitiveWordBsUrlTest {
|
||||
|
||||
/**
|
||||
* 忽略中文繁简体
|
||||
* @since 0.0.12
|
||||
*/
|
||||
@Test
|
||||
public void commonUrlTest() {
|
||||
final String text = "点击链接 www.baidu.com查看答案";
|
||||
|
||||
List<String> wordList = SensitiveWordBs.newInstance().findAll(text);
|
||||
Assert.assertEquals("[链接, www.baidu.com]", wordList.toString());
|
||||
|
||||
Assert.assertEquals("点击** *************查看答案", SensitiveWordBs
|
||||
.newInstance().replace(text));
|
||||
}
|
||||
|
||||
/**
|
||||
* 图片测试
|
||||
*
|
||||
* (1)可以检测
|
||||
* (2)默认不替换
|
||||
*
|
||||
* @since 0.0.12
|
||||
*/
|
||||
@Test
|
||||
public void imageUrlTest() {
|
||||
final String text = "双击查看大图 www.big-image.png查看";
|
||||
|
||||
List<String> wordList = SensitiveWordBs.newInstance().findAll(text);
|
||||
Assert.assertEquals("[www.big-image.png]", wordList.toString());
|
||||
|
||||
Assert.assertEquals(text, SensitiveWordBs.newInstance().replace(text));
|
||||
}
|
||||
|
||||
}
|
||||
Reference in New Issue
Block a user