release branch 0.0.12

This commit is contained in:
binbin.hou
2020-01-16 20:54:14 +08:00
parent 88fc3a68d1
commit f45cb0be5a
20 changed files with 429 additions and 135 deletions

View File

@@ -87,3 +87,9 @@
|:---|:---|:---|:---|:--| |:---|:---|:---|:---|:--|
| 1 | A | 添加对于数字过滤的可配置型 | 2020-1-14 22:48:12 | | | 1 | A | 添加对于数字过滤的可配置型 | 2020-1-14 22:48:12 | |
| 2 | A | 添加部分敏感词 | 2020-1-14 22:48:12 | | | 2 | A | 添加部分敏感词 | 2020-1-14 22:48:12 | |
# release_0.0.12
| 序号 | 变更类型 | 说明 | 时间 | 备注 |
|:---|:---|:---|:---|:--|
| 1 | A | 添加对于网址的过滤 | 2020-1-16 20:51:58 | |

View File

@@ -1,6 +1,8 @@
# 是否为邮箱 check # 是否为邮箱 check
暂时先使用基本的正则表达式,
================== ==================
网址等等 网址等等
@@ -13,6 +15,10 @@ Image-URL 检测,避免替换错误。
针对不同的信息脱敏,则需要知道对应的检测代码是什么。 针对不同的信息脱敏,则需要知道对应的检测代码是什么。
jpg
png
jpeg
gif
## 是否脱敏的配置 ## 是否脱敏的配置
@@ -26,4 +32,11 @@ Image-URL 检测,避免替换错误。
可以直接开辟另一道验证方式。 可以直接开辟另一道验证方式。
直接 regex+全文检索实现。 直接 regex+全文检索实现。
# 前提
首先实现 Regex
这里也可以支持 allow_regex/deny_regex

17
pom.xml
View File

@@ -6,7 +6,7 @@
<groupId>com.github.houbb</groupId> <groupId>com.github.houbb</groupId>
<artifactId>sensitive-word</artifactId> <artifactId>sensitive-word</artifactId>
<version>0.0.12-SNAPSHOT</version> <version>0.0.12</version>
<properties> <properties>
<!--============================== All Plugins START ==============================--> <!--============================== All Plugins START ==============================-->
@@ -25,7 +25,7 @@
<project.compiler.level>1.7</project.compiler.level> <project.compiler.level>1.7</project.compiler.level>
<!--============================== INTER ==============================--> <!--============================== INTER ==============================-->
<heaven.version>0.1.72</heaven.version> <heaven.version>0.1.73</heaven.version>
<opencc4j.version>1.2.0</opencc4j.version> <opencc4j.version>1.2.0</opencc4j.version>
<!--============================== OTHER ==============================--> <!--============================== OTHER ==============================-->
@@ -37,13 +37,6 @@
<!--============================== SELF ==============================--> <!--============================== SELF ==============================-->
<!--============================== INTER ==============================--> <!--============================== INTER ==============================-->
<dependency>
<groupId>com.github.houbb</groupId>
<artifactId>heaven</artifactId>
<version>${heaven.version}</version>
<optional>true</optional>
</dependency>
<dependency> <dependency>
<groupId>com.github.houbb</groupId> <groupId>com.github.houbb</groupId>
<artifactId>opencc4j</artifactId> <artifactId>opencc4j</artifactId>
@@ -59,6 +52,12 @@
</exclusion> </exclusion>
</exclusions> </exclusions>
</dependency> </dependency>
<dependency>
<groupId>com.github.houbb</groupId>
<artifactId>heaven</artifactId>
<version>${heaven.version}</version>
</dependency>
<!--============================== OTHER ==============================--> <!--============================== OTHER ==============================-->
<dependency> <dependency>

View File

@@ -10,9 +10,9 @@ ECHO "============================= RELEASE START..."
:: 版本号信息(需要手动指定) :: 版本号信息(需要手动指定)
:::: 旧版本名称 :::: 旧版本名称
SET version=0.0.11 SET version=0.0.12
:::: 新版本名称 :::: 新版本名称
SET newVersion=0.0.12 SET newVersion=0.0.13
:::: 组织名称 :::: 组织名称
SET groupName=com.github.houbb SET groupName=com.github.houbb
:::: 项目名称 :::: 项目名称

View File

@@ -88,30 +88,45 @@ public interface IWordContext {
* @return 数字检测 * @return 数字检测
* @since 0.0.5 * @since 0.0.5
*/ */
boolean sensitiveNumCheck(); boolean sensitiveCheckNum();
/** /**
* 设置敏感数字检测 * 设置敏感数字检测
* @param sensitiveNumCheck 数字格式检测 * @param sensitiveCheckNum 数字格式检测
* @return this * @return this
* @since 0.0.5 * @since 0.0.5
*/ */
IWordContext sensitiveNumCheck(final boolean sensitiveNumCheck); IWordContext sensitiveCheckNum(final boolean sensitiveCheckNum);
/** /**
* 是否进行邮箱检测 * 是否进行邮箱检测
* @return this * @return this
* @since 0.0.9 * @since 0.0.9
*/ */
boolean sensitiveEmailCheck(); boolean sensitiveCheckEmail();
/** /**
* 设置敏感邮箱检测 * 设置敏感邮箱检测
* @param sensitiveEmailCheck 是否检测 * @param sensitiveCheckEmail 是否检测
* @return this * @return this
* @since 0.0.9 * @since 0.0.9
*/ */
IWordContext sensitiveEmailCheck(final boolean sensitiveEmailCheck); IWordContext sensitiveCheckEmail(final boolean sensitiveCheckEmail);
/**
* Whether sensitive URL (link) detection is enabled.
* @return {@code true} if URL detection is enabled
* @since 0.0.12
*/
boolean sensitiveCheckUrl();
/**
* Sets whether sensitive URL (link) detection is enabled.
* @param sensitiveCheckUrl whether URL detection should run
* @return this
* @since 0.0.12
*/
IWordContext sensitiveCheckUrl(final boolean sensitiveCheckUrl);
/** /**
* 忽略英文的写法 * 忽略英文的写法

View File

@@ -1,6 +1,7 @@
package com.github.houbb.sensitive.word.api; package com.github.houbb.sensitive.word.api;
import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum; import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum;
import com.github.houbb.sensitive.word.support.check.ISensitiveCheck;
import java.util.Collection; import java.util.Collection;
import java.util.List; import java.util.List;

View File

@@ -11,6 +11,7 @@ import java.util.List;
/** /**
* 敏感词引导类 * 敏感词引导类
*
* @author binbin.hou * @author binbin.hou
* @since 0.0.1 * @since 0.0.1
*/ */
@@ -18,37 +19,36 @@ public class SensitiveWordBs {
/** /**
* 私有化构造器 * 私有化构造器
*
* @since 0.0.1 * @since 0.0.1
*/ */
private SensitiveWordBs(){} private SensitiveWordBs() {
}
/** /**
* 敏感词 map * 敏感词 map
*
* @since 0.0.1 * @since 0.0.1
*/ */
private static volatile IWordMap sensitiveWordMap; private static volatile IWordMap sensitiveWordMap;
/** /**
* 默认的执行上下文 * 默认的执行上下文
*
* @since 0.0.4 * @since 0.0.4
*/ */
private volatile IWordContext context; private volatile IWordContext context;
/**
* 是否启用数字校验
* @since 0.0.11
*/
private boolean enableNumCheck = true;
/** /**
* DCL 初始化 wordMap 信息 * DCL 初始化 wordMap 信息
*
* @return 初始化后的结果 * @return 初始化后的结果
* @since 0.0.4 * @since 0.0.4
*/ */
private static IWordMap initWordMap() { private static IWordMap initWordMap() {
if(sensitiveWordMap == null) { if (sensitiveWordMap == null) {
synchronized (IWordMap.class) { synchronized (IWordMap.class) {
if(sensitiveWordMap == null) { if (sensitiveWordMap == null) {
// 加载配置信息 // 加载配置信息
IWordData wordData = new SensitiveWordData(); IWordData wordData = new SensitiveWordData();
List<String> lines = wordData.getWordData(); List<String> lines = wordData.getWordData();
@@ -65,8 +65,9 @@ public class SensitiveWordBs {
/** /**
* 新建验证实例 * 新建验证实例
* * <p>
* double-lock * double-lock
*
* @return this * @return this
* @since 0.0.1 * @since 0.0.1
*/ */
@@ -81,16 +82,40 @@ public class SensitiveWordBs {
/** /**
* 设置是否启动数字检测 * 设置是否启动数字检测
*
* @param enableNumCheck 数字检测 * @param enableNumCheck 数字检测
* @since 0.0.11 * @since 0.0.11
*/ */
public SensitiveWordBs enableNumCheck(boolean enableNumCheck) { public SensitiveWordBs enableNumCheck(boolean enableNumCheck) {
this.context.sensitiveNumCheck(enableNumCheck); this.context.sensitiveCheckNum(enableNumCheck);
return this; return this;
} }
/**
* Enables or disables e-mail detection by forwarding the flag to this
* instance's context.
*
* NOTE(review): confirm the version tag below; the sibling URL switch in this
* class is tagged 0.0.12.
*
* @param enableEmailCheck whether e-mail detection should run
* @return this
* @since 0.0.11
*/
public SensitiveWordBs enableEmailCheck(boolean enableEmailCheck) {
this.context.sensitiveCheckEmail(enableEmailCheck);
return this;
}
/**
* Enables or disables URL detection by forwarding the flag to this
* instance's context.
*
* @param enableUrlCheck whether URL detection should run
* @return this
* @since 0.0.12
*/
public SensitiveWordBs enableUrlCheck(boolean enableUrlCheck) {
this.context.sensitiveCheckUrl(enableUrlCheck);
return this;
}
/** /**
* 构建默认的上下文 * 构建默认的上下文
*
* @return 结果 * @return 结果
* @since 0.0.4 * @since 0.0.4
*/ */
@@ -105,13 +130,16 @@ public class SensitiveWordBs {
wordContext.ignoreRepeat(true); wordContext.ignoreRepeat(true);
// 开启校验 // 开启校验
wordContext.sensitiveNumCheck(true); wordContext.sensitiveCheckNum(true);
wordContext.sensitiveEmailCheck(true); wordContext.sensitiveCheckEmail(true);
wordContext.sensitiveCheckUrl(true);
return wordContext; return wordContext;
} }
/** /**
* 是否包含敏感词 * 是否包含敏感词
*
* @param target 目标字符串 * @param target 目标字符串
* @return 是否 * @return 是否
* @since 0.0.1 * @since 0.0.1
@@ -124,6 +152,7 @@ public class SensitiveWordBs {
* 返回所有的敏感词 * 返回所有的敏感词
* 1. 这里是默认去重的,且是有序的。 * 1. 这里是默认去重的,且是有序的。
* 2. 如果不存在,返回空列表 * 2. 如果不存在,返回空列表
*
* @param target 目标字符串 * @param target 目标字符串
* @return 敏感词列表 * @return 敏感词列表
* @since 0.0.1 * @since 0.0.1
@@ -135,6 +164,7 @@ public class SensitiveWordBs {
/** /**
* 返回第一个敏感词 * 返回第一个敏感词
* 1如果不存在则返回 {@code null} * 1如果不存在则返回 {@code null}
*
* @param target 目标字符串 * @param target 目标字符串
* @return 敏感词 * @return 敏感词
* @since 0.0.1 * @since 0.0.1
@@ -145,7 +175,8 @@ public class SensitiveWordBs {
/** /**
* 替换所有内容 * 替换所有内容
* @param target 目标字符串 *
* @param target 目标字符串
* @param replaceChar 替换为的 char * @param replaceChar 替换为的 char
* @return 替换后结果 * @return 替换后结果
* @since 0.0.2 * @since 0.0.2
@@ -157,12 +188,13 @@ public class SensitiveWordBs {
/** /**
* 替换所有内容 * 替换所有内容
* 1. 默认使用空格替换,避免星号改变 md 的格式。 * 1. 默认使用空格替换,避免星号改变 md 的格式。
*
* @param target 目标字符串 * @param target 目标字符串
* @return 替换后结果 * @return 替换后结果
* @since 0.0.2 * @since 0.0.2
*/ */
public String replace(final String target) { public String replace(final String target) {
return this.replace(target, CharConst.BLANK); return this.replace(target, CharConst.STAR);
} }
} }

View File

@@ -39,7 +39,7 @@ public class SensitiveWordContext implements IWordContext {
* 是否进行敏感数字检测 * 是否进行敏感数字检测
* @since 0.0.6 * @since 0.0.6
*/ */
private boolean sensitiveNumCheck; private boolean sensitiveCheckNum;
/** /**
* 是否忽略中文繁简体 * 是否忽略中文繁简体
@@ -63,7 +63,13 @@ public class SensitiveWordContext implements IWordContext {
* 是否进行邮箱测试 * 是否进行邮箱测试
* @since 0.0.9 * @since 0.0.9
*/ */
private boolean sensitiveEmailCheck; private boolean sensitiveCheckEmail;
/**
* 是否进行 url 测试
* @since 0.0.12
*/
private boolean sensitiveCheckUrl;
/** /**
* 私有化构造器 * 私有化构造器
@@ -126,13 +132,13 @@ public class SensitiveWordContext implements IWordContext {
} }
@Override @Override
public boolean sensitiveNumCheck() { public boolean sensitiveCheckNum() {
return sensitiveNumCheck; return sensitiveCheckNum;
} }
@Override @Override
public SensitiveWordContext sensitiveNumCheck(boolean sensitiveNumCheck) { public SensitiveWordContext sensitiveCheckNum(boolean sensitiveCheckNum) {
this.sensitiveNumCheck = sensitiveNumCheck; this.sensitiveCheckNum = sensitiveCheckNum;
return this; return this;
} }
@@ -170,14 +176,24 @@ public class SensitiveWordContext implements IWordContext {
} }
@Override @Override
public boolean sensitiveEmailCheck() { public boolean sensitiveCheckEmail() {
return sensitiveEmailCheck; return sensitiveCheckEmail;
} }
@Override @Override
public SensitiveWordContext sensitiveEmailCheck(boolean sensitiveEmailCheck) { public SensitiveWordContext sensitiveCheckEmail(boolean sensitiveCheckEmail) {
this.sensitiveEmailCheck = sensitiveEmailCheck; this.sensitiveCheckEmail = sensitiveCheckEmail;
return this; return this;
} }
/**
* Returns the stored URL-detection flag.
* @return current value of the {@code sensitiveCheckUrl} field
*/
@Override
public boolean sensitiveCheckUrl() {
return sensitiveCheckUrl;
}
/**
* Stores the URL-detection flag.
* @param sensitiveCheckUrl new value of the flag
* @return this, to allow call chaining
*/
@Override
public SensitiveWordContext sensitiveCheckUrl(boolean sensitiveCheckUrl) {
this.sensitiveCheckUrl = sensitiveCheckUrl;
return this;
}
} }

View File

@@ -1,5 +1,6 @@
package com.github.houbb.sensitive.word.api; package com.github.houbb.sensitive.word.support.check;
import com.github.houbb.sensitive.word.api.IWordContext;
import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum; import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum;
/** /**
@@ -33,9 +34,9 @@ public interface ISensitiveCheck {
* @return 敏感信息对应的长度 * @return 敏感信息对应的长度
* @since 0.0.5 * @since 0.0.5
*/ */
int checkSensitive(final String txt, SensitiveCheckResult sensitiveCheck(final String txt,
final int beginIndex, final int beginIndex,
final ValidModeEnum validModeEnum, final ValidModeEnum validModeEnum,
final IWordContext context); final IWordContext context);
} }

View File

@@ -1,53 +0,0 @@
package com.github.houbb.sensitive.word.support.check;
import com.github.houbb.heaven.annotation.ThreadSafe;
import com.github.houbb.heaven.support.instance.impl.Instances;
import com.github.houbb.heaven.util.guava.Guavas;
import com.github.houbb.sensitive.word.api.ISensitiveCheck;
import com.github.houbb.sensitive.word.api.IWordContext;
import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum;
import java.util.List;
/**
 * Chain of sensitive-content checks.
 *
 * The word check always runs first; the number and e-mail checks are
 * appended only when the context enables them. The first check that
 * reports a positive length decides the result.
 *
 * A shared parent class could be provided here.
 *
 * For optimising the DFA algorithm, see the papers on the subject.
 *
 * @author binbin.hou
 * @since 0.0.5
 */
@ThreadSafe
public class SensitiveCheckChain implements ISensitiveCheck {

    @Override
    public int checkSensitive(String txt, int beginIndex, ValidModeEnum validModeEnum, IWordContext context) {
        final List<ISensitiveCheck> chain = buildChain(context);

        // Walk the chain in order; the first positive length wins.
        for (int pos = 0; pos < chain.size(); pos++) {
            final int matched = chain.get(pos).checkSensitive(txt, beginIndex, validModeEnum, context);
            if (matched > 0) {
                return matched;
            }
        }

        // Nothing matched.
        return 0;
    }

    /**
     * Assembles the checks to run for the given context.
     *
     * @param context execution context holding the enable flags
     * @return the checks, in the order they must be applied
     */
    private List<ISensitiveCheck> buildChain(final IWordContext context) {
        final List<ISensitiveCheck> chain = Guavas.newArrayList();

        // The word check is always part of the chain.
        chain.add(Instances.singleton(SensitiveWordCheck.class));

        if (context.sensitiveNumCheck()) {
            chain.add(Instances.singleton(SensitiveNumCheck.class));
        }
        if (context.sensitiveEmailCheck()) {
            chain.add(Instances.singleton(SensitiveEmailCheck.class));
        }
        return chain;
    }

}

View File

@@ -0,0 +1,64 @@
package com.github.houbb.sensitive.word.support.check;
/**
 * Result of a single sensitive-content check.
 *
 * Carries the numeric value reported by the check together with the class
 * of the check that produced it.
 *
 * @author binbin.hou
 * @since 0.0.12
 */
public class SensitiveCheckResult {

    /**
     * Index value reported by the check.
     * @since 0.0.12
     */
    private int index;

    /**
     * Class of the check that produced this result.
     * @since 0.0.12
     */
    private Class<? extends ISensitiveCheck> checkClass;

    /**
     * Creates a result holding the given values.
     * @param index index value to store
     * @param checkClass class of the check that produced the value
     * @return the new result
     * @since 0.0.12
     */
    public static SensitiveCheckResult of(final int index,
                                          final Class<? extends ISensitiveCheck> checkClass) {
        final SensitiveCheckResult created = new SensitiveCheckResult();
        created.index = index;
        created.checkClass = checkClass;
        return created;
    }

    /**
     * @return the stored index value
     */
    public int index() {
        return this.index;
    }

    /**
     * @param index index value to store
     * @return this, to allow call chaining
     */
    public SensitiveCheckResult index(int index) {
        this.index = index;
        return this;
    }

    /**
     * @return the stored check class
     */
    public Class<? extends ISensitiveCheck> checkClass() {
        return this.checkClass;
    }

    /**
     * @param checkClass check class to store
     * @return this, to allow call chaining
     */
    public SensitiveCheckResult checkClass(Class<? extends ISensitiveCheck> checkClass) {
        this.checkClass = checkClass;
        return this;
    }

    @Override
    public String toString() {
        final StringBuilder text = new StringBuilder("SensitiveCheckResult{");
        text.append("index=").append(index);
        text.append(", checkClass=").append(checkClass);
        text.append('}');
        return text.toString();
    }

}

View File

@@ -0,0 +1,58 @@
package com.github.houbb.sensitive.word.support.check.impl;
import com.github.houbb.heaven.annotation.ThreadSafe;
import com.github.houbb.heaven.support.instance.impl.Instances;
import com.github.houbb.heaven.util.guava.Guavas;
import com.github.houbb.sensitive.word.api.IWordContext;
import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum;
import com.github.houbb.sensitive.word.support.check.ISensitiveCheck;
import com.github.houbb.sensitive.word.support.check.SensitiveCheckResult;
import java.util.List;
/**
 * Chain of sensitive-content checks.
 *
 * The word check always runs first; the number, e-mail and URL checks are
 * appended only when the context enables them. The first check whose result
 * has a positive index decides the outcome.
 *
 * A shared parent class could be provided here.
 *
 * For optimising the DFA algorithm, see the papers on the subject.
 *
 * @author binbin.hou
 * @since 0.0.5
 */
@ThreadSafe
public class SensitiveCheckChain implements ISensitiveCheck {

    @Override
    public SensitiveCheckResult sensitiveCheck(String txt, int beginIndex, ValidModeEnum validModeEnum, IWordContext context) {
        final List<ISensitiveCheck> chain = buildChain(context);

        // Walk the chain in order; the first positive index wins.
        for (int pos = 0; pos < chain.size(); pos++) {
            final SensitiveCheckResult hit = chain.get(pos).sensitiveCheck(txt, beginIndex, validModeEnum, context);
            if (hit.index() > 0) {
                return hit;
            }
        }

        // Regular-expression based calls could be made directly here.
        // Nothing matched: report 0, attributed to the chain itself.
        return SensitiveCheckResult.of(0, SensitiveCheckChain.class);
    }

    /**
     * Assembles the checks to run for the given context.
     *
     * @param context execution context holding the enable flags
     * @return the checks, in the order they must be applied
     */
    private List<ISensitiveCheck> buildChain(final IWordContext context) {
        final List<ISensitiveCheck> chain = Guavas.newArrayList();

        // The word check is always part of the chain.
        chain.add(Instances.singleton(SensitiveCheckWord.class));

        if (context.sensitiveCheckNum()) {
            chain.add(Instances.singleton(SensitiveCheckNum.class));
        }
        if (context.sensitiveCheckEmail()) {
            chain.add(Instances.singleton(SensitiveCheckEmail.class));
        }
        if (context.sensitiveCheckUrl()) {
            chain.add(Instances.singleton(SensitiveCheckUrl.class));
        }
        return chain;
    }

}

View File

@@ -1,12 +1,13 @@
package com.github.houbb.sensitive.word.support.check; package com.github.houbb.sensitive.word.support.check.impl;
import com.github.houbb.heaven.annotation.ThreadSafe; import com.github.houbb.heaven.annotation.ThreadSafe;
import com.github.houbb.heaven.support.instance.impl.Instances; import com.github.houbb.heaven.support.instance.impl.Instances;
import com.github.houbb.heaven.util.lang.CharUtil; import com.github.houbb.heaven.util.lang.CharUtil;
import com.github.houbb.heaven.util.util.regex.RegexUtil; import com.github.houbb.heaven.util.util.regex.RegexUtil;
import com.github.houbb.sensitive.word.api.ISensitiveCheck;
import com.github.houbb.sensitive.word.api.IWordContext; import com.github.houbb.sensitive.word.api.IWordContext;
import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum; import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum;
import com.github.houbb.sensitive.word.support.check.ISensitiveCheck;
import com.github.houbb.sensitive.word.support.check.SensitiveCheckResult;
import com.github.houbb.sensitive.word.support.format.CharFormatChain; import com.github.houbb.sensitive.word.support.format.CharFormatChain;
/** /**
@@ -25,10 +26,10 @@ import com.github.houbb.sensitive.word.support.format.CharFormatChain;
* @since 0.0.9 * @since 0.0.9
*/ */
@ThreadSafe @ThreadSafe
public class SensitiveEmailCheck implements ISensitiveCheck { public class SensitiveCheckEmail implements ISensitiveCheck {
@Override @Override
public int checkSensitive(String txt, int beginIndex, ValidModeEnum validModeEnum, IWordContext context) { public SensitiveCheckResult sensitiveCheck(String txt, int beginIndex, ValidModeEnum validModeEnum, IWordContext context) {
// 记录敏感词的长度 // 记录敏感词的长度
int lengthCount = 0; int lengthCount = 0;
int actualLength = 0; int actualLength = 0;
@@ -59,7 +60,7 @@ public class SensitiveEmailCheck implements ISensitiveCheck {
} }
} }
return actualLength; return SensitiveCheckResult.of(actualLength, SensitiveCheckEmail.class);
} }
/** /**

View File

@@ -1,17 +1,12 @@
package com.github.houbb.sensitive.word.support.check; package com.github.houbb.sensitive.word.support.check.impl;
import com.github.houbb.heaven.annotation.ThreadSafe; import com.github.houbb.heaven.annotation.ThreadSafe;
import com.github.houbb.heaven.support.instance.impl.Instances; import com.github.houbb.heaven.support.instance.impl.Instances;
import com.github.houbb.heaven.util.io.FileUtil;
import com.github.houbb.heaven.util.lang.NumUtil;
import com.github.houbb.heaven.util.lang.StringUtil;
import com.github.houbb.sensitive.word.api.ISensitiveCheck;
import com.github.houbb.sensitive.word.api.IWordContext; import com.github.houbb.sensitive.word.api.IWordContext;
import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum; import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum;
import com.github.houbb.sensitive.word.support.check.ISensitiveCheck;
import com.github.houbb.sensitive.word.support.check.SensitiveCheckResult;
import com.github.houbb.sensitive.word.support.format.CharFormatChain; import com.github.houbb.sensitive.word.support.format.CharFormatChain;
import com.github.houbb.sensitive.word.support.format.IgnoreNumStyleCharFormat;
import java.util.List;
/** /**
* 敏感词监测实现 * 敏感词监测实现
@@ -21,10 +16,10 @@ import java.util.List;
* @since 0.0.5 * @since 0.0.5
*/ */
@ThreadSafe @ThreadSafe
public class SensitiveNumCheck implements ISensitiveCheck { public class SensitiveCheckNum implements ISensitiveCheck {
@Override @Override
public int checkSensitive(String txt, int beginIndex, ValidModeEnum validModeEnum, IWordContext context) { public SensitiveCheckResult sensitiveCheck(String txt, int beginIndex, ValidModeEnum validModeEnum, IWordContext context) {
// 记录敏感词的长度 // 记录敏感词的长度
int lengthCount = 0; int lengthCount = 0;
int actualLength = 0; int actualLength = 0;
@@ -55,7 +50,7 @@ public class SensitiveNumCheck implements ISensitiveCheck {
} }
} }
return actualLength; return SensitiveCheckResult.of(actualLength, SensitiveCheckNum.class);
} }
/** /**

View File

@@ -0,0 +1,82 @@
package com.github.houbb.sensitive.word.support.check.impl;
import com.github.houbb.heaven.annotation.CommonEager;
import com.github.houbb.heaven.annotation.ThreadSafe;
import com.github.houbb.heaven.support.instance.impl.Instances;
import com.github.houbb.heaven.util.lang.CharUtil;
import com.github.houbb.heaven.util.util.regex.RegexUtil;
import com.github.houbb.sensitive.word.api.IWordContext;
import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum;
import com.github.houbb.sensitive.word.support.check.ISensitiveCheck;
import com.github.houbb.sensitive.word.support.check.SensitiveCheckResult;
import com.github.houbb.sensitive.word.support.format.CharFormatChain;
/**
 * URL detection backed by a regular expression.
 *
 * Starting at the requested index, characters are collected for as long as
 * their formatted form is a web-site character, and the collected text is
 * tested against the web-site pattern after every appended character.
 *
 * 1. Web sites are only handled coarsely for now.
 * 2. Image-type suffixes are not special-cased in this class.
 * 3. Scanning stops once the candidate holds {@link #MAX_WEB_SITE_LEN} characters.
 *
 * @author binbin.hou
 * @since 0.0.12
 */
@ThreadSafe
public class SensitiveCheckUrl implements ISensitiveCheck {

    /**
     * Maximum number of characters a web-site candidate may contain.
     * @since 0.0.12
     */
    private static final int MAX_WEB_SITE_LEN = 70;

    /**
     * Looks for a URL that starts at {@code beginIndex}.
     *
     * @param txt text to scan
     * @param beginIndex index of the first character to examine
     * @param validModeEnum with {@code FAIL_FAST} the scan stops at the first match,
     *                      otherwise it keeps extending the match
     * @param context execution context handed to the character formatter
     * @return result whose index is the length of the longest match found
     *         (0 when there is none), attributed to this class
     */
    @Override
    public SensitiveCheckResult sensitiveCheck(String txt, int beginIndex, ValidModeEnum validModeEnum, IWordContext context) {
        // Number of characters collected so far.
        int lengthCount = 0;
        // Length of the last prefix that matched the web-site pattern.
        int actualLength = 0;

        StringBuilder stringBuilder = new StringBuilder();
        // The formatter is a singleton, so look it up once instead of per character.
        final CharFormatChain charFormat = Instances.singleton(CharFormatChain.class);

        // This takes the lazy route of string concatenation plus a regular expression.
        // A DFA could do the same job, so this is slower than it needs to be;
        // it can be optimised once the DFA approach is better understood.
        for (int i = beginIndex; i < txt.length(); i++) {
            char currentChar = txt.charAt(i);
            char mappingChar = charFormat.format(currentChar, context);

            // Strictly below the limit: "<=" would let a 71st character through.
            if (CharUtil.isWebSiteChar(mappingChar)
                    && lengthCount < MAX_WEB_SITE_LEN) {
                lengthCount++;
                // The original (unformatted) character is what gets matched.
                stringBuilder.append(currentChar);

                if (isCondition(stringBuilder.toString())) {
                    actualLength = lengthCount;

                    // In fail-fast mode the first match is enough.
                    if (ValidModeEnum.FAIL_FAST.equals(validModeEnum)) {
                        break;
                    }
                }
            } else {
                break;
            }
        }

        return SensitiveCheckResult.of(actualLength, SensitiveCheckUrl.class);
    }

    /**
     * Decides whether the collected text counts as a web site.
     * @param string candidate text collected so far
     * @return whether the candidate satisfies the web-site pattern
     * @since 0.0.12
     */
    private boolean isCondition(final String string) {
        return RegexUtil.isWebSite(string);
    }

}

View File

@@ -1,12 +1,13 @@
package com.github.houbb.sensitive.word.support.check; package com.github.houbb.sensitive.word.support.check.impl;
import com.github.houbb.heaven.annotation.ThreadSafe; import com.github.houbb.heaven.annotation.ThreadSafe;
import com.github.houbb.heaven.support.instance.impl.Instances; import com.github.houbb.heaven.support.instance.impl.Instances;
import com.github.houbb.heaven.util.lang.ObjectUtil; import com.github.houbb.heaven.util.lang.ObjectUtil;
import com.github.houbb.sensitive.word.api.ISensitiveCheck;
import com.github.houbb.sensitive.word.api.IWordContext; import com.github.houbb.sensitive.word.api.IWordContext;
import com.github.houbb.sensitive.word.constant.AppConst; import com.github.houbb.sensitive.word.constant.AppConst;
import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum; import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum;
import com.github.houbb.sensitive.word.support.check.ISensitiveCheck;
import com.github.houbb.sensitive.word.support.check.SensitiveCheckResult;
import com.github.houbb.sensitive.word.support.format.CharFormatChain; import com.github.houbb.sensitive.word.support.format.CharFormatChain;
import java.util.Map; import java.util.Map;
@@ -17,10 +18,10 @@ import java.util.Map;
* @since 0.0.5 * @since 0.0.5
*/ */
@ThreadSafe @ThreadSafe
public class SensitiveWordCheck implements ISensitiveCheck { public class SensitiveCheckWord implements ISensitiveCheck {
@Override @Override
public int checkSensitive(String txt, int beginIndex, ValidModeEnum validModeEnum, IWordContext context) { public SensitiveCheckResult sensitiveCheck(String txt, int beginIndex, ValidModeEnum validModeEnum, IWordContext context) {
Map nowMap = context.sensitiveWordMap(); Map nowMap = context.sensitiveWordMap();
// 记录敏感词的长度 // 记录敏感词的长度
@@ -53,7 +54,7 @@ public class SensitiveWordCheck implements ISensitiveCheck {
} }
} }
return actualLength; return SensitiveCheckResult.of(actualLength, SensitiveCheckWord.class);
} }
/** /**

View File

@@ -3,6 +3,7 @@ package com.github.houbb.sensitive.word.support.map;
import com.github.houbb.heaven.annotation.ThreadSafe; import com.github.houbb.heaven.annotation.ThreadSafe;
import com.github.houbb.heaven.support.instance.impl.Instances; import com.github.houbb.heaven.support.instance.impl.Instances;
import com.github.houbb.heaven.util.guava.Guavas; import com.github.houbb.heaven.util.guava.Guavas;
import com.github.houbb.heaven.util.io.FileUtil;
import com.github.houbb.heaven.util.lang.CharUtil; import com.github.houbb.heaven.util.lang.CharUtil;
import com.github.houbb.heaven.util.lang.ObjectUtil; import com.github.houbb.heaven.util.lang.ObjectUtil;
import com.github.houbb.heaven.util.lang.StringUtil; import com.github.houbb.heaven.util.lang.StringUtil;
@@ -12,7 +13,9 @@ import com.github.houbb.sensitive.word.api.IWordContext;
import com.github.houbb.sensitive.word.api.IWordMap; import com.github.houbb.sensitive.word.api.IWordMap;
import com.github.houbb.sensitive.word.constant.AppConst; import com.github.houbb.sensitive.word.constant.AppConst;
import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum; import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum;
import com.github.houbb.sensitive.word.support.check.SensitiveCheckChain; import com.github.houbb.sensitive.word.support.check.SensitiveCheckResult;
import com.github.houbb.sensitive.word.support.check.impl.SensitiveCheckChain;
import com.github.houbb.sensitive.word.support.check.impl.SensitiveCheckUrl;
import java.util.Collection; import java.util.Collection;
import java.util.HashMap; import java.util.HashMap;
@@ -118,9 +121,9 @@ public class SensitiveWordMap implements IWordMap {
} }
for (int i = 0; i < string.length(); i++) { for (int i = 0; i < string.length(); i++) {
int checkResult = checkSensitive(string, i, ValidModeEnum.FAIL_FAST, context); SensitiveCheckResult checkResult = sensitiveCheck(string, i, ValidModeEnum.FAIL_FAST, context);
// 快速返回 // 快速返回
if (checkResult > 0) { if (checkResult.index() > 0) {
return true; return true;
} }
} }
@@ -178,9 +181,9 @@ public class SensitiveWordMap implements IWordMap {
List<String> resultList = Guavas.newArrayList(); List<String> resultList = Guavas.newArrayList();
for (int i = 0; i < text.length(); i++) { for (int i = 0; i < text.length(); i++) {
int wordLength = checkSensitive(text, i, ValidModeEnum.FAIL_OVER, context); SensitiveCheckResult checkResult = sensitiveCheck(text, i, ValidModeEnum.FAIL_OVER, context);
// 命中 // 命中
int wordLength = checkResult.index();
if (wordLength > 0) { if (wordLength > 0) {
// 保存敏感词 // 保存敏感词
String sensitiveWord = text.substring(i, i + wordLength); String sensitiveWord = text.substring(i, i + wordLength);
@@ -223,12 +226,22 @@ public class SensitiveWordMap implements IWordMap {
for (int i = 0; i < target.length(); i++) { for (int i = 0; i < target.length(); i++) {
char currentChar = target.charAt(i); char currentChar = target.charAt(i);
// 内层直接从 i 开始往后遍历,这个算法的,获取第一个匹配的单词 // 内层直接从 i 开始往后遍历,这个算法的,获取第一个匹配的单词
int wordLength = checkSensitive(target, i, ValidModeEnum.FAIL_OVER, context); SensitiveCheckResult checkResult = sensitiveCheck(target, i, ValidModeEnum.FAIL_OVER, context);
// 敏感词 // 敏感词
int wordLength = checkResult.index();
if(wordLength > 0) { if(wordLength > 0) {
String replaceStr = CharUtil.repeat(replaceChar, wordLength); // 是否执行替换
resultBuilder.append(replaceStr); Class checkClass = checkResult.checkClass();
String string = target.substring(i, i+wordLength);
if(SensitiveCheckUrl.class.equals(checkClass)
&& FileUtil.isImage(string)) {
// 直接使用原始内容,避免 markdown 图片转换失败
resultBuilder.append(string);
} else {
String replaceStr = CharUtil.repeat(replaceChar, wordLength);
resultBuilder.append(replaceStr);
}
// 直接跳过敏感词的长度 // 直接跳过敏感词的长度
i += wordLength-1; i += wordLength-1;
@@ -242,13 +255,13 @@ public class SensitiveWordMap implements IWordMap {
} }
@Override @Override
public int checkSensitive(String txt, int beginIndex, ValidModeEnum validModeEnum, IWordContext context) { public SensitiveCheckResult sensitiveCheck(String txt, int beginIndex, ValidModeEnum validModeEnum, IWordContext context) {
// 默认执行敏感词操作 // 默认执行敏感词操作
context.sensitiveWordMap(innerWordMap); context.sensitiveWordMap(innerWordMap);
// 责任链模式调用 // 责任链模式调用
return Instances.singleton(SensitiveCheckChain.class) return Instances.singleton(SensitiveCheckChain.class)
.checkSensitive(txt, beginIndex, validModeEnum, context); .sensitiveCheck(txt, beginIndex, validModeEnum, context);
} }
} }

View File

@@ -23,7 +23,7 @@ public class SensitiveWordBsChineseTest {
final String text = "我爱我的祖国和五星紅旗。"; final String text = "我爱我的祖国和五星紅旗。";
List<String> wordList = SensitiveWordBs.newInstance().findAll(text); List<String> wordList = SensitiveWordBs.newInstance().findAll(text);
Assert.assertEquals("[五星紅旗]", wordList.toString()); Assert.assertEquals("[祖国, 五星紅旗]", wordList.toString());
} }
} }

View File

@@ -35,7 +35,7 @@ public class SensitiveWordBsEmailTest {
final String text = "楼主好人,邮箱 123456789@xx.com"; final String text = "楼主好人,邮箱 123456789@xx.com";
List<String> wordList = SensitiveWordBs.newInstance().findAll(text); List<String> wordList = SensitiveWordBs.newInstance().findAll(text);
Assert.assertEquals("[邮箱, 123456789]", wordList.toString()); Assert.assertEquals("[邮箱, 123456789, xx.com]", wordList.toString());
} }
} }

View File

@@ -0,0 +1,50 @@
package com.github.houbb.sensitive.word.bs;
import org.junit.Assert;
import org.junit.Test;
import java.util.List;
/**
 * URL detection and replacement tests for {@code SensitiveWordBs}.
 *
 * <p> project: sensitive-word-SensitiveWordBsTest </p>
 * <p> create on 2020/1/7 23:43 </p>
 *
 * @author Administrator
 * @since 0.0.12
 */
public class SensitiveWordBsUrlTest {

    /**
     * Plain URL test: the URL is reported by findAll and masked by replace.
     * @since 0.0.12
     */
    @Test
    public void commonUrlTest() {
        final String text = "点击链接 www.baidu.com查看答案";

        final List<String> found = SensitiveWordBs.newInstance().findAll(text);
        final String foundText = found.toString();
        Assert.assertEquals("[链接, www.baidu.com]", foundText);

        final String masked = SensitiveWordBs.newInstance().replace(text);
        Assert.assertEquals("点击** *************查看答案", masked);
    }

    /**
     * Image URL test.
     *
     * 1. The URL is detected.
     * 2. It is not replaced by default.
     *
     * @since 0.0.12
     */
    @Test
    public void imageUrlTest() {
        final String text = "双击查看大图 www.big-image.png查看";

        final List<String> found = SensitiveWordBs.newInstance().findAll(text);
        final String foundText = found.toString();
        Assert.assertEquals("[www.big-image.png]", foundText);

        final String replaced = SensitiveWordBs.newInstance().replace(text);
        Assert.assertEquals(text, replaced);
    }

}