release branch 0.3.0

This commit is contained in:
binbin.hou
2023-06-06 19:58:35 +08:00
parent b1ed3249d0
commit ab6e91f7a4
56 changed files with 952 additions and 455 deletions

View File

@@ -140,3 +140,11 @@
|:---|:---|:---|:---|:--|
| 1 | O | 移除日志初始化的控制台日志输出 | 2023-02-17 23:51:58 | |
| 2 | A | 支持数字检验的长度指定 | 2022-01-17 23:51:58 | |
# release_0.3.0
| 序号 | 变更类型 | 说明 | 时间 | 备注 |
|:---|:-----|:-----------------|:--------------------|:--|
| 1 | O | 移除冗余的耗时统计 | 2023-06-06 23:51:58 | |
| 2 | A | 优化代码实现方式,添加工具类方法 | 2023-06-06 23:51:58 | |

View File

@@ -46,9 +46,9 @@
[CHANGE_LOG.md](https://github.com/houbb/sensitive-word/blob/master/doc/CHANGE_LOG.md)
v0.2.1 变更:
v0.3.0 变更:
- 支持用户自定义数字检测的长度
- 代码实现优化
# 快速开始
@@ -64,7 +64,7 @@ v0.2.1 变更:
<dependency>
<groupId>com.github.houbb</groupId>
<artifactId>sensitive-word</artifactId>
<version>0.2.1</version>
<version>0.3.0</version>
</dependency>
```
@@ -285,6 +285,7 @@ final String text = "ⒻⒻⒻfⓤuⓤ⒰cⓒ⒦ the bad words";
List<String> wordList = SensitiveWordBs.newInstance()
.ignoreRepeat(true)
.init()
.findAll(text);
Assert.assertEquals("[ⒻⒻⒻfⓤuⓤ⒰cⓒ⒦]", wordList.toString());
```
@@ -308,12 +309,13 @@ V0.2.1 之后,支持通过 `numCheckLen(长度)` 自定义检测的长度。
final String text = "你懂得12345678";
// 默认检测 8 位
List<String> wordList = SensitiveWordBs.newInstance().findAll(text);
List<String> wordList = SensitiveWordBs.newInstance().init().findAll(text);
Assert.assertEquals("[12345678]", wordList.toString());
// 指定数字的长度,避免误杀
List<String> wordList2 = SensitiveWordBs.newInstance()
.numCheckLen(9)
.init()
.findAll(text);
Assert.assertEquals("[]", wordList2.toString());
```
@@ -353,18 +355,19 @@ Assert.assertTrue(wordBs.contains(text));
其中各项配置的说明如下:
| 序号 | 方法 | 说明 | 默认值 |
|:----|:---|:--------------|:------|
| 1 | ignoreCase | 忽略大小写 | true |
| 2 | ignoreWidth | 忽略半角圆角 | true |
| 3 | ignoreNumStyle | 忽略数字的写法 | true |
| 4 | ignoreChineseStyle | 忽略中文的书写格式 | true |
| 5 | ignoreEnglishStyle | 忽略英文的书写格式 | true |
| 6 | ignoreRepeat | 忽略重复词 | false |
| 7 | enableNumCheck | 是否启用数字检测。 | true |
| 8 | enableEmailCheck | 是否启用邮箱检测 | true |
| 9 | enableUrlCheck | 是否启用链接检测 | true |
| 10 | numCheckLen | 数字检测,自定义指定长度。| 8 |
| 序号 | 方法 | 说明 | 默认值 |
|:---|:---------------------|:--------------|:-------|
| 1 | ignoreCase | 忽略大小写 | true |
| 2 | ignoreWidth | 忽略半角圆角 | true |
| 3 | ignoreNumStyle | 忽略数字的写法 | true |
| 4 | ignoreChineseStyle | 忽略中文的书写格式 | true |
| 5 | ignoreEnglishStyle | 忽略英文的书写格式 | true |
| 6 | ignoreRepeat | 忽略重复词 | false |
| 7 | enableNumCheck | 是否启用数字检测。 | true |
| 8 | enableEmailCheck | 是否启用邮箱检测 | true |
| 9 | enableUrlCheck | 是否启用链接检测 | true |
| 10 | numCheckLen | 数字检测,自定义指定长度。 | 8 |
| 11 | sensitiveWordReplace | 敏感词替换策略 | `*` 替换 |
# 动态加载(用户自定义)
@@ -612,6 +615,8 @@ public class SensitiveWordService {
# 后期 road-map
- [ ] wordMap 的抽象,便于拓展
- 同音字处理
- 形近字处理

View File

@@ -6,7 +6,7 @@
<groupId>com.github.houbb</groupId>
<artifactId>sensitive-word</artifactId>
<version>0.2.1</version>
<version>0.3.0</version>
<properties>
<!--============================== All Plugins START ==============================-->

View File

@@ -10,9 +10,9 @@ ECHO "============================= RELEASE START..."
:: 版本号信息(需要手动指定)
:::: 旧版本名称
SET version=0.2.1
SET version=0.3.0
:::: 新版本名称
SET newVersion=0.2.2
SET newVersion=0.4.0
:::: 组织名称
SET groupName=com.github.houbb
:::: 项目名称

View File

@@ -1,5 +1,7 @@
package com.github.houbb.sensitive.word.api;
import com.github.houbb.sensitive.word.support.check.ISensitiveCheck;
import java.util.Map;
/**
@@ -173,4 +175,51 @@ public interface IWordContext {
*/
IWordContext sensitiveCheckNumLen(final int sensitiveCheckNumLen);
/**
* 设置检测策略
* @param sensitiveCheck 检测策略
* @return this
* @since 0.3.0
*/
IWordContext sensitiveCheck(final ISensitiveCheck sensitiveCheck);
/**
* 获取检测策略
* @return 检测策略
* @since 0.3.0
*/
ISensitiveCheck sensitiveCheck();
/**
* 设置敏感词替换策略
* @param sensitiveWordReplace 策略
* @return this
* @since 0.3.0
*/
IWordContext sensitiveWordReplace(final ISensitiveWordReplace sensitiveWordReplace);
/**
* 敏感词替换策略
* @return 替换策略
* @since 0.3.0
*/
ISensitiveWordReplace sensitiveWordReplace();
/**
* 设置统一的字符处理
*
* @param charFormat 字符处理
* @return 结果
* @since 0.3.0
*/
IWordContext charFormat(final ICharFormat charFormat);
/**
* 获取格式化策略
*
* @return 策略
* @since 0.3.0
*/
ICharFormat charFormat();
}

View File

@@ -1,20 +0,0 @@
package com.github.houbb.sensitive.word.api;
import java.util.List;
/**
 * Word data provider interface.
 *
 * <p>Deprecated: superseded by the separate deny/allow loading mechanism
 * (IWordDeny / IWordAllow) used elsewhere in the project — TODO confirm
 * the intended replacement before removal.</p>
 *
 * @author binbin.hou
 * @since 0.0.1
 */
@Deprecated
public interface IWordData {

    /**
     * Returns the list of sensitive words provided by this data source.
     *
     * @return the word list
     * @since 0.0.1
     */
    List<String> getWordData();

}

View File

@@ -59,13 +59,11 @@ public interface IWordMap extends ISensitiveCheck {
* ps: 这里可以添加优化。
*
* @param target 目标字符串
* @param replace 替换策略
* @param context 上下文
* @return 替换后结果
* @since 0.0.2
*/
String replace(final String target,
final ISensitiveWordReplace replace,
final IWordContext context);
}

View File

@@ -1,18 +1,20 @@
package com.github.houbb.sensitive.word.bs;
import com.github.houbb.heaven.constant.CharConst;
import com.github.houbb.heaven.support.handler.IHandler;
import com.github.houbb.heaven.util.common.ArgUtil;
import com.github.houbb.heaven.util.util.CollectionUtil;
import com.github.houbb.sensitive.word.api.*;
import com.github.houbb.sensitive.word.support.allow.WordAllows;
import com.github.houbb.sensitive.word.support.check.ISensitiveCheck;
import com.github.houbb.sensitive.word.support.check.impl.SensitiveChecks;
import com.github.houbb.sensitive.word.support.deny.WordDenys;
import com.github.houbb.sensitive.word.support.map.SensitiveWordMap;
import com.github.houbb.sensitive.word.support.replace.SensitiveWordReplaceChar;
import com.github.houbb.sensitive.word.support.format.CharFormats;
import com.github.houbb.sensitive.word.support.map.WordMaps;
import com.github.houbb.sensitive.word.support.replace.SensitiveWordReplaces;
import com.github.houbb.sensitive.word.support.result.WordResultHandlers;
import com.github.houbb.sensitive.word.utils.InnerFormatUtils;
import com.github.houbb.sensitive.word.utils.InnerWordDataUtils;
import java.util.*;
import java.util.List;
/**
* 敏感词引导类
@@ -30,19 +32,63 @@ public class SensitiveWordBs {
private SensitiveWordBs() {
}
//------------------------------------------------------------- 基本属性 START
// 格式统一化
/**
* 是否忽略大小写
*/
private boolean ignoreCase = true;
/**
* 是否忽略全角、半角
*/
private boolean ignoreWidth = true;
/**
* 是否忽略数字样式
*/
private boolean ignoreNumStyle = true;
/**
* 是否忽略中文样式
*/
private boolean ignoreChineseStyle = true;
/**
* 是否忽略英文样式
*/
private boolean ignoreEnglishStyle = true;
/**
* 是否忽略重复
*/
private boolean ignoreRepeat = false;
// 开启校验
/**
* 启用数字检测
*/
private boolean sensitiveCheckNum = true;
/**
* 启用邮箱检测
*/
private boolean sensitiveCheckEmail = true;
/**
* 启用 URL 检测
*/
private boolean sensitiveCheckUrl = true;
// 额外配置
/**
* 检测数字时的长度
*/
private int sensitiveCheckNumLen = 8;
//------------------------------------------------------------- 基本属性 END
/**
* 敏感词 map
*
* TODO: 暂时定义为 final后续放开抽象。
*
* @since 0.0.1
*/
private IWordMap sensitiveWordMap;
/**
* 默认的执行上下文
*
* @since 0.0.4
*/
private final IWordContext context = buildDefaultContext();
private final IWordMap wordMap = WordMaps.defaults();
/**
* 禁止的单词
@@ -57,76 +103,21 @@ public class SensitiveWordBs {
private IWordAllow wordAllow = WordAllows.system();
/**
* DCL 初始化 wordMap 信息
*
* 注意map 的构建是一个比较耗时的动作
* @since 0.0.4
* 替换策略
* @since 0.3.0
*/
private synchronized void initWordMap() {
// 加载配置信息
List<String> denyList = wordDeny.deny();
List<String> allowList = wordAllow.allow();
List<String> results = getActualDenyList(denyList, allowList);
// 初始化 DFA 信息
if(sensitiveWordMap == null) {
sensitiveWordMap = new SensitiveWordMap();
}
// 便于可以多次初始化
sensitiveWordMap.initWordMap(results);
}
private ISensitiveWordReplace sensitiveWordReplace = SensitiveWordReplaces.chars();
/**
* 获取禁止列表中真正的禁止词汇
* @param denyList 禁止
* @param allowList 允许
* @return 结果
* @since 0.1.1
* 上下文
* @since 0.3.0
*/
List<String> getActualDenyList(List<String> denyList,
List<String> allowList) {
if(CollectionUtil.isEmpty(denyList)) {
return Collections.emptyList();
}
if(CollectionUtil.isEmpty(allowList)) {
return denyList;
}
private IWordContext context = SensitiveWordContext.newInstance();
List<String> formatDenyList = this.formatWordList(denyList);
List<String> formatAllowList = this.formatWordList(allowList);
List<String> resultList = new ArrayList<>();
// O(1)
Set<String> allowSet = new HashSet<>(formatAllowList);
for(String deny : formatDenyList) {
if(allowSet.contains(deny)) {
continue;
}
resultList.add(deny);
}
return resultList;
}
/**
* 数据格式化处理
* @param list 列表
* @return 结果
* @since 0.1.1
*/
private List<String> formatWordList(List<String> list) {
if(CollectionUtil.isEmpty(list)) {
return list;
}
List<String> resultList = new ArrayList<>(list.size());
for(String word : list) {
String formatWord = InnerFormatUtils.format(word, this.context);
resultList.add(formatWord);
}
return resultList;
public SensitiveWordBs sensitiveWordReplace(ISensitiveWordReplace sensitiveWordReplace) {
ArgUtil.notNull(sensitiveWordReplace, "sensitiveWordReplace");
this.sensitiveWordReplace = sensitiveWordReplace;
return this;
}
/**
@@ -149,11 +140,68 @@ public class SensitiveWordBs {
* @return this
*/
public SensitiveWordBs init() {
// 初始化 context
this.initContext();
// 替换策略
final ICharFormat charFormat = CharFormats.initCharFormat(context);
context.charFormat(charFormat);
// 3. 初始化对应的 sensitiveCheck
final ISensitiveCheck sensitiveCheck = SensitiveChecks.initSensitiveCheck(context);
context.sensitiveCheck(sensitiveCheck);
//2. 初始化 word
this.initWordMap();
return this;
}
/**
* 构建默认的上下文
*
* @return 结果
* @since 0.0.4
*/
private IWordContext initContext() {
this.context = SensitiveWordContext.newInstance();
// 格式统一化
context.ignoreCase(ignoreCase);
context.ignoreWidth(ignoreWidth);
context.ignoreNumStyle(ignoreNumStyle);
context.ignoreChineseStyle(ignoreChineseStyle);
context.ignoreEnglishStyle(ignoreEnglishStyle);
context.ignoreRepeat(ignoreRepeat);
// 开启校验
context.sensitiveCheckNum(sensitiveCheckNum);
context.sensitiveCheckEmail(sensitiveCheckEmail);
context.sensitiveCheckUrl(sensitiveCheckUrl);
// 额外配置
context.sensitiveCheckNumLen(sensitiveCheckNumLen);
context.sensitiveWordReplace(sensitiveWordReplace);
return context;
}
/**
* DCL 初始化 wordMap 信息
*
* 注意map 的构建是一个比较耗时的动作
* @since 0.0.4
*/
private synchronized void initWordMap() {
// 加载配置信息
List<String> denyList = wordDeny.deny();
List<String> allowList = wordAllow.allow();
List<String> results = InnerWordDataUtils.getActualDenyList(denyList, allowList, context);
// 便于可以多次初始化
wordMap.initWordMap(results);
}
/**
* 设置禁止的实现
* @param wordDeny 禁止的实现
@@ -186,7 +234,7 @@ public class SensitiveWordBs {
* @return this
*/
public SensitiveWordBs enableNumCheck(boolean enableNumCheck) {
this.context.sensitiveCheckNum(enableNumCheck);
this.sensitiveCheckNum = enableNumCheck;
return this;
}
@@ -197,7 +245,7 @@ public class SensitiveWordBs {
* @since 0.2.1
*/
public SensitiveWordBs numCheckLen(int numCheckLen) {
this.context.sensitiveCheckNumLen(numCheckLen);
this.sensitiveCheckNumLen = numCheckLen;
return this;
}
@@ -209,7 +257,7 @@ public class SensitiveWordBs {
* @return this
*/
public SensitiveWordBs enableEmailCheck(boolean enableEmailCheck) {
this.context.sensitiveCheckEmail(enableEmailCheck);
this.sensitiveCheckEmail = enableEmailCheck;
return this;
}
@@ -221,7 +269,7 @@ public class SensitiveWordBs {
* @return this
*/
public SensitiveWordBs enableUrlCheck(boolean enableUrlCheck) {
this.context.sensitiveCheckUrl(enableUrlCheck);
this.sensitiveCheckUrl = enableUrlCheck;
return this;
}
@@ -232,7 +280,7 @@ public class SensitiveWordBs {
* @since 0.0.14
*/
public SensitiveWordBs ignoreCase(boolean ignoreCase) {
this.context.ignoreCase(ignoreCase);
this.ignoreCase = ignoreCase;
return this;
}
@@ -243,7 +291,7 @@ public class SensitiveWordBs {
* @since 0.0.14
*/
public SensitiveWordBs ignoreWidth(boolean ignoreWidth) {
this.context.ignoreWidth(ignoreWidth);
this.ignoreWidth = ignoreWidth;
return this;
}
@@ -254,7 +302,7 @@ public class SensitiveWordBs {
* @since 0.0.14
*/
public SensitiveWordBs ignoreNumStyle(boolean ignoreNumStyle) {
this.context.ignoreNumStyle(ignoreNumStyle);
this.ignoreNumStyle = ignoreNumStyle;
return this;
}
@@ -265,7 +313,7 @@ public class SensitiveWordBs {
* @since 0.0.14
*/
public SensitiveWordBs ignoreChineseStyle(boolean ignoreChineseStyle) {
this.context.ignoreChineseStyle(ignoreChineseStyle);
this.ignoreChineseStyle = ignoreChineseStyle;
return this;
}
@@ -276,7 +324,7 @@ public class SensitiveWordBs {
* @since 0.0.14
*/
public SensitiveWordBs ignoreEnglishStyle(boolean ignoreEnglishStyle) {
this.context.ignoreEnglishStyle(ignoreEnglishStyle);
this.ignoreEnglishStyle = ignoreEnglishStyle;
return this;
}
@@ -287,37 +335,11 @@ public class SensitiveWordBs {
* @since 0.0.14
*/
public SensitiveWordBs ignoreRepeat(boolean ignoreRepeat) {
this.context.ignoreRepeat(ignoreRepeat);
this.ignoreRepeat = ignoreRepeat;
return this;
}
/**
* 构建默认的上下文
*
* @return 结果
* @since 0.0.4
*/
private IWordContext buildDefaultContext() {
IWordContext wordContext = SensitiveWordContext.newInstance();
// 格式统一化
wordContext.ignoreCase(true);
wordContext.ignoreWidth(true);
wordContext.ignoreNumStyle(true);
wordContext.ignoreChineseStyle(true);
wordContext.ignoreEnglishStyle(true);
wordContext.ignoreRepeat(false);
// 开启校验
wordContext.sensitiveCheckNum(true);
wordContext.sensitiveCheckEmail(true);
wordContext.sensitiveCheckUrl(true);
// 额外配置
wordContext.sensitiveCheckNumLen(8);
return wordContext;
}
//------------------------------------------------------------------------------------ 公开方法 START
/**
* 是否包含敏感词
*
@@ -326,9 +348,7 @@ public class SensitiveWordBs {
* @since 0.0.1
*/
public boolean contains(final String target) {
statusCheck();
return sensitiveWordMap.contains(target, context);
return wordMap.contains(target, context);
}
/**
@@ -369,9 +389,8 @@ public class SensitiveWordBs {
*/
public <R> List<R> findAll(final String target, final IWordResultHandler<R> handler) {
ArgUtil.notNull(handler, "handler");
statusCheck();
List<IWordResult> wordResults = sensitiveWordMap.findAll(target, context);
List<IWordResult> wordResults = wordMap.findAll(target, context);
return CollectionUtil.toList(wordResults, new IHandler<IWordResult, R>() {
@Override
public R handle(IWordResult wordResult) {
@@ -392,67 +411,22 @@ public class SensitiveWordBs {
*/
public <R> R findFirst(final String target, final IWordResultHandler<R> handler) {
ArgUtil.notNull(handler, "handler");
statusCheck();
IWordResult wordResult = sensitiveWordMap.findFirst(target, context);
IWordResult wordResult = wordMap.findFirst(target, context);
return handler.handle(wordResult);
}
/**
* 替换所有内容
*
* @param target 目标字符串
* @param replaceChar 替换为的 char
* @return 替换后结果
* @since 0.0.2
*/
public String replace(final String target, final char replaceChar) {
ISensitiveWordReplace replace = new SensitiveWordReplaceChar(replaceChar);
return replace(target, replace);
}
/**
* 替换所有内容
*
* @param target 目标字符串
* @param replace 替换策略
* @return 替换后结果
* @since 0.2.0
*/
public String replace(final String target, final ISensitiveWordReplace replace) {
statusCheck();
return sensitiveWordMap.replace(target, replace, context);
}
/**
* 替换所有内容
* 1. 默认使用空格替换,避免星号改变 md 的格式。
*
* @param target 目标字符串
* @return 替换后结果
* @since 0.0.2
*/
public String replace(final String target) {
return this.replace(target, CharConst.STAR);
return wordMap.replace(target, context);
}
/**
* 状态校验
* @since 0.0.13
*/
private void statusCheck(){
//DLC
if(sensitiveWordMap == null) {
synchronized (this) {
if(sensitiveWordMap == null) {
this.init();
}
}
}
}
//------------------------------------------------------------------------------------ 公开方法 END
}

View File

@@ -1,6 +1,9 @@
package com.github.houbb.sensitive.word.bs;
import com.github.houbb.sensitive.word.api.ICharFormat;
import com.github.houbb.sensitive.word.api.ISensitiveWordReplace;
import com.github.houbb.sensitive.word.api.IWordContext;
import com.github.houbb.sensitive.word.support.check.ISensitiveCheck;
import java.util.Map;
@@ -77,6 +80,44 @@ public class SensitiveWordContext implements IWordContext {
*/
private int sensitiveCheckNumLen;
/**
* 检测策略
* @since 0.3.0
*/
private ISensitiveCheck sensitiveCheck;
/**
* 替换策略
* @since 0.3.0
*/
private ISensitiveWordReplace sensitiveWordReplace;
/**
* 格式化
* @since 0.3.0
*/
private ICharFormat charFormat;
@Override
public ISensitiveWordReplace sensitiveWordReplace() {
return sensitiveWordReplace;
}
public SensitiveWordContext sensitiveWordReplace(ISensitiveWordReplace sensitiveWordReplace) {
this.sensitiveWordReplace = sensitiveWordReplace;
return this;
}
@Override
public ISensitiveCheck sensitiveCheck() {
return sensitiveCheck;
}
public SensitiveWordContext sensitiveCheck(ISensitiveCheck sensitiveCheck) {
this.sensitiveCheck = sensitiveCheck;
return this;
}
/**
* 私有化构造器
* @since 0.0.4
@@ -214,4 +255,13 @@ public class SensitiveWordContext implements IWordContext {
return this;
}
@Override
public ICharFormat charFormat() {
return charFormat;
}
public SensitiveWordContext charFormat(ICharFormat charFormat) {
this.charFormat = charFormat;
return this;
}
}

View File

@@ -1,5 +0,0 @@
/**
* 引导类定义
* @since 0.0.1
*/
package com.github.houbb.sensitive.word.bs;

View File

@@ -43,4 +43,10 @@ public final class AppConst {
*/
public static final String SENSITIVE_WORD_ALLOW_PATH = "/sensitive_word_allow.txt";
/**
* 最长的网址长度
* @since 0.3.0
*/
public static final int MAX_WEB_SITE_LEN = 70;
}

View File

@@ -3,6 +3,7 @@ package com.github.houbb.sensitive.word.core;
import com.github.houbb.sensitive.word.api.ISensitiveWordReplace;
import com.github.houbb.sensitive.word.api.IWordResultHandler;
import com.github.houbb.sensitive.word.bs.SensitiveWordBs;
import com.github.houbb.sensitive.word.support.replace.SensitiveWordReplaces;
import java.util.List;
@@ -69,7 +70,8 @@ public final class SensitiveWordHelper {
* @since 0.2.0
*/
public static String replace(final String target, final ISensitiveWordReplace replace) {
return WORD_BS.replace(target, replace);
SensitiveWordBs sensitiveWordBs = SensitiveWordBs.newInstance().sensitiveWordReplace(replace).init();
return sensitiveWordBs.replace(target);
}
/**
@@ -81,7 +83,9 @@ public final class SensitiveWordHelper {
* @since 0.0.13
*/
public static String replace(final String target, final char replaceChar) {
return WORD_BS.replace(target, replaceChar);
final ISensitiveWordReplace replace = SensitiveWordReplaces.chars(replaceChar);
return replace(target, replace);
}
/**

View File

@@ -15,6 +15,15 @@ import java.util.List;
@ThreadSafe
public class WordAllowSystem implements IWordAllow {
/**
* @since 0.3.0
*/
private static final WordAllowSystem INSTANCE = new WordAllowSystem();
public static WordAllowSystem getInstance() {
return INSTANCE;
}
@Override
public List<String> allow() {
return StreamUtil.readAllLines("/sensitive_word_allow.txt");

View File

@@ -1,6 +1,5 @@
package com.github.houbb.sensitive.word.support.allow;
import com.github.houbb.heaven.support.instance.impl.Instances;
import com.github.houbb.heaven.support.pipeline.Pipeline;
import com.github.houbb.heaven.util.util.ArrayUtil;
import com.github.houbb.sensitive.word.api.IWordAllow;
@@ -43,7 +42,7 @@ public final class WordAllows {
* @since 0.0.13
*/
public static IWordAllow system() {
return Instances.singleton(WordAllowSystem.class);
return WordAllowSystem.getInstance();
}
}

View File

@@ -1,58 +0,0 @@
package com.github.houbb.sensitive.word.support.check.impl;
import com.github.houbb.heaven.annotation.ThreadSafe;
import com.github.houbb.heaven.support.instance.impl.Instances;
import com.github.houbb.heaven.util.guava.Guavas;
import com.github.houbb.sensitive.word.api.IWordContext;
import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum;
import com.github.houbb.sensitive.word.support.check.ISensitiveCheck;
import com.github.houbb.sensitive.word.support.check.SensitiveCheckResult;
import java.util.List;
/**
 * Sensitive-word check implemented as a chain of responsibility.
 *
 * <p>On every invocation the chain is rebuilt from the context flags: the
 * word check is always included, while the number, email and URL checks are
 * added only when the corresponding flag on the context is enabled. The
 * first check that reports a hit (index &gt; 0) wins; otherwise a result
 * with index 0 is returned.</p>
 *
 * <p>NOTE(review): the check list is rebuilt on each call — presumably
 * cheap since the checks themselves are singletons, but it could be cached
 * per context; verify before optimizing.</p>
 *
 * <p>Possible DFA-algorithm optimizations are discussed in the literature.</p>
 *
 * @author binbin.hou
 * @since 0.0.5
 */
@ThreadSafe
public class SensitiveCheckChain implements ISensitiveCheck {

    @Override
    public SensitiveCheckResult sensitiveCheck(String txt, int beginIndex, ValidModeEnum validModeEnum, IWordContext context) {
        // Build the responsibility chain for this invocation.
        List<ISensitiveCheck> sensitiveChecks = Guavas.newArrayList();

        // The sensitive-word check is always performed.
        sensitiveChecks.add(Instances.singleton(SensitiveCheckWord.class));
        if(context.sensitiveCheckNum()) {
            sensitiveChecks.add(Instances.singleton(SensitiveCheckNum.class));
        }
        if(context.sensitiveCheckEmail()) {
            sensitiveChecks.add(Instances.singleton(SensitiveCheckEmail.class));
        }
        if(context.sensitiveCheckUrl()) {
            sensitiveChecks.add(Instances.singleton(SensitiveCheckUrl.class));
        }

        // Run each check in order; the first hit (index > 0) short-circuits.
        for(ISensitiveCheck sensitiveCheck : sensitiveChecks) {
            SensitiveCheckResult result = sensitiveCheck.sensitiveCheck(txt, beginIndex, validModeEnum, context);
            if(result.index() > 0) {
                return result;
            }
        }

        // No check matched: return the default result with index 0.
        return SensitiveCheckResult.of(0, SensitiveCheckChain.class);
    }
}

View File

@@ -1,14 +1,12 @@
package com.github.houbb.sensitive.word.support.check.impl;
import com.github.houbb.heaven.annotation.ThreadSafe;
import com.github.houbb.heaven.support.instance.impl.Instances;
import com.github.houbb.heaven.util.lang.CharUtil;
import com.github.houbb.heaven.util.util.regex.RegexUtil;
import com.github.houbb.sensitive.word.api.IWordContext;
import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum;
import com.github.houbb.sensitive.word.support.check.ISensitiveCheck;
import com.github.houbb.sensitive.word.support.check.SensitiveCheckResult;
import com.github.houbb.sensitive.word.support.format.CharFormatChain;
/**
* email 正则表达式检测实现。
@@ -28,6 +26,15 @@ import com.github.houbb.sensitive.word.support.format.CharFormatChain;
@ThreadSafe
public class SensitiveCheckEmail implements ISensitiveCheck {
/**
* @since 0.3.0
*/
private static final ISensitiveCheck INSTANCE = new SensitiveCheckEmail();
public static ISensitiveCheck getInstance() {
return INSTANCE;
}
@Override
public SensitiveCheckResult sensitiveCheck(String txt, int beginIndex, ValidModeEnum validModeEnum, IWordContext context) {
// 记录敏感词的长度
@@ -40,7 +47,7 @@ public class SensitiveCheckEmail implements ISensitiveCheck {
// 后期如果有想法,对 DFA 进一步深入学习后,将进行优化。
for(int i = beginIndex; i < txt.length(); i++) {
char currentChar = txt.charAt(i);
char mappingChar = Instances.singleton(CharFormatChain.class)
char mappingChar = context.charFormat()
.format(currentChar, context);
if(CharUtil.isEmilChar(mappingChar)) {

View File

@@ -0,0 +1,51 @@
package com.github.houbb.sensitive.word.support.check.impl;
import com.github.houbb.heaven.support.pipeline.Pipeline;
import com.github.houbb.heaven.support.pipeline.impl.DefaultPipeline;
import com.github.houbb.sensitive.word.api.IWordContext;
import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum;
import com.github.houbb.sensitive.word.support.check.ISensitiveCheck;
import com.github.houbb.sensitive.word.support.check.SensitiveCheckResult;
import java.util.List;
/**
 * Base class for assembling a sensitive-word check out of a pipeline of
 * individual checks.
 *
 * <p>Subclasses register their checks in {@link #init(Pipeline)}; the
 * registered checks are then consulted in order and the first one that
 * reports a hit (index &gt; 0) determines the result. When nothing
 * matches, the shared "no match" result is returned.</p>
 *
 * @since 0.3.0
 */
public abstract class SensitiveCheckInit implements ISensitiveCheck {

    @Override
    public SensitiveCheckResult sensitiveCheck(String txt,
                                               int beginIndex,
                                               ValidModeEnum validModeEnum,
                                               IWordContext context) {
        // Let the concrete subclass populate the check pipeline.
        final Pipeline<ISensitiveCheck> checkPipeline = new DefaultPipeline<>();
        init(checkPipeline);

        // Consult every registered check in order; the first hit wins.
        final List<ISensitiveCheck> checkList = checkPipeline.list();
        for (ISensitiveCheck singleCheck : checkList) {
            final SensitiveCheckResult checkResult = singleCheck.sensitiveCheck(txt, beginIndex, validModeEnum, context);
            if (checkResult.index() > 0) {
                return checkResult;
            }
        }

        // Nothing matched: fall back to the shared "no match" result.
        return SensitiveCheckNone.getNoneResult();
    }

    /**
     * Registers the checks that make up this pipeline.
     *
     * @param pipeline pipeline to populate
     * @since 0.0.13
     */
    protected abstract void init(final Pipeline<ISensitiveCheck> pipeline);

}

View File

@@ -0,0 +1,41 @@
package com.github.houbb.sensitive.word.support.check.impl;
import com.github.houbb.heaven.annotation.ThreadSafe;
import com.github.houbb.sensitive.word.api.IWordContext;
import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum;
import com.github.houbb.sensitive.word.support.check.ISensitiveCheck;
import com.github.houbb.sensitive.word.support.check.SensitiveCheckResult;
/**
 * Null-object check: never reports a match.
 *
 * @author binbin.hou
 * @since 0.3.0
 */
@ThreadSafe
public class SensitiveCheckNone implements ISensitiveCheck {

    /**
     * The single shared "no match" result (index 0).
     */
    private static final SensitiveCheckResult NONE_RESULT = SensitiveCheckResult.of(0, SensitiveCheckNone.class);

    /**
     * Shared singleton instance of this check.
     * @since 0.3.0
     */
    private static final ISensitiveCheck INSTANCE = new SensitiveCheckNone();

    public static ISensitiveCheck getInstance() {
        return INSTANCE;
    }

    public static SensitiveCheckResult getNoneResult() {
        return NONE_RESULT;
    }

    @Override
    public SensitiveCheckResult sensitiveCheck(String txt, int beginIndex, ValidModeEnum validModeEnum, IWordContext context) {
        // Unconditionally report "no match".
        return NONE_RESULT;
    }

}

View File

@@ -1,12 +1,10 @@
package com.github.houbb.sensitive.word.support.check.impl;
import com.github.houbb.heaven.annotation.ThreadSafe;
import com.github.houbb.heaven.support.instance.impl.Instances;
import com.github.houbb.sensitive.word.api.IWordContext;
import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum;
import com.github.houbb.sensitive.word.support.check.ISensitiveCheck;
import com.github.houbb.sensitive.word.support.check.SensitiveCheckResult;
import com.github.houbb.sensitive.word.support.format.CharFormatChain;
/**
* 敏感词监测实现
@@ -18,6 +16,15 @@ import com.github.houbb.sensitive.word.support.format.CharFormatChain;
@ThreadSafe
public class SensitiveCheckNum implements ISensitiveCheck {
/**
* @since 0.3.0
*/
private static final ISensitiveCheck INSTANCE = new SensitiveCheckNum();
public static ISensitiveCheck getInstance() {
return INSTANCE;
}
@Override
public SensitiveCheckResult sensitiveCheck(String txt, int beginIndex, ValidModeEnum validModeEnum, IWordContext context) {
// 记录敏感词的长度
@@ -26,7 +33,7 @@ public class SensitiveCheckNum implements ISensitiveCheck {
for (int i = beginIndex; i < txt.length(); i++) {
char c = txt.charAt(i);
char charKey = Instances.singleton(CharFormatChain.class).format(c, context);
char charKey = context.charFormat().format(c, context);
// 如果是数字
// 满足进入的条件

View File

@@ -1,15 +1,13 @@
package com.github.houbb.sensitive.word.support.check.impl;
import com.github.houbb.heaven.annotation.CommonEager;
import com.github.houbb.heaven.annotation.ThreadSafe;
import com.github.houbb.heaven.support.instance.impl.Instances;
import com.github.houbb.heaven.util.lang.CharUtil;
import com.github.houbb.heaven.util.util.regex.RegexUtil;
import com.github.houbb.sensitive.word.api.IWordContext;
import com.github.houbb.sensitive.word.constant.AppConst;
import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum;
import com.github.houbb.sensitive.word.support.check.ISensitiveCheck;
import com.github.houbb.sensitive.word.support.check.SensitiveCheckResult;
import com.github.houbb.sensitive.word.support.format.CharFormatChain;
/**
* URL 正则表达式检测实现。
@@ -27,10 +25,13 @@ import com.github.houbb.sensitive.word.support.format.CharFormatChain;
public class SensitiveCheckUrl implements ISensitiveCheck {
/**
* 最长的网址长度
* @since 0.0.12
* @since 0.3.0
*/
private static final int MAX_WEB_SITE_LEN = 70;
private static final ISensitiveCheck INSTANCE = new SensitiveCheckUrl();
public static ISensitiveCheck getInstance() {
return INSTANCE;
}
@Override
public SensitiveCheckResult sensitiveCheck(String txt, int beginIndex, ValidModeEnum validModeEnum, IWordContext context) {
@@ -44,11 +45,11 @@ public class SensitiveCheckUrl implements ISensitiveCheck {
// 后期如果有想法,对 DFA 进一步深入学习后,将进行优化。
for(int i = beginIndex; i < txt.length(); i++) {
char currentChar = txt.charAt(i);
char mappingChar = Instances.singleton(CharFormatChain.class)
char mappingChar = context.charFormat()
.format(currentChar, context);
if(CharUtil.isWebSiteChar(mappingChar)
&& lengthCount <= MAX_WEB_SITE_LEN) {
&& lengthCount <= AppConst.MAX_WEB_SITE_LEN) {
lengthCount++;
stringBuilder.append(currentChar);

View File

@@ -1,14 +1,12 @@
package com.github.houbb.sensitive.word.support.check.impl;
import com.github.houbb.heaven.annotation.ThreadSafe;
import com.github.houbb.heaven.support.instance.impl.Instances;
import com.github.houbb.heaven.util.lang.ObjectUtil;
import com.github.houbb.sensitive.word.api.IWordContext;
import com.github.houbb.sensitive.word.constant.AppConst;
import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum;
import com.github.houbb.sensitive.word.support.check.ISensitiveCheck;
import com.github.houbb.sensitive.word.support.check.SensitiveCheckResult;
import com.github.houbb.sensitive.word.support.format.CharFormatChain;
import java.util.Map;
@@ -20,6 +18,15 @@ import java.util.Map;
@ThreadSafe
public class SensitiveCheckWord implements ISensitiveCheck {
/**
* @since 0.3.0
*/
private static final ISensitiveCheck INSTANCE = new SensitiveCheckWord();
public static ISensitiveCheck getInstance() {
return INSTANCE;
}
@Override
public SensitiveCheckResult sensitiveCheck(String txt, int beginIndex, ValidModeEnum validModeEnum, IWordContext context) {
Map nowMap = context.sensitiveWordMap();
@@ -90,7 +97,7 @@ public class SensitiveCheckWord implements ISensitiveCheck {
final String txt,
final int index) {
char c = txt.charAt(index);
char mappingChar = Instances.singleton(CharFormatChain.class).format(c, context);
char mappingChar = context.charFormat().format(c, context);
// 这里做一次重复词的处理
//TODO: 这里可以优化,是否获取一次。
@@ -99,7 +106,7 @@ public class SensitiveCheckWord implements ISensitiveCheck {
if(context.ignoreRepeat()
&& index > 0) {
char preChar = txt.charAt(index-1);
char preMappingChar = Instances.singleton(CharFormatChain.class)
char preMappingChar = context.charFormat()
.format(preChar, context);
// 直接赋值为上一个 map

View File

@@ -0,0 +1,97 @@
package com.github.houbb.sensitive.word.support.check.impl;
import com.github.houbb.heaven.support.pipeline.Pipeline;
import com.github.houbb.heaven.util.util.ArrayUtil;
import com.github.houbb.heaven.util.util.CollectionUtil;
import com.github.houbb.sensitive.word.api.IWordContext;
import com.github.houbb.sensitive.word.support.check.ISensitiveCheck;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
/**
 * Factory and utility methods for sensitive-word checks.
 *
 * @since 0.3.0
 */
public final class SensitiveChecks {

    /** Utility class: no instances. */
    private SensitiveChecks(){}

    /**
     * Initializes the sensitive check strategy according to the context
     * flags. The word check is always enabled; the number, email and URL
     * checks are added only when the corresponding flag is true.
     *
     * @param context context
     * @return the assembled check chain
     * @since 0.3.0
     */
    public static ISensitiveCheck initSensitiveCheck(final IWordContext context) {
        // At most four checks can be registered.
        List<ISensitiveCheck> sensitiveCheckList = new ArrayList<>(4);

        // The sensitive-word check is always performed.
        sensitiveCheckList.add(SensitiveChecks.word());
        if(context.sensitiveCheckNum()) {
            sensitiveCheckList.add(SensitiveChecks.num());
        }
        if(context.sensitiveCheckEmail()) {
            sensitiveCheckList.add(SensitiveChecks.email());
        }
        if(context.sensitiveCheckUrl()) {
            sensitiveCheckList.add(SensitiveChecks.url());
        }

        return SensitiveChecks.chains(sensitiveCheckList);
    }

    /**
     * Builds a chained check from the given checks; returns the "none"
     * check when the array is empty or null.
     *
     * @param sensitiveChecks checks to chain, in order
     * @return chained check
     * @since 0.3.0
     */
    public static ISensitiveCheck chains(final ISensitiveCheck... sensitiveChecks) {
        if (ArrayUtil.isEmpty(sensitiveChecks)){
            return none();
        }
        // Delegate to the collection overload to avoid duplicating the
        // anonymous pipeline-initialization class.
        List<ISensitiveCheck> checkList = new ArrayList<>(sensitiveChecks.length);
        for (ISensitiveCheck check : sensitiveChecks) {
            checkList.add(check);
        }
        return chains(checkList);
    }

    /**
     * Builds a chained check from the given checks; returns the "none"
     * check when the collection is empty or null.
     *
     * @param sensitiveChecks checks to chain, in iteration order
     * @return chained check
     * @since 0.3.0
     */
    public static ISensitiveCheck chains(final Collection<ISensitiveCheck> sensitiveChecks) {
        if (CollectionUtil.isEmpty(sensitiveChecks)){
            return none();
        }
        return new SensitiveCheckInit() {
            @Override
            protected void init(Pipeline<ISensitiveCheck> pipeline) {
                for(ISensitiveCheck check : sensitiveChecks) {
                    pipeline.addLast(check);
                }
            }
        };
    }

    /** @return the email check singleton */
    public static ISensitiveCheck email() {
        return SensitiveCheckEmail.getInstance();
    }

    /** @return the number check singleton */
    public static ISensitiveCheck num() {
        return SensitiveCheckNum.getInstance();
    }

    /** @return the URL check singleton */
    public static ISensitiveCheck url() {
        return SensitiveCheckUrl.getInstance();
    }

    /** @return the word check singleton */
    public static ISensitiveCheck word() {
        return SensitiveCheckWord.getInstance();
    }

    /** @return the no-op "never matches" check singleton */
    public static ISensitiveCheck none() {
        return SensitiveCheckNone.getInstance();
    }

}

View File

@@ -1,53 +0,0 @@
package com.github.houbb.sensitive.word.support.data;
import com.github.houbb.heaven.annotation.ThreadSafe;
import com.github.houbb.heaven.util.guava.Guavas;
import com.github.houbb.heaven.util.io.StreamUtil;
import com.github.houbb.heaven.util.util.CollectionUtil;
import com.github.houbb.sensitive.word.api.IWordData;
import com.github.houbb.sensitive.word.constant.AppConst;
import java.util.List;
/**
* 数据加载使用单例的模式,只需要加载一次即可。
*
* @author binbin.hou
* @since 0.0.1
*/
@ThreadSafe
public class SensitiveWordData implements IWordData {

    /**
     * Default built-in word lines, loaded once in the static initializer.
     *
     * @since 0.0.1
     */
    private static List<String> defaultLines;

    static {
        synchronized (SensitiveWordData.class) {
            // Pre-size once and append into the same list; the original code
            // pre-sized a list and then immediately discarded it by reassigning
            // to the result of readAllLines, wasting the allocation.
            List<String> lines = Guavas.newArrayList(AppConst.DICT_SIZE + AppConst.DICT_EN_SIZE);
            lines.addAll(StreamUtil.readAllLines("/dict.txt"));
            lines.addAll(StreamUtil.readAllLines("/dict_en.txt"));

            // user-defined deny words
            lines.addAll(StreamUtil.readAllLines("/sensitive_word_deny.txt"));

            // remove user-defined allow (white-list) words
            // (dead start/end timing locals removed — they were never read)
            List<String> allowList = StreamUtil.readAllLines("/sensitive_word_allow.txt");
            defaultLines = CollectionUtil.difference(lines, allowList);
        }
    }

    /**
     * Returns the word lines loaded at class-initialization time.
     *
     * @return default word data
     */
    @Override
    public List<String> getWordData() {
        return defaultLines;
    }
}

View File

@@ -16,6 +16,15 @@ import java.util.List;
@ThreadSafe
public class WordDenySystem implements IWordDeny {
/**
* @since 0.3.0
*/
private static final IWordDeny INSTANCE = new WordDenySystem();
public static IWordDeny getInstance() {
return INSTANCE;
}
@Override
public List<String> deny() {
List<String> results = StreamUtil.readAllLines("/dict.txt");

View File

@@ -1,6 +1,5 @@
package com.github.houbb.sensitive.word.support.deny;
import com.github.houbb.heaven.support.instance.impl.Instances;
import com.github.houbb.heaven.support.pipeline.Pipeline;
import com.github.houbb.heaven.util.util.ArrayUtil;
import com.github.houbb.sensitive.word.api.IWordDeny;
@@ -43,7 +42,7 @@ public final class WordDenys {
* @since 0.0.13
*/
public static IWordDeny system() {
return Instances.singleton(WordDenySystem.class);
return WordDenySystem.getInstance();
}
}

View File

@@ -1,48 +0,0 @@
package com.github.houbb.sensitive.word.support.format;
import com.github.houbb.heaven.annotation.ThreadSafe;
import com.github.houbb.heaven.support.instance.impl.Instances;
import com.github.houbb.heaven.util.guava.Guavas;
import com.github.houbb.sensitive.word.api.IWordContext;
import com.github.houbb.sensitive.word.api.ICharFormat;
import java.util.List;
/**
* 格式化责任链
* @author binbin.hou
* @since 0.0.5
*/
@ThreadSafe
public class CharFormatChain implements ICharFormat {

    /**
     * Formats a char by running it through every formatter enabled in the context.
     *
     * @param original original char
     * @param context  word context holding the ignore-style switches
     * @return formatted char
     */
    @Override
    public char format(char original, IWordContext context) {
        // collect the formatters enabled by the context switches
        final List<ICharFormat> formats = Guavas.newArrayList();
        if (context.ignoreEnglishStyle()) {
            formats.add(Instances.singleton(IgnoreEnglishStyleFormat.class));
        }
        if (context.ignoreCase()) {
            formats.add(Instances.singleton(IgnoreCaseCharFormat.class));
        }
        if (context.ignoreWidth()) {
            formats.add(Instances.singleton(IgnoreWidthCharFormat.class));
        }
        if (context.ignoreNumStyle()) {
            formats.add(Instances.singleton(IgnoreNumStyleCharFormat.class));
        }
        if (context.ignoreChineseStyle()) {
            formats.add(Instances.singleton(IgnoreChineseStyleFormat.class));
        }

        // apply each formatter in turn, feeding the result forward
        char result = original;
        for (ICharFormat item : formats) {
            result = item.format(result, context);
        }
        return result;
    }
}

View File

@@ -0,0 +1,43 @@
package com.github.houbb.sensitive.word.support.format;
import com.github.houbb.heaven.annotation.ThreadSafe;
import com.github.houbb.heaven.support.pipeline.Pipeline;
import com.github.houbb.heaven.support.pipeline.impl.DefaultPipeline;
import com.github.houbb.sensitive.word.api.ICharFormat;
import com.github.houbb.sensitive.word.api.IWordContext;
import java.util.List;
/**
* 格式化责任链
* @author binbin.hou
* @since 0.0.5
*/
@ThreadSafe
public abstract class CharFormatInit implements ICharFormat {

    /**
     * Fills the pipeline with the formatters to apply, in order.
     *
     * @param pipeline pipeline to populate
     * @since 0.0.13
     */
    protected abstract void init(final Pipeline<ICharFormat> pipeline);

    /**
     * Formats a char by running it through the pipeline built by {@link #init}.
     *
     * @param original original char
     * @param context  word context
     * @return formatted char
     */
    @Override
    public char format(char original, IWordContext context) {
        final Pipeline<ICharFormat> pipeline = new DefaultPipeline<>();
        init(pipeline);

        // run every configured formatter in order, feeding each result forward
        char current = original;
        for (ICharFormat item : pipeline.list()) {
            current = item.format(current, context);
        }
        return current;
    }
}

View File

@@ -0,0 +1,112 @@
package com.github.houbb.sensitive.word.support.format;
import com.github.houbb.heaven.support.pipeline.Pipeline;
import com.github.houbb.heaven.util.guava.Guavas;
import com.github.houbb.heaven.util.util.ArrayUtil;
import com.github.houbb.heaven.util.util.CollectionUtil;
import com.github.houbb.sensitive.word.api.ICharFormat;
import com.github.houbb.sensitive.word.api.IWordContext;
import java.util.Collection;
import java.util.List;
/**
* 格式化工具类
* @author binbin.hou
 * @since 0.3.0
*/
public final class CharFormats {

    private CharFormats(){}

    /**
     * Builds the char format chain configured by the given context.
     *
     * <p>Formatters are appended in a fixed order: english style, case,
     * width, num style, chinese style — each only when its switch is on.</p>
     *
     * @param context word context holding the ignore-style switches
     * @return composed char format
     * @since 0.3.0
     */
    public static ICharFormat initCharFormat(final IWordContext context) {
        final List<ICharFormat> formats = Guavas.newArrayList();

        if (context.ignoreEnglishStyle()) {
            formats.add(ignoreEnglishStyle());
        }
        if (context.ignoreCase()) {
            formats.add(ignoreCase());
        }
        if (context.ignoreWidth()) {
            formats.add(ignoreWidth());
        }
        if (context.ignoreNumStyle()) {
            formats.add(ignoreNumStyle());
        }
        if (context.ignoreChineseStyle()) {
            formats.add(ignoreChineseStyle());
        }

        return chains(formats);
    }

    /**
     * Composes the given formats into a single pipeline-backed format.
     *
     * @param charFormats formats to run in order; may be empty
     * @return composed format, or a no-op format when none are given
     */
    public static ICharFormat chains(final ICharFormat ... charFormats) {
        if (ArrayUtil.isEmpty(charFormats)) {
            return none();
        }

        return new CharFormatInit() {
            @Override
            protected void init(Pipeline<ICharFormat> pipeline) {
                for (ICharFormat item : charFormats) {
                    pipeline.addLast(item);
                }
            }
        };
    }

    /**
     * Composes the given formats into a single pipeline-backed format.
     *
     * @param charFormats formats to run in order; may be empty
     * @return composed format, or a no-op format when none are given
     */
    public static ICharFormat chains(final Collection<ICharFormat> charFormats) {
        if (CollectionUtil.isEmpty(charFormats)) {
            return none();
        }

        return new CharFormatInit() {
            @Override
            protected void init(Pipeline<ICharFormat> pipeline) {
                for (ICharFormat item : charFormats) {
                    pipeline.addLast(item);
                }
            }
        };
    }

    /** @return shared no-op format instance */
    public static ICharFormat none() {
        return NoneCharFormat.getInstance();
    }

    /** @return shared ignore-case format instance */
    public static ICharFormat ignoreCase() {
        return IgnoreCaseCharFormat.getInstance();
    }

    /** @return shared ignore-english-style format instance */
    public static ICharFormat ignoreEnglishStyle() {
        return IgnoreEnglishStyleFormat.getInstance();
    }

    /** @return shared ignore-chinese-style format instance */
    public static ICharFormat ignoreChineseStyle() {
        return IgnoreChineseStyleFormat.getInstance();
    }

    /** @return shared ignore-num-style format instance */
    public static ICharFormat ignoreNumStyle() {
        return IgnoreNumStyleCharFormat.getInstance();
    }

    /** @return shared ignore-width format instance */
    public static ICharFormat ignoreWidth() {
        return IgnoreWidthCharFormat.getInstance();
    }
}

View File

@@ -12,6 +12,12 @@ import com.github.houbb.sensitive.word.api.IWordContext;
@ThreadSafe
public class IgnoreCaseCharFormat implements ICharFormat {
private static final ICharFormat INSTANCE = new IgnoreCaseCharFormat();
public static ICharFormat getInstance() {
return INSTANCE;
}
@Override
public char format(char original, IWordContext context) {
return Character.toLowerCase(original);

View File

@@ -7,13 +7,19 @@ import com.github.houbb.sensitive.word.api.ICharFormat;
import com.github.houbb.sensitive.word.api.IWordContext;
/**
* 忽略大小写
* 忽略中文样式
* @author binbin.hou
* @since 0.0.5
*/
@ThreadSafe
public class IgnoreChineseStyleFormat implements ICharFormat {
private static final ICharFormat INSTANCE = new IgnoreChineseStyleFormat();
public static ICharFormat getInstance() {
return INSTANCE;
}
@Override
public char format(char original, IWordContext context) {
String string = String.valueOf(original);

View File

@@ -3,7 +3,7 @@ package com.github.houbb.sensitive.word.support.format;
import com.github.houbb.heaven.annotation.ThreadSafe;
import com.github.houbb.sensitive.word.api.ICharFormat;
import com.github.houbb.sensitive.word.api.IWordContext;
import com.github.houbb.sensitive.word.utils.CharUtils;
import com.github.houbb.sensitive.word.utils.InnerCharUtils;
/**
* 忽略英文的各种格式
@@ -13,9 +13,15 @@ import com.github.houbb.sensitive.word.utils.CharUtils;
@ThreadSafe
public class IgnoreEnglishStyleFormat implements ICharFormat {
private static final ICharFormat INSTANCE = new IgnoreEnglishStyleFormat();
public static ICharFormat getInstance() {
return INSTANCE;
}
@Override
public char format(char original, IWordContext context) {
return CharUtils.getMappingChar(original);
return InnerCharUtils.getMappingChar(original);
}
}

View File

@@ -3,7 +3,7 @@ package com.github.houbb.sensitive.word.support.format;
import com.github.houbb.heaven.annotation.ThreadSafe;
import com.github.houbb.sensitive.word.api.IWordContext;
import com.github.houbb.sensitive.word.api.ICharFormat;
import com.github.houbb.sensitive.word.utils.NumUtils;
import com.github.houbb.sensitive.word.utils.InnerNumUtils;
/**
* 忽略数字的样式
@@ -13,9 +13,15 @@ import com.github.houbb.sensitive.word.utils.NumUtils;
@ThreadSafe
public class IgnoreNumStyleCharFormat implements ICharFormat {
private static final ICharFormat INSTANCE = new IgnoreNumStyleCharFormat();
public static ICharFormat getInstance() {
return INSTANCE;
}
@Override
public char format(char original, IWordContext context) {
return NumUtils.getMappingChar(original);
return InnerNumUtils.getMappingChar(original);
}
}

View File

@@ -6,13 +6,19 @@ import com.github.houbb.sensitive.word.api.IWordContext;
import com.github.houbb.sensitive.word.api.ICharFormat;
/**
* 格式化责任链
* 格式化字宽度
* @author binbin.hou
* @since 0.0.5
*/
@ThreadSafe
public class IgnoreWidthCharFormat implements ICharFormat {
private static final ICharFormat INSTANCE = new IgnoreWidthCharFormat();
public static ICharFormat getInstance() {
return INSTANCE;
}
@Override
public char format(char original, IWordContext context) {
return CharUtil.toHalfWidth(original);

View File

@@ -0,0 +1,27 @@
package com.github.houbb.sensitive.word.support.format;
import com.github.houbb.heaven.annotation.ThreadSafe;
import com.github.houbb.sensitive.word.api.ICharFormat;
import com.github.houbb.sensitive.word.api.IWordContext;
/**
* 无处理
*
* @author binbin.hou
* @since 0.0.5
*/
@ThreadSafe
public class NoneCharFormat implements ICharFormat {

    /**
     * Shared stateless singleton instance.
     *
     * @since 0.3.0
     */
    private static final ICharFormat INSTANCE = new NoneCharFormat();

    /** @return shared instance */
    public static ICharFormat getInstance() {
        return INSTANCE;
    }

    /**
     * Returns the char unchanged — this formatter applies no conversion.
     *
     * @param original original char
     * @param context  word context (unused)
     * @return the same char
     */
    @Override
    public char format(char original, IWordContext context) {
        return original;
    }
}

View File

@@ -1,19 +1,15 @@
package com.github.houbb.sensitive.word.support.map;
import com.github.houbb.heaven.annotation.ThreadSafe;
import com.github.houbb.heaven.support.instance.impl.Instances;
import com.github.houbb.heaven.util.guava.Guavas;
import com.github.houbb.heaven.util.io.FileUtil;
import com.github.houbb.heaven.util.lang.CharUtil;
import com.github.houbb.heaven.util.lang.ObjectUtil;
import com.github.houbb.heaven.util.lang.StringUtil;
import com.github.houbb.heaven.util.util.CollectionUtil;
import com.github.houbb.heaven.util.util.MapUtil;
import com.github.houbb.sensitive.word.api.*;
import com.github.houbb.sensitive.word.constant.AppConst;
import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum;
import com.github.houbb.sensitive.word.support.check.SensitiveCheckResult;
import com.github.houbb.sensitive.word.support.check.impl.SensitiveCheckChain;
import com.github.houbb.sensitive.word.support.check.impl.SensitiveCheckUrl;
import com.github.houbb.sensitive.word.support.replace.SensitiveWordReplaceContext;
import com.github.houbb.sensitive.word.support.result.WordResult;
@@ -52,7 +48,6 @@ public class SensitiveWordMap implements IWordMap {
@Override
@SuppressWarnings("unchecked")
public synchronized void initWordMap(Collection<String> collection) {
long startTime = System.currentTimeMillis();
// 避免扩容带来的消耗
Map newInnerWordMap = new HashMap(collection.size());
@@ -99,8 +94,6 @@ public class SensitiveWordMap implements IWordMap {
// 最后更新为新的 map保证更新过程中旧的数据可用
this.innerWordMap = newInnerWordMap;
long endTime = System.currentTimeMillis();
}
/**
@@ -154,12 +147,12 @@ public class SensitiveWordMap implements IWordMap {
}
@Override
public String replace(String target, final ISensitiveWordReplace replace, final IWordContext context) {
public String replace(String target, final IWordContext context) {
if(StringUtil.isEmpty(target)) {
return target;
}
return this.replaceSensitiveWord(target, replace, context);
return this.replaceSensitiveWord(target, context);
}
/**
@@ -211,13 +204,11 @@ public class SensitiveWordMap implements IWordMap {
/**
* 直接替换敏感词,返回替换后的结果
* @param target 文本信息
* @param replace 替换策略
* @param context 上下文
* @return 脱敏后的字符串
* @since 0.0.2
*/
private String replaceSensitiveWord(final String target,
final ISensitiveWordReplace replace,
final IWordContext context) {
if(StringUtil.isEmpty(target)) {
return target;
@@ -245,7 +236,7 @@ public class SensitiveWordMap implements IWordMap {
ISensitiveWordReplaceContext replaceContext = SensitiveWordReplaceContext.newInstance()
.sensitiveWord(string)
.wordLength(wordLength);
String replaceStr = replace.replace(replaceContext);
String replaceStr = context.sensitiveWordReplace().replace(replaceContext);
resultBuilder.append(replaceStr);
}
@@ -267,7 +258,7 @@ public class SensitiveWordMap implements IWordMap {
context.sensitiveWordMap(innerWordMap);
// 责任链模式调用
return Instances.singleton(SensitiveCheckChain.class)
return context.sensitiveCheck()
.sensitiveCheck(txt, beginIndex, validModeEnum, context);
}

View File

@@ -0,0 +1,24 @@
package com.github.houbb.sensitive.word.support.map;
import com.github.houbb.sensitive.word.api.IWordMap;
/**
* 敏感词 map
*
* @author binbin.hou
* @since 0.3.0
*/
public final class WordMaps {

    private WordMaps(){}

    /**
     * Creates the default word map implementation.
     *
     * <p>A fresh {@code SensitiveWordMap} is returned on every call.</p>
     *
     * @return a new default word map
     * @since 0.3.0
     */
    public static IWordMap defaults() {
        return new SensitiveWordMap();
    }
}

View File

@@ -1,6 +1,7 @@
package com.github.houbb.sensitive.word.support.replace;
import com.github.houbb.heaven.annotation.ThreadSafe;
import com.github.houbb.heaven.constant.CharConst;
import com.github.houbb.heaven.util.lang.CharUtil;
import com.github.houbb.sensitive.word.api.ISensitiveWordReplace;
import com.github.houbb.sensitive.word.api.ISensitiveWordReplaceContext;
@@ -13,12 +14,20 @@ import com.github.houbb.sensitive.word.api.ISensitiveWordReplaceContext;
@ThreadSafe
public class SensitiveWordReplaceChar implements ISensitiveWordReplace {
/**
* 替换的字符
* @since 0.3.0
*/
private final char replaceChar;
public SensitiveWordReplaceChar(char replaceChar) {
this.replaceChar = replaceChar;
}
public SensitiveWordReplaceChar() {
this(CharConst.STAR);
}
@Override
public String replace(ISensitiveWordReplaceContext context) {
int wordLength = context.wordLength();

View File

@@ -0,0 +1,34 @@
package com.github.houbb.sensitive.word.support.replace;
import com.github.houbb.sensitive.word.api.ISensitiveWordReplace;
/**
* 字符替换策略工具类
*
* @author binbin.hou
* @since 0.3.0
*/
public final class SensitiveWordReplaces {

    private SensitiveWordReplaces(){}

    /**
     * Replace strategy that substitutes each sensitive char with the given char.
     *
     * @param c replacement char
     * @return replace strategy
     * @since 0.3.0
     */
    public static ISensitiveWordReplace chars(final char c) {
        return new SensitiveWordReplaceChar(c);
    }

    /**
     * Replace strategy using the default replacement char ({@code *}).
     *
     * @return replace strategy
     * @since 0.3.0
     */
    public static ISensitiveWordReplace chars() {
        return new SensitiveWordReplaceChar();
    }
}

View File

@@ -12,6 +12,15 @@ import com.github.houbb.sensitive.word.api.IWordResultHandler;
@ThreadSafe
public class WordResultHandlerRaw implements IWordResultHandler<IWordResult> {
/**
* @since 0.3.0
*/
private static final WordResultHandlerRaw INSTANCE = new WordResultHandlerRaw();
public static WordResultHandlerRaw getInstance() {
return INSTANCE;
}
@Override
public IWordResult handle(IWordResult wordResult) {
return wordResult;

View File

@@ -13,6 +13,15 @@ import com.github.houbb.sensitive.word.api.IWordResultHandler;
@ThreadSafe
public class WordResultHandlerWord implements IWordResultHandler<String> {
/**
* @since 0.3.0
*/
private static final WordResultHandlerWord INSTANCE = new WordResultHandlerWord();
public static WordResultHandlerWord getInstance() {
return INSTANCE;
}
@Override
public String handle(IWordResult wordResult) {
if(wordResult == null) {

View File

@@ -1,6 +1,5 @@
package com.github.houbb.sensitive.word.support.result;
import com.github.houbb.heaven.support.instance.impl.Instances;
import com.github.houbb.sensitive.word.api.IWordResult;
import com.github.houbb.sensitive.word.api.IWordResultHandler;
@@ -19,7 +18,7 @@ public final class WordResultHandlers {
* @since 0.1.0
*/
public static IWordResultHandler<IWordResult> raw() {
return Instances.singleton(WordResultHandlerRaw.class);
return WordResultHandlerRaw.getInstance();
}
/**
@@ -28,7 +27,7 @@ public final class WordResultHandlers {
* @since 0.1.0
*/
public static IWordResultHandler<String> word() {
return Instances.singleton(WordResultHandlerWord.class);
return WordResultHandlerWord.getInstance();
}
}

View File

@@ -12,9 +12,9 @@ import java.util.Map;
* @author Administrator
* @since 0.0.4
*/
public final class CharUtils {
public final class InnerCharUtils {
private CharUtils() {
private InnerCharUtils() {
}
/**

View File

@@ -1,10 +1,12 @@
package com.github.houbb.sensitive.word.utils;
import com.github.houbb.heaven.support.instance.impl.Instances;
import com.github.houbb.heaven.util.lang.StringUtil;
import com.github.houbb.heaven.util.util.CollectionUtil;
import com.github.houbb.sensitive.word.api.ICharFormat;
import com.github.houbb.sensitive.word.api.IWordContext;
import com.github.houbb.sensitive.word.support.format.CharFormatChain;
import java.util.ArrayList;
import java.util.List;
/**
* 内部格式化工具类
@@ -21,13 +23,13 @@ public final class InnerFormatUtils {
* @return 结果
* @since 0.1.1
*/
public static String format(String original, IWordContext context) {
public static String format(final String original, final IWordContext context) {
if(StringUtil.isEmpty(original)) {
return original;
}
StringBuilder stringBuilder = new StringBuilder();
ICharFormat charFormat = Instances.singleton(CharFormatChain.class);
ICharFormat charFormat = context.charFormat();
char[] chars = original.toCharArray();
for(char c : chars) {
char cf = charFormat.format(c, context);
@@ -37,4 +39,26 @@ public final class InnerFormatUtils {
return stringBuilder.toString();
}
/**
* 格式化列表
* @param list 列表
* @param context 上下文
* @return 结果
 * @since 0.3.0
*/
public static List<String> formatWordList(List<String> list,
final IWordContext context) {
if(CollectionUtil.isEmpty(list)) {
return list;
}
List<String> resultList = new ArrayList<>(list.size());
for(String word : list) {
String formatWord = InnerFormatUtils.format(word, context);
resultList.add(formatWord);
}
return resultList;
}
}

View File

@@ -15,9 +15,9 @@ import java.util.Map;
* @author Administrator
* @since 0.0.4
*/
public final class NumUtils {
public final class InnerNumUtils {
private NumUtils(){}
private InnerNumUtils(){}
private static final String NUM_ONE = "⓪0零º₀⓿○" +
"" +

View File

@@ -0,0 +1,48 @@
package com.github.houbb.sensitive.word.utils;
import com.github.houbb.heaven.util.util.CollectionUtil;
import com.github.houbb.sensitive.word.api.IWordContext;
import java.util.*;
/**
* 数据工具包
* @since 0.3.0
*/
public final class InnerWordDataUtils {

    private InnerWordDataUtils(){}

    /**
     * Computes the deny words that remain after removing allowed words.
     *
     * <p>Both lists are normalized with the context's char formatters before
     * comparison, so different written variants of the same word match.</p>
     *
     * <p>NOTE(review): when the allow list is empty the raw (unformatted) deny
     * list is returned, while the main branch returns formatted words — confirm
     * callers normalize again downstream before relying on the output format.</p>
     *
     * @param denyList  deny words
     * @param allowList allow (white-list) words
     * @param context   word context used for formatting
     * @return effective deny words
     * @since 0.3.0
     */
    public static List<String> getActualDenyList(List<String> denyList, List<String> allowList,
                                                 final IWordContext context) {
        if (CollectionUtil.isEmpty(denyList)) {
            return Collections.emptyList();
        }
        if (CollectionUtil.isEmpty(allowList)) {
            return denyList;
        }

        final List<String> formattedDeny = InnerFormatUtils.formatWordList(denyList, context);
        final List<String> formattedAllow = InnerFormatUtils.formatWordList(allowList, context);

        // set gives O(1) membership checks while filtering
        final Set<String> allowSet = new HashSet<>(formattedAllow);
        final List<String> results = new ArrayList<>();
        for (String word : formattedDeny) {
            if (!allowSet.contains(word)) {
                results.add(word);
            }
        }
        return results;
    }
}

View File

@@ -22,7 +22,7 @@ public class SensitiveWordBsChineseTest {
public void ignoreChineseStyleTest() {
final String text = "我爱我的祖国和五星紅旗。";
List<String> wordList = SensitiveWordBs.newInstance().findAll(text);
List<String> wordList = SensitiveWordBs.newInstance().init().findAll(text);
Assert.assertEquals("[祖国, 五星紅旗]", wordList.toString());
}

View File

@@ -22,7 +22,7 @@ public class SensitiveWordBsEmailTest {
public void emailEnglishTest() {
final String text = "楼主好人,邮箱 sensitiveword@xx.com";
List<String> wordList = SensitiveWordBs.newInstance().findAll(text);
List<String> wordList = SensitiveWordBs.newInstance().init().findAll(text);
Assert.assertEquals("[邮箱, sensitiveword@xx.com]", wordList.toString());
}
@@ -34,7 +34,7 @@ public class SensitiveWordBsEmailTest {
public void emailNumberTest() {
final String text = "楼主好人,邮箱 123456789@xx.com";
List<String> wordList = SensitiveWordBs.newInstance().findAll(text);
List<String> wordList = SensitiveWordBs.newInstance().init().findAll(text);
Assert.assertEquals("[邮箱, 123456789, xx.com]", wordList.toString());
}

View File

@@ -22,7 +22,7 @@ public class SensitiveWordBsEnglishTest {
public void ignoreEnglishStyleTest() {
final String text = "Ⓕⓤc⒦ the bad words";
List<String> wordList = SensitiveWordBs.newInstance().findAll(text);
List<String> wordList = SensitiveWordBs.newInstance().init().findAll(text);
Assert.assertEquals("[Ⓕⓤc⒦]", wordList.toString());
}

View File

@@ -23,12 +23,13 @@ public class SensitiveWordBsNumLenTest {
final String text = "你懂得12345678";
// 默认检测 8 位
List<String> wordList = SensitiveWordBs.newInstance().findAll(text);
List<String> wordList = SensitiveWordBs.newInstance().init().findAll(text);
Assert.assertEquals("[12345678]", wordList.toString());
// 指定数字的长度,避免误杀
List<String> wordList2 = SensitiveWordBs.newInstance()
.numCheckLen(9)
.init()
.findAll(text);
Assert.assertEquals("[]", wordList2.toString());
}

View File

@@ -22,7 +22,7 @@ public class SensitiveWordBsNumTest {
public void findAllTest() {
final String text = "这个是我的微信9989123456";
List<String> wordList = SensitiveWordBs.newInstance().findAll(text);
List<String> wordList = SensitiveWordBs.newInstance().init().findAll(text);
Assert.assertEquals("[微信, 9989123456]", wordList.toString());
}
@@ -34,7 +34,7 @@ public class SensitiveWordBsNumTest {
public void ignoreNumStyleTest() {
final String text = "这个是我的微信9⓿二肆⁹₈③⑸⒋➃㈤㊄";
List<String> wordList = SensitiveWordBs.newInstance().findAll(text);
List<String> wordList = SensitiveWordBs.newInstance().init().findAll(text);
Assert.assertEquals("[微信, 9⓿二肆⁹₈③⑸⒋➃㈤㊄]", wordList.toString());
}

View File

@@ -24,6 +24,7 @@ public class SensitiveWordBsRepeatTest {
List<String> wordList = SensitiveWordBs.newInstance()
.ignoreRepeat(true)
.init()
.findAll(text);
Assert.assertEquals("[ⒻⒻⒻfⓤuⓤ⒰cⓒ⒦]", wordList.toString());
}

View File

@@ -2,6 +2,7 @@ package com.github.houbb.sensitive.word.bs;
import com.github.houbb.sensitive.word.support.allow.WordAllows;
import com.github.houbb.sensitive.word.support.deny.WordDenys;
import com.github.houbb.sensitive.word.support.replace.SensitiveWordReplaces;
import org.junit.Assert;
import org.junit.Test;
@@ -24,7 +25,7 @@ public class SensitiveWordBsTest {
public void containsTest() {
final String text = "五星红旗迎风飘扬,毛主席的画像屹立在天安门前。";
Assert.assertTrue(SensitiveWordBs.newInstance().contains(text));
Assert.assertTrue(SensitiveWordBs.newInstance().init().contains(text));
}
/**
@@ -35,7 +36,7 @@ public class SensitiveWordBsTest {
public void findAllTest() {
final String text = "五星红旗迎风飘扬,毛主席的画像屹立在天安门前。";
List<String> wordList = SensitiveWordBs.newInstance().findAll(text);
List<String> wordList = SensitiveWordBs.newInstance().init().findAll(text);
Assert.assertEquals("[五星红旗, 毛主席, 天安门]", wordList.toString());
}
@@ -47,7 +48,7 @@ public class SensitiveWordBsTest {
public void findFirstTest() {
final String text = "五星红旗迎风飘扬,毛主席的画像屹立在天安门前。";
String word = SensitiveWordBs.newInstance().findFirst(text);
String word = SensitiveWordBs.newInstance().init().findFirst(text);
Assert.assertEquals("五星红旗", word);
}
@@ -59,7 +60,7 @@ public class SensitiveWordBsTest {
public void replaceTest() {
final String text = "五星红旗迎风飘扬,毛主席的画像屹立在天安门前。";
String result = SensitiveWordBs.newInstance().replace(text);
String result = SensitiveWordBs.newInstance().init().replace(text);
Assert.assertEquals("****迎风飘扬,***的画像屹立在***前。", result);
}
@@ -71,7 +72,10 @@ public class SensitiveWordBsTest {
public void replaceCharTest() {
final String text = "五星红旗迎风飘扬,毛主席的画像屹立在天安门前。";
String result = SensitiveWordBs.newInstance().replace(text, '0');
String result = SensitiveWordBs.newInstance()
.sensitiveWordReplace(SensitiveWordReplaces.chars('0'))
.init()
.replace(text);
Assert.assertEquals("0000迎风飘扬000的画像屹立在000前。", result);
}
@@ -83,7 +87,7 @@ public class SensitiveWordBsTest {
public void ignoreCaseTest() {
final String text = "fuCK the bad words.";
String word = SensitiveWordBs.newInstance().findFirst(text);
String word = SensitiveWordBs.newInstance().init().findFirst(text);
Assert.assertEquals("fuCK", word);
}
@@ -95,7 +99,7 @@ public class SensitiveWordBsTest {
public void ignoreWidthTest() {
final String text = " the bad words.";
String word = SensitiveWordBs.newInstance().findFirst(text);
String word = SensitiveWordBs.newInstance().init().findFirst(text);
Assert.assertEquals("", word);
}

View File

@@ -22,11 +22,13 @@ public class SensitiveWordBsUrlTest {
public void commonUrlTest() {
final String text = "点击链接 www.baidu.com查看答案";
List<String> wordList = SensitiveWordBs.newInstance().findAll(text);
List<String> wordList = SensitiveWordBs.newInstance().init().findAll(text);
Assert.assertEquals("[链接, www.baidu.com]", wordList.toString());
Assert.assertEquals("点击** *************查看答案", SensitiveWordBs
.newInstance().replace(text));
.newInstance()
.init()
.replace(text));
}
/**
@@ -41,10 +43,10 @@ public class SensitiveWordBsUrlTest {
public void imageUrlTest() {
final String text = "双击查看大图 www.big-image.png查看";
List<String> wordList = SensitiveWordBs.newInstance().findAll(text);
List<String> wordList = SensitiveWordBs.newInstance().init().findAll(text);
Assert.assertEquals("[www.big-image.png]", wordList.toString());
Assert.assertEquals(text, SensitiveWordBs.newInstance().replace(text));
Assert.assertEquals(text, SensitiveWordBs.newInstance().init().replace(text));
}
}

View File

@@ -22,7 +22,7 @@ public class SensitiveWordBsUserDefineTest {
public void allowAndDenyTest() {
final String text = "gender 我们认为应该通过,自定义敏感词我们认为应该拒绝。";
List<String> wordList = SensitiveWordBs.newInstance().findAll(text);
List<String> wordList = SensitiveWordBs.newInstance().init().findAll(text);
Assert.assertEquals("[自定义敏感词]", wordList.toString());
}

View File

@@ -1,12 +1,6 @@
package com.github.houbb.sensitive.word.data;
import com.github.houbb.heaven.support.handler.IHandler;
import com.github.houbb.heaven.util.io.FileUtil;
import com.github.houbb.heaven.util.lang.StringUtil;
import com.github.houbb.heaven.util.util.CollectionUtil;
import com.github.houbb.opencc4j.core.impl.ZhConvertBootstrap;
import com.github.houbb.opencc4j.support.segment.impl.CharSegment;
import com.github.houbb.sensitive.word.utils.NumUtils;
import org.junit.Ignore;
import org.junit.Test;

View File

@@ -3,12 +3,11 @@ package com.github.houbb.sensitive.word.data;
import com.github.houbb.heaven.support.filter.IFilter;
import com.github.houbb.heaven.support.handler.IHandler;
import com.github.houbb.heaven.util.io.FileUtil;
import com.github.houbb.heaven.util.lang.NumUtil;
import com.github.houbb.heaven.util.lang.StringUtil;
import com.github.houbb.heaven.util.util.CollectionUtil;
import com.github.houbb.opencc4j.core.impl.ZhConvertBootstrap;
import com.github.houbb.opencc4j.support.segment.impl.CharSegment;
import com.github.houbb.sensitive.word.utils.NumUtils;
import com.github.houbb.sensitive.word.utils.InnerNumUtils;
import org.junit.Ignore;
import org.junit.Test;
@@ -128,7 +127,7 @@ public class DictSlimTest {
// 停顿词语
String trim = string.replaceAll("加|否|与|和", "");
String mapString = NumUtils.getMappingString(trim);
String mapString = InnerNumUtils.getMappingString(trim);
boolean result = StringUtil.isDigit(mapString);
if(result) {
System.out.println(string);