mirror of
https://github.com/houbb/sensitive-word.git
synced 2026-03-22 08:27:36 +08:00
release branch 0.3.0
This commit is contained in:
@@ -1,5 +1,7 @@
|
||||
package com.github.houbb.sensitive.word.api;
|
||||
|
||||
import com.github.houbb.sensitive.word.support.check.ISensitiveCheck;
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
@@ -173,4 +175,51 @@ public interface IWordContext {
|
||||
*/
|
||||
IWordContext sensitiveCheckNumLen(final int sensitiveCheckNumLen);
|
||||
|
||||
/**
|
||||
* Sets the detection strategy.
|
||||
* @param sensitiveCheck the detection strategy
|
||||
* @return this
|
||||
* @since 0.3.0
|
||||
*/
|
||||
IWordContext sensitiveCheck(final ISensitiveCheck sensitiveCheck);
|
||||
|
||||
/**
|
||||
* Gets the detection strategy.
|
||||
* @return the detection strategy
|
||||
* @since 0.3.0
|
||||
*/
|
||||
ISensitiveCheck sensitiveCheck();
|
||||
|
||||
/**
|
||||
* Sets the sensitive-word replace strategy.
|
||||
* @param sensitiveWordReplace the replace strategy
|
||||
* @return this
|
||||
* @since 0.3.0
|
||||
*/
|
||||
IWordContext sensitiveWordReplace(final ISensitiveWordReplace sensitiveWordReplace);
|
||||
|
||||
/**
|
||||
* Gets the sensitive-word replace strategy.
|
||||
* @return the replace strategy
|
||||
* @since 0.3.0
|
||||
*/
|
||||
ISensitiveWordReplace sensitiveWordReplace();
|
||||
|
||||
/**
|
||||
* Sets the unified character-format handler.
|
||||
*
|
||||
* @param charFormat the character-format handler
|
||||
* @return the resulting context (original comment says only "result")
|
||||
* @since 0.3.0
|
||||
*/
|
||||
IWordContext charFormat(final ICharFormat charFormat);
|
||||
|
||||
/**
|
||||
* Gets the character-format strategy.
|
||||
*
|
||||
* @return the format strategy
|
||||
* @since 0.3.0
|
||||
*/
|
||||
ICharFormat charFormat();
|
||||
|
||||
}
|
||||
|
||||
@@ -1,20 +0,0 @@
|
||||
package com.github.houbb.sensitive.word.api;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Word-data interface: a source of sensitive words.
* Marked {@code @Deprecated}; no replacement is named in this file.
|
||||
* @author binbin.hou
|
||||
* @since 0.0.1
|
||||
*/
|
||||
@Deprecated
|
||||
public interface IWordData {
|
||||
|
||||
/**
|
||||
* Gets the corresponding sensitive words.
|
||||
* @return the word list
|
||||
* @since 0.0.1
|
||||
*/
|
||||
List<String> getWordData();
|
||||
|
||||
}
|
||||
@@ -59,13 +59,11 @@ public interface IWordMap extends ISensitiveCheck {
|
||||
* ps: 这里可以添加优化。
|
||||
*
|
||||
* @param target 目标字符串
|
||||
* @param replace 替换策略
|
||||
* @param context 上下文
|
||||
* @return 替换后结果
|
||||
* @since 0.0.2
|
||||
*/
|
||||
String replace(final String target,
|
||||
final ISensitiveWordReplace replace,
|
||||
final IWordContext context);
|
||||
|
||||
}
|
||||
|
||||
@@ -1,18 +1,20 @@
|
||||
package com.github.houbb.sensitive.word.bs;
|
||||
|
||||
import com.github.houbb.heaven.constant.CharConst;
|
||||
import com.github.houbb.heaven.support.handler.IHandler;
|
||||
import com.github.houbb.heaven.util.common.ArgUtil;
|
||||
import com.github.houbb.heaven.util.util.CollectionUtil;
|
||||
import com.github.houbb.sensitive.word.api.*;
|
||||
import com.github.houbb.sensitive.word.support.allow.WordAllows;
|
||||
import com.github.houbb.sensitive.word.support.check.ISensitiveCheck;
|
||||
import com.github.houbb.sensitive.word.support.check.impl.SensitiveChecks;
|
||||
import com.github.houbb.sensitive.word.support.deny.WordDenys;
|
||||
import com.github.houbb.sensitive.word.support.map.SensitiveWordMap;
|
||||
import com.github.houbb.sensitive.word.support.replace.SensitiveWordReplaceChar;
|
||||
import com.github.houbb.sensitive.word.support.format.CharFormats;
|
||||
import com.github.houbb.sensitive.word.support.map.WordMaps;
|
||||
import com.github.houbb.sensitive.word.support.replace.SensitiveWordReplaces;
|
||||
import com.github.houbb.sensitive.word.support.result.WordResultHandlers;
|
||||
import com.github.houbb.sensitive.word.utils.InnerFormatUtils;
|
||||
import com.github.houbb.sensitive.word.utils.InnerWordDataUtils;
|
||||
|
||||
import java.util.*;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* 敏感词引导类
|
||||
@@ -30,19 +32,63 @@ public class SensitiveWordBs {
|
||||
private SensitiveWordBs() {
|
||||
}
|
||||
|
||||
|
||||
//------------------------------------------------------------- basic properties START
// These builder-level settings are copied into a newly created context by initContext().
|
||||
// Format normalization
|
||||
/**
|
||||
* Whether to ignore case.
|
||||
*/
|
||||
private boolean ignoreCase = true;
|
||||
/**
|
||||
* Whether to ignore full-width / half-width differences.
|
||||
*/
|
||||
private boolean ignoreWidth = true;
|
||||
/**
|
||||
* Whether to ignore number style.
|
||||
*/
|
||||
private boolean ignoreNumStyle = true;
|
||||
/**
|
||||
* Whether to ignore Chinese style.
|
||||
*/
|
||||
private boolean ignoreChineseStyle = true;
|
||||
/**
|
||||
* Whether to ignore English style.
|
||||
*/
|
||||
private boolean ignoreEnglishStyle = true;
|
||||
/**
|
||||
* Whether to ignore repeated characters.
|
||||
*/
|
||||
private boolean ignoreRepeat = false;
|
||||
|
||||
// Enabled checks
|
||||
/**
|
||||
* Enables number detection.
|
||||
*/
|
||||
private boolean sensitiveCheckNum = true;
|
||||
/**
|
||||
* Enables email detection.
|
||||
*/
|
||||
private boolean sensitiveCheckEmail = true;
|
||||
/**
|
||||
* Enables URL detection.
|
||||
*/
|
||||
private boolean sensitiveCheckUrl = true;
|
||||
|
||||
// Extra configuration
|
||||
/**
|
||||
* Length used when detecting numbers.
|
||||
*/
|
||||
private int sensitiveCheckNumLen = 8;
|
||||
|
||||
//------------------------------------------------------------- basic properties END
|
||||
/**
|
||||
* 敏感词 map
|
||||
*
|
||||
* TODO: 暂时定义为 final,后续放开抽象。
|
||||
*
|
||||
* @since 0.0.1
|
||||
*/
|
||||
private IWordMap sensitiveWordMap;
|
||||
|
||||
/**
|
||||
* 默认的执行上下文
|
||||
*
|
||||
* @since 0.0.4
|
||||
*/
|
||||
private final IWordContext context = buildDefaultContext();
|
||||
private final IWordMap wordMap = WordMaps.defaults();
|
||||
|
||||
/**
|
||||
* 禁止的单词
|
||||
@@ -57,76 +103,21 @@ public class SensitiveWordBs {
|
||||
private IWordAllow wordAllow = WordAllows.system();
|
||||
|
||||
/**
|
||||
* DCL 初始化 wordMap 信息
|
||||
*
|
||||
* 注意:map 的构建是一个比较耗时的动作
|
||||
* @since 0.0.4
|
||||
* 替换策略
|
||||
* @since 0.3.0
|
||||
*/
|
||||
private synchronized void initWordMap() {
|
||||
// 加载配置信息
|
||||
List<String> denyList = wordDeny.deny();
|
||||
List<String> allowList = wordAllow.allow();
|
||||
List<String> results = getActualDenyList(denyList, allowList);
|
||||
|
||||
// 初始化 DFA 信息
|
||||
if(sensitiveWordMap == null) {
|
||||
sensitiveWordMap = new SensitiveWordMap();
|
||||
}
|
||||
// 便于可以多次初始化
|
||||
sensitiveWordMap.initWordMap(results);
|
||||
}
|
||||
private ISensitiveWordReplace sensitiveWordReplace = SensitiveWordReplaces.chars();
|
||||
|
||||
/**
|
||||
* 获取禁止列表中真正的禁止词汇
|
||||
* @param denyList 禁止
|
||||
* @param allowList 允许
|
||||
* @return 结果
|
||||
* @since 0.1.1
|
||||
* 上下文
|
||||
* @since 0.3.0
|
||||
*/
|
||||
List<String> getActualDenyList(List<String> denyList,
|
||||
List<String> allowList) {
|
||||
if(CollectionUtil.isEmpty(denyList)) {
|
||||
return Collections.emptyList();
|
||||
}
|
||||
if(CollectionUtil.isEmpty(allowList)) {
|
||||
return denyList;
|
||||
}
|
||||
private IWordContext context = SensitiveWordContext.newInstance();
|
||||
|
||||
List<String> formatDenyList = this.formatWordList(denyList);
|
||||
List<String> formatAllowList = this.formatWordList(allowList);
|
||||
|
||||
List<String> resultList = new ArrayList<>();
|
||||
// O(1)
|
||||
Set<String> allowSet = new HashSet<>(formatAllowList);
|
||||
|
||||
for(String deny : formatDenyList) {
|
||||
if(allowSet.contains(deny)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
resultList.add(deny);
|
||||
}
|
||||
return resultList;
|
||||
}
|
||||
|
||||
/**
|
||||
* 数据格式化处理
|
||||
* @param list 列表
|
||||
* @return 结果
|
||||
* @since 0.1.1
|
||||
*/
|
||||
private List<String> formatWordList(List<String> list) {
|
||||
if(CollectionUtil.isEmpty(list)) {
|
||||
return list;
|
||||
}
|
||||
|
||||
List<String> resultList = new ArrayList<>(list.size());
|
||||
for(String word : list) {
|
||||
String formatWord = InnerFormatUtils.format(word, this.context);
|
||||
resultList.add(formatWord);
|
||||
}
|
||||
|
||||
return resultList;
|
||||
/**
* Sets the replace strategy stored on this builder.
* initContext() later copies it into the context.
*
* @param sensitiveWordReplace the replace strategy; checked with ArgUtil.notNull
*        (presumably rejects null - confirm against ArgUtil)
* @return this
*/
public SensitiveWordBs sensitiveWordReplace(ISensitiveWordReplace sensitiveWordReplace) {
|
||||
ArgUtil.notNull(sensitiveWordReplace, "sensitiveWordReplace");
|
||||
this.sensitiveWordReplace = sensitiveWordReplace;
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -149,11 +140,68 @@ public class SensitiveWordBs {
|
||||
* @return this
|
||||
*/
|
||||
public SensitiveWordBs init() {
|
||||
// 1. Initialize the context from this builder's current settings.
|
||||
this.initContext();
|
||||
|
||||
// 2. Character-format strategy: built from the context, then stored back on it.
//    (The original comment called this the "replace strategy"; the code sets charFormat.)
|
||||
final ICharFormat charFormat = CharFormats.initCharFormat(context);
|
||||
context.charFormat(charFormat);
|
||||
|
||||
// 3. Initialize the corresponding sensitiveCheck and store it on the context.
|
||||
final ISensitiveCheck sensitiveCheck = SensitiveChecks.initSensitiveCheck(context);
|
||||
context.sensitiveCheck(sensitiveCheck);
|
||||
|
||||
// 4. Initialize the word map. It receives the context, so it runs after the steps above
//    (presumably it relies on the formatter set in step 2 - confirm).
|
||||
this.initWordMap();
|
||||
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Builds the context from this builder's current settings.
* Replaces {@code this.context} with a new SensitiveWordContext and copies every
* builder field (format flags, enabled checks, number length, replace strategy) into it.
|
||||
*
|
||||
* @return the newly created context (the same object now held in {@code this.context})
|
||||
* @since 0.0.4
|
||||
*/
|
||||
private IWordContext initContext() {
|
||||
this.context = SensitiveWordContext.newInstance();
|
||||
|
||||
// Format normalization
|
||||
context.ignoreCase(ignoreCase);
|
||||
context.ignoreWidth(ignoreWidth);
|
||||
context.ignoreNumStyle(ignoreNumStyle);
|
||||
context.ignoreChineseStyle(ignoreChineseStyle);
|
||||
context.ignoreEnglishStyle(ignoreEnglishStyle);
|
||||
context.ignoreRepeat(ignoreRepeat);
|
||||
|
||||
// Enabled checks
|
||||
context.sensitiveCheckNum(sensitiveCheckNum);
|
||||
context.sensitiveCheckEmail(sensitiveCheckEmail);
|
||||
context.sensitiveCheckUrl(sensitiveCheckUrl);
|
||||
|
||||
// Extra configuration
|
||||
context.sensitiveCheckNumLen(sensitiveCheckNumLen);
|
||||
context.sensitiveWordReplace(sensitiveWordReplace);
|
||||
|
||||
return context;
|
||||
}
|
||||
|
||||
/**
|
||||
* Initializes the wordMap data from the configured deny and allow lists.
* The original comment says "DCL"; only the {@code synchronized} modifier is visible here.
|
||||
*
|
||||
* Note: building the map is a relatively expensive operation.
|
||||
* @since 0.0.4
|
||||
*/
|
||||
private synchronized void initWordMap() {
|
||||
// Load the configured word lists
|
||||
List<String> denyList = wordDeny.deny();
|
||||
List<String> allowList = wordAllow.allow();
|
||||
List<String> results = InnerWordDataUtils.getActualDenyList(denyList, allowList, context);
|
||||
|
||||
// Done this way so that initialization can be performed more than once
|
||||
wordMap.initWordMap(results);
|
||||
}
|
||||
|
||||
/**
|
||||
* 设置禁止的实现
|
||||
* @param wordDeny 禁止的实现
|
||||
@@ -186,7 +234,7 @@ public class SensitiveWordBs {
|
||||
* @return this
|
||||
*/
|
||||
public SensitiveWordBs enableNumCheck(boolean enableNumCheck) {
|
||||
this.context.sensitiveCheckNum(enableNumCheck);
|
||||
this.sensitiveCheckNum = enableNumCheck;
|
||||
return this;
|
||||
}
|
||||
|
||||
@@ -197,7 +245,7 @@ public class SensitiveWordBs {
|
||||
* @since 0.2.1
|
||||
*/
|
||||
public SensitiveWordBs numCheckLen(int numCheckLen) {
|
||||
this.context.sensitiveCheckNumLen(numCheckLen);
|
||||
this.sensitiveCheckNumLen = numCheckLen;
|
||||
return this;
|
||||
}
|
||||
|
||||
@@ -209,7 +257,7 @@ public class SensitiveWordBs {
|
||||
* @return this
|
||||
*/
|
||||
public SensitiveWordBs enableEmailCheck(boolean enableEmailCheck) {
|
||||
this.context.sensitiveCheckEmail(enableEmailCheck);
|
||||
this.sensitiveCheckEmail = enableEmailCheck;
|
||||
return this;
|
||||
}
|
||||
|
||||
@@ -221,7 +269,7 @@ public class SensitiveWordBs {
|
||||
* @return this
|
||||
*/
|
||||
public SensitiveWordBs enableUrlCheck(boolean enableUrlCheck) {
|
||||
this.context.sensitiveCheckUrl(enableUrlCheck);
|
||||
this.sensitiveCheckUrl = enableUrlCheck;
|
||||
return this;
|
||||
}
|
||||
|
||||
@@ -232,7 +280,7 @@ public class SensitiveWordBs {
|
||||
* @since 0.0.14
|
||||
*/
|
||||
public SensitiveWordBs ignoreCase(boolean ignoreCase) {
|
||||
this.context.ignoreCase(ignoreCase);
|
||||
this.ignoreCase = ignoreCase;
|
||||
return this;
|
||||
}
|
||||
|
||||
@@ -243,7 +291,7 @@ public class SensitiveWordBs {
|
||||
* @since 0.0.14
|
||||
*/
|
||||
public SensitiveWordBs ignoreWidth(boolean ignoreWidth) {
|
||||
this.context.ignoreWidth(ignoreWidth);
|
||||
this.ignoreWidth = ignoreWidth;
|
||||
return this;
|
||||
}
|
||||
|
||||
@@ -254,7 +302,7 @@ public class SensitiveWordBs {
|
||||
* @since 0.0.14
|
||||
*/
|
||||
public SensitiveWordBs ignoreNumStyle(boolean ignoreNumStyle) {
|
||||
this.context.ignoreNumStyle(ignoreNumStyle);
|
||||
this.ignoreNumStyle = ignoreNumStyle;
|
||||
return this;
|
||||
}
|
||||
|
||||
@@ -265,7 +313,7 @@ public class SensitiveWordBs {
|
||||
* @since 0.0.14
|
||||
*/
|
||||
public SensitiveWordBs ignoreChineseStyle(boolean ignoreChineseStyle) {
|
||||
this.context.ignoreChineseStyle(ignoreChineseStyle);
|
||||
this.ignoreChineseStyle = ignoreChineseStyle;
|
||||
return this;
|
||||
}
|
||||
|
||||
@@ -276,7 +324,7 @@ public class SensitiveWordBs {
|
||||
* @since 0.0.14
|
||||
*/
|
||||
public SensitiveWordBs ignoreEnglishStyle(boolean ignoreEnglishStyle) {
|
||||
this.context.ignoreEnglishStyle(ignoreEnglishStyle);
|
||||
this.ignoreEnglishStyle = ignoreEnglishStyle;
|
||||
return this;
|
||||
}
|
||||
|
||||
@@ -287,37 +335,11 @@ public class SensitiveWordBs {
|
||||
* @since 0.0.14
|
||||
*/
|
||||
public SensitiveWordBs ignoreRepeat(boolean ignoreRepeat) {
|
||||
this.context.ignoreRepeat(ignoreRepeat);
|
||||
this.ignoreRepeat = ignoreRepeat;
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* 构建默认的上下文
|
||||
*
|
||||
* @return 结果
|
||||
* @since 0.0.4
|
||||
*/
|
||||
private IWordContext buildDefaultContext() {
|
||||
IWordContext wordContext = SensitiveWordContext.newInstance();
|
||||
// 格式统一化
|
||||
wordContext.ignoreCase(true);
|
||||
wordContext.ignoreWidth(true);
|
||||
wordContext.ignoreNumStyle(true);
|
||||
wordContext.ignoreChineseStyle(true);
|
||||
wordContext.ignoreEnglishStyle(true);
|
||||
wordContext.ignoreRepeat(false);
|
||||
|
||||
// 开启校验
|
||||
wordContext.sensitiveCheckNum(true);
|
||||
wordContext.sensitiveCheckEmail(true);
|
||||
wordContext.sensitiveCheckUrl(true);
|
||||
|
||||
// 额外配置
|
||||
wordContext.sensitiveCheckNumLen(8);
|
||||
|
||||
return wordContext;
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------------ 公开方法 START
|
||||
/**
|
||||
* 是否包含敏感词
|
||||
*
|
||||
@@ -326,9 +348,7 @@ public class SensitiveWordBs {
|
||||
* @since 0.0.1
|
||||
*/
|
||||
public boolean contains(final String target) {
|
||||
statusCheck();
|
||||
|
||||
return sensitiveWordMap.contains(target, context);
|
||||
return wordMap.contains(target, context);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -369,9 +389,8 @@ public class SensitiveWordBs {
|
||||
*/
|
||||
public <R> List<R> findAll(final String target, final IWordResultHandler<R> handler) {
|
||||
ArgUtil.notNull(handler, "handler");
|
||||
statusCheck();
|
||||
|
||||
List<IWordResult> wordResults = sensitiveWordMap.findAll(target, context);
|
||||
List<IWordResult> wordResults = wordMap.findAll(target, context);
|
||||
return CollectionUtil.toList(wordResults, new IHandler<IWordResult, R>() {
|
||||
@Override
|
||||
public R handle(IWordResult wordResult) {
|
||||
@@ -392,67 +411,22 @@ public class SensitiveWordBs {
|
||||
*/
|
||||
public <R> R findFirst(final String target, final IWordResultHandler<R> handler) {
|
||||
ArgUtil.notNull(handler, "handler");
|
||||
statusCheck();
|
||||
|
||||
IWordResult wordResult = sensitiveWordMap.findFirst(target, context);
|
||||
IWordResult wordResult = wordMap.findFirst(target, context);
|
||||
return handler.handle(wordResult);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* 替换所有内容
|
||||
*
|
||||
* @param target 目标字符串
|
||||
* @param replaceChar 替换为的 char
|
||||
* @return 替换后结果
|
||||
* @since 0.0.2
|
||||
*/
|
||||
public String replace(final String target, final char replaceChar) {
|
||||
ISensitiveWordReplace replace = new SensitiveWordReplaceChar(replaceChar);
|
||||
|
||||
return replace(target, replace);
|
||||
}
|
||||
|
||||
/**
|
||||
* 替换所有内容
|
||||
*
|
||||
* @param target 目标字符串
|
||||
* @param replace 替换策略
|
||||
* @return 替换后结果
|
||||
* @since 0.2.0
|
||||
*/
|
||||
public String replace(final String target, final ISensitiveWordReplace replace) {
|
||||
statusCheck();
|
||||
|
||||
return sensitiveWordMap.replace(target, replace, context);
|
||||
}
|
||||
|
||||
/**
|
||||
* 替换所有内容
|
||||
* 1. 默认使用空格替换,避免星号改变 md 的格式。
|
||||
*
|
||||
* @param target 目标字符串
|
||||
* @return 替换后结果
|
||||
* @since 0.0.2
|
||||
*/
|
||||
public String replace(final String target) {
|
||||
return this.replace(target, CharConst.STAR);
|
||||
return wordMap.replace(target, context);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* 状态校验
|
||||
* @since 0.0.13
|
||||
*/
|
||||
private void statusCheck(){
|
||||
//DLC
|
||||
if(sensitiveWordMap == null) {
|
||||
synchronized (this) {
|
||||
if(sensitiveWordMap == null) {
|
||||
this.init();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
//------------------------------------------------------------------------------------ 公开方法 END
|
||||
|
||||
}
|
||||
|
||||
@@ -1,6 +1,9 @@
|
||||
package com.github.houbb.sensitive.word.bs;
|
||||
|
||||
import com.github.houbb.sensitive.word.api.ICharFormat;
|
||||
import com.github.houbb.sensitive.word.api.ISensitiveWordReplace;
|
||||
import com.github.houbb.sensitive.word.api.IWordContext;
|
||||
import com.github.houbb.sensitive.word.support.check.ISensitiveCheck;
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
@@ -77,6 +80,44 @@ public class SensitiveWordContext implements IWordContext {
|
||||
*/
|
||||
private int sensitiveCheckNumLen;
|
||||
|
||||
/**
|
||||
* Detection strategy.
|
||||
* @since 0.3.0
|
||||
*/
|
||||
private ISensitiveCheck sensitiveCheck;
|
||||
|
||||
/**
|
||||
* Replace strategy.
|
||||
* @since 0.3.0
|
||||
*/
|
||||
private ISensitiveWordReplace sensitiveWordReplace;
|
||||
|
||||
/**
|
||||
* Character formatter.
|
||||
* @since 0.3.0
|
||||
*/
|
||||
private ICharFormat charFormat;
|
||||
|
||||
/**
* Returns the replace strategy held by this context.
*/
@Override
|
||||
public ISensitiveWordReplace sensitiveWordReplace() {
|
||||
return sensitiveWordReplace;
|
||||
}
|
||||
|
||||
/**
* Stores the replace strategy on this context.
*
* @param sensitiveWordReplace the replace strategy (stored as given, no null check)
* @return this
*/
public SensitiveWordContext sensitiveWordReplace(ISensitiveWordReplace sensitiveWordReplace) {
|
||||
this.sensitiveWordReplace = sensitiveWordReplace;
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
* Returns the detection strategy held by this context.
*/
@Override
|
||||
public ISensitiveCheck sensitiveCheck() {
|
||||
return sensitiveCheck;
|
||||
}
|
||||
|
||||
/**
* Stores the detection strategy on this context.
*
* @param sensitiveCheck the detection strategy (stored as given, no null check)
* @return this
*/
public SensitiveWordContext sensitiveCheck(ISensitiveCheck sensitiveCheck) {
|
||||
this.sensitiveCheck = sensitiveCheck;
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* 私有化构造器
|
||||
* @since 0.0.4
|
||||
@@ -214,4 +255,13 @@ public class SensitiveWordContext implements IWordContext {
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
* Returns the character formatter held by this context.
*/
@Override
|
||||
public ICharFormat charFormat() {
|
||||
return charFormat;
|
||||
}
|
||||
|
||||
/**
* Stores the character formatter on this context.
*
* @param charFormat the character formatter (stored as given, no null check)
* @return this
*/
public SensitiveWordContext charFormat(ICharFormat charFormat) {
|
||||
this.charFormat = charFormat;
|
||||
return this;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,5 +0,0 @@
|
||||
/**
|
||||
* 引导类定义
|
||||
* @since 0.0.1
|
||||
*/
|
||||
package com.github.houbb.sensitive.word.bs;
|
||||
@@ -43,4 +43,10 @@ public final class AppConst {
|
||||
*/
|
||||
public static final String SENSITIVE_WORD_ALLOW_PATH = "/sensitive_word_allow.txt";
|
||||
|
||||
/**
|
||||
* Maximum web-site (URL) length.
* SensitiveCheckUrl compares its running length count against this value.
|
||||
* @since 0.3.0
|
||||
*/
|
||||
public static final int MAX_WEB_SITE_LEN = 70;
|
||||
|
||||
}
|
||||
|
||||
@@ -3,6 +3,7 @@ package com.github.houbb.sensitive.word.core;
|
||||
import com.github.houbb.sensitive.word.api.ISensitiveWordReplace;
|
||||
import com.github.houbb.sensitive.word.api.IWordResultHandler;
|
||||
import com.github.houbb.sensitive.word.bs.SensitiveWordBs;
|
||||
import com.github.houbb.sensitive.word.support.replace.SensitiveWordReplaces;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
@@ -69,7 +70,8 @@ public final class SensitiveWordHelper {
|
||||
* @since 0.2.0
|
||||
*/
|
||||
public static String replace(final String target, final ISensitiveWordReplace replace) {
|
||||
return WORD_BS.replace(target, replace);
|
||||
SensitiveWordBs sensitiveWordBs = SensitiveWordBs.newInstance().sensitiveWordReplace(replace).init();
|
||||
return sensitiveWordBs.replace(target);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -81,7 +83,9 @@ public final class SensitiveWordHelper {
|
||||
* @since 0.0.13
|
||||
*/
|
||||
public static String replace(final String target, final char replaceChar) {
|
||||
return WORD_BS.replace(target, replaceChar);
|
||||
final ISensitiveWordReplace replace = SensitiveWordReplaces.chars(replaceChar);
|
||||
|
||||
return replace(target, replace);
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -15,6 +15,15 @@ import java.util.List;
|
||||
@ThreadSafe
|
||||
public class WordAllowSystem implements IWordAllow {
|
||||
|
||||
/**
* Single shared instance, created when the class is initialized.
|
||||
* @since 0.3.0
|
||||
*/
|
||||
private static final WordAllowSystem INSTANCE = new WordAllowSystem();
|
||||
|
||||
/**
* Returns the shared instance.
*
* @return the single WordAllowSystem instance
*/
public static WordAllowSystem getInstance() {
|
||||
return INSTANCE;
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<String> allow() {
|
||||
return StreamUtil.readAllLines("/sensitive_word_allow.txt");
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
package com.github.houbb.sensitive.word.support.allow;
|
||||
|
||||
import com.github.houbb.heaven.support.instance.impl.Instances;
|
||||
import com.github.houbb.heaven.support.pipeline.Pipeline;
|
||||
import com.github.houbb.heaven.util.util.ArrayUtil;
|
||||
import com.github.houbb.sensitive.word.api.IWordAllow;
|
||||
@@ -43,7 +42,7 @@ public final class WordAllows {
|
||||
* @since 0.0.13
|
||||
*/
|
||||
public static IWordAllow system() {
|
||||
return Instances.singleton(WordAllowSystem.class);
|
||||
return WordAllowSystem.getInstance();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -1,58 +0,0 @@
|
||||
package com.github.houbb.sensitive.word.support.check.impl;
|
||||
|
||||
import com.github.houbb.heaven.annotation.ThreadSafe;
|
||||
import com.github.houbb.heaven.support.instance.impl.Instances;
|
||||
import com.github.houbb.heaven.util.guava.Guavas;
|
||||
import com.github.houbb.sensitive.word.api.IWordContext;
|
||||
import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum;
|
||||
import com.github.houbb.sensitive.word.support.check.ISensitiveCheck;
|
||||
import com.github.houbb.sensitive.word.support.check.SensitiveCheckResult;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Sensitive-word detection using the chain-of-responsibility pattern.
|
||||
*
|
||||
* A common parent class could be provided here.
|
||||
*
|
||||
*
|
||||
* For optimizations of the DFA algorithm, see the papers:
|
||||
* the various papers on the DFA algorithm.
|
||||
*
|
||||
* @author binbin.hou
|
||||
* @since 0.0.5
|
||||
*/
|
||||
@ThreadSafe
|
||||
public class SensitiveCheckChain implements ISensitiveCheck {
|
||||
|
||||
/**
* Runs the enabled checks in order and returns the first result whose index() is greater than 0.
* The list of checks is rebuilt on every call from the context flags.
* If no check matches, a result with index 0 is returned.
*/
@Override
|
||||
public SensitiveCheckResult sensitiveCheck(String txt, int beginIndex, ValidModeEnum validModeEnum, IWordContext context) {
|
||||
// Initialize the chain of responsibility
|
||||
List<ISensitiveCheck> sensitiveChecks = Guavas.newArrayList();
|
||||
// The sensitive-word check is always added
|
||||
sensitiveChecks.add(Instances.singleton(SensitiveCheckWord.class));
|
||||
if(context.sensitiveCheckNum()) {
|
||||
sensitiveChecks.add(Instances.singleton(SensitiveCheckNum.class));
|
||||
}
|
||||
if(context.sensitiveCheckEmail()) {
|
||||
sensitiveChecks.add(Instances.singleton(SensitiveCheckEmail.class));
|
||||
}
|
||||
if(context.sensitiveCheckUrl()) {
|
||||
sensitiveChecks.add(Instances.singleton(SensitiveCheckUrl.class));
|
||||
}
|
||||
|
||||
// Invoke each check in turn
|
||||
for(ISensitiveCheck sensitiveCheck : sensitiveChecks) {
|
||||
SensitiveCheckResult result = sensitiveCheck.sensitiveCheck(txt, beginIndex, validModeEnum, context);
|
||||
|
||||
if(result.index() > 0) {
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
// Regex-related calls are made directly here.
|
||||
// Default: return 0
|
||||
return SensitiveCheckResult.of(0, SensitiveCheckChain.class);
|
||||
}
|
||||
|
||||
}
|
||||
@@ -1,14 +1,12 @@
|
||||
package com.github.houbb.sensitive.word.support.check.impl;
|
||||
|
||||
import com.github.houbb.heaven.annotation.ThreadSafe;
|
||||
import com.github.houbb.heaven.support.instance.impl.Instances;
|
||||
import com.github.houbb.heaven.util.lang.CharUtil;
|
||||
import com.github.houbb.heaven.util.util.regex.RegexUtil;
|
||||
import com.github.houbb.sensitive.word.api.IWordContext;
|
||||
import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum;
|
||||
import com.github.houbb.sensitive.word.support.check.ISensitiveCheck;
|
||||
import com.github.houbb.sensitive.word.support.check.SensitiveCheckResult;
|
||||
import com.github.houbb.sensitive.word.support.format.CharFormatChain;
|
||||
|
||||
/**
|
||||
* email 正则表达式检测实现。
|
||||
@@ -28,6 +26,15 @@ import com.github.houbb.sensitive.word.support.format.CharFormatChain;
|
||||
@ThreadSafe
|
||||
public class SensitiveCheckEmail implements ISensitiveCheck {
|
||||
|
||||
/**
* Single shared instance, created when the class is initialized.
|
||||
* @since 0.3.0
|
||||
*/
|
||||
private static final ISensitiveCheck INSTANCE = new SensitiveCheckEmail();
|
||||
|
||||
/**
* Returns the shared instance.
*
* @return the single SensitiveCheckEmail instance, typed as ISensitiveCheck
*/
public static ISensitiveCheck getInstance() {
|
||||
return INSTANCE;
|
||||
}
|
||||
|
||||
@Override
|
||||
public SensitiveCheckResult sensitiveCheck(String txt, int beginIndex, ValidModeEnum validModeEnum, IWordContext context) {
|
||||
// 记录敏感词的长度
|
||||
@@ -40,7 +47,7 @@ public class SensitiveCheckEmail implements ISensitiveCheck {
|
||||
// 后期如果有想法,对 DFA 进一步深入学习后,将进行优化。
|
||||
for(int i = beginIndex; i < txt.length(); i++) {
|
||||
char currentChar = txt.charAt(i);
|
||||
char mappingChar = Instances.singleton(CharFormatChain.class)
|
||||
char mappingChar = context.charFormat()
|
||||
.format(currentChar, context);
|
||||
|
||||
if(CharUtil.isEmilChar(mappingChar)) {
|
||||
|
||||
@@ -0,0 +1,51 @@
|
||||
package com.github.houbb.sensitive.word.support.check.impl;
|
||||
|
||||
import com.github.houbb.heaven.support.pipeline.Pipeline;
|
||||
import com.github.houbb.heaven.support.pipeline.impl.DefaultPipeline;
|
||||
import com.github.houbb.sensitive.word.api.IWordContext;
|
||||
import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum;
|
||||
import com.github.houbb.sensitive.word.support.check.ISensitiveCheck;
|
||||
import com.github.houbb.sensitive.word.support.check.SensitiveCheckResult;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Base class for a check that is composed of other checks.
* Subclasses add their checks to the pipeline in {@link #init(Pipeline)}.
|
||||
* @since 0.3.0
|
||||
*/
|
||||
public abstract class SensitiveCheckInit implements ISensitiveCheck {
|
||||
|
||||
/**
|
||||
* Initializes the list of checks.
|
||||
*
|
||||
* @param pipeline the pipeline to fill with checks
|
||||
* @since 0.0.13
|
||||
*/
|
||||
protected abstract void init(final Pipeline<ISensitiveCheck> pipeline);
|
||||
|
||||
|
||||
/**
* Runs every check from the pipeline, in the order returned by pipeline.list(),
* and returns the first result whose index() is greater than 0.
* A new pipeline is created and init is called on every invocation.
* If no check matches, SensitiveCheckNone.getNoneResult() is returned.
*/
@Override
|
||||
public SensitiveCheckResult sensitiveCheck(String txt,
|
||||
int beginIndex,
|
||||
ValidModeEnum validModeEnum,
|
||||
IWordContext context) {
|
||||
|
||||
Pipeline<ISensitiveCheck> pipeline = new DefaultPipeline<>();
|
||||
this.init(pipeline);
|
||||
List<ISensitiveCheck> sensitiveChecks = pipeline.list();
|
||||
|
||||
// Invoke each check in turn
|
||||
for(ISensitiveCheck sensitiveCheck : sensitiveChecks) {
|
||||
SensitiveCheckResult result = sensitiveCheck.sensitiveCheck(txt, beginIndex, validModeEnum, context);
|
||||
|
||||
if(result.index() > 0) {
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
// Regex-related calls are made directly here.
|
||||
// Default: return the shared "no match" result
|
||||
return SensitiveCheckNone.getNoneResult();
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,41 @@
|
||||
package com.github.houbb.sensitive.word.support.check.impl;
|
||||
|
||||
import com.github.houbb.heaven.annotation.ThreadSafe;
|
||||
import com.github.houbb.sensitive.word.api.IWordContext;
|
||||
import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum;
|
||||
import com.github.houbb.sensitive.word.support.check.ISensitiveCheck;
|
||||
import com.github.houbb.sensitive.word.support.check.SensitiveCheckResult;
|
||||
|
||||
/**
|
||||
* The "no match" check: always reports that nothing matched.
|
||||
*
|
||||
* @author binbin.hou
|
||||
* @since 0.3.0
|
||||
*/
|
||||
@ThreadSafe
|
||||
public class SensitiveCheckNone implements ISensitiveCheck {
|
||||
|
||||
/**
* Single shared instance, created when the class is initialized.
|
||||
* @since 0.3.0
|
||||
*/
|
||||
private static final ISensitiveCheck INSTANCE = new SensitiveCheckNone();
|
||||
|
||||
/**
* Returns the shared instance.
*/
public static ISensitiveCheck getInstance() {
|
||||
return INSTANCE;
|
||||
}
|
||||
|
||||
/**
|
||||
* The one shared "no match" result: index 0, attributed to this class.
|
||||
*/
|
||||
private static final SensitiveCheckResult NONE_RESULT = SensitiveCheckResult.of(0, SensitiveCheckNone.class);
|
||||
|
||||
/**
* Ignores every argument and returns the shared "no match" result.
*/
@Override
|
||||
public SensitiveCheckResult sensitiveCheck(String txt, int beginIndex, ValidModeEnum validModeEnum, IWordContext context) {
|
||||
return NONE_RESULT;
|
||||
}
|
||||
|
||||
/**
* Returns the shared "no match" result without needing an instance.
*/
public static SensitiveCheckResult getNoneResult() {
|
||||
return NONE_RESULT;
|
||||
}
|
||||
|
||||
}
|
||||
@@ -1,12 +1,10 @@
|
||||
package com.github.houbb.sensitive.word.support.check.impl;
|
||||
|
||||
import com.github.houbb.heaven.annotation.ThreadSafe;
|
||||
import com.github.houbb.heaven.support.instance.impl.Instances;
|
||||
import com.github.houbb.sensitive.word.api.IWordContext;
|
||||
import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum;
|
||||
import com.github.houbb.sensitive.word.support.check.ISensitiveCheck;
|
||||
import com.github.houbb.sensitive.word.support.check.SensitiveCheckResult;
|
||||
import com.github.houbb.sensitive.word.support.format.CharFormatChain;
|
||||
|
||||
/**
|
||||
* 敏感词监测实现
|
||||
@@ -18,6 +16,15 @@ import com.github.houbb.sensitive.word.support.format.CharFormatChain;
|
||||
@ThreadSafe
|
||||
public class SensitiveCheckNum implements ISensitiveCheck {
|
||||
|
||||
/**
* Single shared instance, created when the class is initialized.
|
||||
* @since 0.3.0
|
||||
*/
|
||||
private static final ISensitiveCheck INSTANCE = new SensitiveCheckNum();
|
||||
|
||||
/**
* Returns the shared instance.
*
* @return the single SensitiveCheckNum instance, typed as ISensitiveCheck
*/
public static ISensitiveCheck getInstance() {
|
||||
return INSTANCE;
|
||||
}
|
||||
|
||||
@Override
|
||||
public SensitiveCheckResult sensitiveCheck(String txt, int beginIndex, ValidModeEnum validModeEnum, IWordContext context) {
|
||||
// 记录敏感词的长度
|
||||
@@ -26,7 +33,7 @@ public class SensitiveCheckNum implements ISensitiveCheck {
|
||||
|
||||
for (int i = beginIndex; i < txt.length(); i++) {
|
||||
char c = txt.charAt(i);
|
||||
char charKey = Instances.singleton(CharFormatChain.class).format(c, context);
|
||||
char charKey = context.charFormat().format(c, context);
|
||||
|
||||
// 如果是数字
|
||||
// 满足进入的条件
|
||||
|
||||
@@ -1,15 +1,13 @@
|
||||
package com.github.houbb.sensitive.word.support.check.impl;
|
||||
|
||||
import com.github.houbb.heaven.annotation.CommonEager;
|
||||
import com.github.houbb.heaven.annotation.ThreadSafe;
|
||||
import com.github.houbb.heaven.support.instance.impl.Instances;
|
||||
import com.github.houbb.heaven.util.lang.CharUtil;
|
||||
import com.github.houbb.heaven.util.util.regex.RegexUtil;
|
||||
import com.github.houbb.sensitive.word.api.IWordContext;
|
||||
import com.github.houbb.sensitive.word.constant.AppConst;
|
||||
import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum;
|
||||
import com.github.houbb.sensitive.word.support.check.ISensitiveCheck;
|
||||
import com.github.houbb.sensitive.word.support.check.SensitiveCheckResult;
|
||||
import com.github.houbb.sensitive.word.support.format.CharFormatChain;
|
||||
|
||||
/**
|
||||
* URL 正则表达式检测实现。
|
||||
@@ -27,10 +25,13 @@ import com.github.houbb.sensitive.word.support.format.CharFormatChain;
|
||||
public class SensitiveCheckUrl implements ISensitiveCheck {
|
||||
|
||||
/**
|
||||
* 最长的网址长度
|
||||
* @since 0.0.12
|
||||
* @since 0.3.0
|
||||
*/
|
||||
private static final int MAX_WEB_SITE_LEN = 70;
|
||||
private static final ISensitiveCheck INSTANCE = new SensitiveCheckUrl();
|
||||
|
||||
public static ISensitiveCheck getInstance() {
|
||||
return INSTANCE;
|
||||
}
|
||||
|
||||
@Override
|
||||
public SensitiveCheckResult sensitiveCheck(String txt, int beginIndex, ValidModeEnum validModeEnum, IWordContext context) {
|
||||
@@ -44,11 +45,11 @@ public class SensitiveCheckUrl implements ISensitiveCheck {
|
||||
// 后期如果有想法,对 DFA 进一步深入学习后,将进行优化。
|
||||
for(int i = beginIndex; i < txt.length(); i++) {
|
||||
char currentChar = txt.charAt(i);
|
||||
char mappingChar = Instances.singleton(CharFormatChain.class)
|
||||
char mappingChar = context.charFormat()
|
||||
.format(currentChar, context);
|
||||
|
||||
if(CharUtil.isWebSiteChar(mappingChar)
|
||||
&& lengthCount <= MAX_WEB_SITE_LEN) {
|
||||
&& lengthCount <= AppConst.MAX_WEB_SITE_LEN) {
|
||||
lengthCount++;
|
||||
stringBuilder.append(currentChar);
|
||||
|
||||
|
||||
@@ -1,14 +1,12 @@
|
||||
package com.github.houbb.sensitive.word.support.check.impl;
|
||||
|
||||
import com.github.houbb.heaven.annotation.ThreadSafe;
|
||||
import com.github.houbb.heaven.support.instance.impl.Instances;
|
||||
import com.github.houbb.heaven.util.lang.ObjectUtil;
|
||||
import com.github.houbb.sensitive.word.api.IWordContext;
|
||||
import com.github.houbb.sensitive.word.constant.AppConst;
|
||||
import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum;
|
||||
import com.github.houbb.sensitive.word.support.check.ISensitiveCheck;
|
||||
import com.github.houbb.sensitive.word.support.check.SensitiveCheckResult;
|
||||
import com.github.houbb.sensitive.word.support.format.CharFormatChain;
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
@@ -20,6 +18,15 @@ import java.util.Map;
|
||||
@ThreadSafe
|
||||
public class SensitiveCheckWord implements ISensitiveCheck {
|
||||
|
||||
/**
|
||||
* @since 0.3.0
|
||||
*/
|
||||
private static final ISensitiveCheck INSTANCE = new SensitiveCheckWord();
|
||||
|
||||
public static ISensitiveCheck getInstance() {
|
||||
return INSTANCE;
|
||||
}
|
||||
|
||||
@Override
|
||||
public SensitiveCheckResult sensitiveCheck(String txt, int beginIndex, ValidModeEnum validModeEnum, IWordContext context) {
|
||||
Map nowMap = context.sensitiveWordMap();
|
||||
@@ -90,7 +97,7 @@ public class SensitiveCheckWord implements ISensitiveCheck {
|
||||
final String txt,
|
||||
final int index) {
|
||||
char c = txt.charAt(index);
|
||||
char mappingChar = Instances.singleton(CharFormatChain.class).format(c, context);
|
||||
char mappingChar = context.charFormat().format(c, context);
|
||||
|
||||
// 这里做一次重复词的处理
|
||||
//TODO: 这里可以优化,是否获取一次。
|
||||
@@ -99,7 +106,7 @@ public class SensitiveCheckWord implements ISensitiveCheck {
|
||||
if(context.ignoreRepeat()
|
||||
&& index > 0) {
|
||||
char preChar = txt.charAt(index-1);
|
||||
char preMappingChar = Instances.singleton(CharFormatChain.class)
|
||||
char preMappingChar = context.charFormat()
|
||||
.format(preChar, context);
|
||||
|
||||
// 直接赋值为上一个 map
|
||||
|
||||
@@ -0,0 +1,97 @@
|
||||
package com.github.houbb.sensitive.word.support.check.impl;
|
||||
|
||||
import com.github.houbb.heaven.support.pipeline.Pipeline;
|
||||
import com.github.houbb.heaven.util.util.ArrayUtil;
|
||||
import com.github.houbb.heaven.util.util.CollectionUtil;
|
||||
import com.github.houbb.sensitive.word.api.IWordContext;
|
||||
import com.github.houbb.sensitive.word.support.check.ISensitiveCheck;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* 敏感词检测工具
|
||||
* @since 0.3.0
|
||||
*/
|
||||
public final class SensitiveChecks {
|
||||
|
||||
private SensitiveChecks(){}
|
||||
|
||||
/**
|
||||
* 初始化敏感检测策略
|
||||
* @param context 上下文
|
||||
*
|
||||
* @return 实现
|
||||
* @since 0.3.0
|
||||
*/
|
||||
public static ISensitiveCheck initSensitiveCheck(final IWordContext context) {
|
||||
List<ISensitiveCheck> sensitiveCheckList = new ArrayList<>();
|
||||
|
||||
// 默认添加敏感词校验
|
||||
sensitiveCheckList.add(SensitiveChecks.word());
|
||||
|
||||
if(context.sensitiveCheckNum()) {
|
||||
sensitiveCheckList.add(SensitiveChecks.num());
|
||||
}
|
||||
if(context.sensitiveCheckEmail()) {
|
||||
sensitiveCheckList.add(SensitiveChecks.email());
|
||||
}
|
||||
if(context.sensitiveCheckUrl()) {
|
||||
sensitiveCheckList.add(SensitiveChecks.url());
|
||||
}
|
||||
|
||||
return SensitiveChecks.chains(sensitiveCheckList);
|
||||
}
|
||||
|
||||
public static ISensitiveCheck chains(final ISensitiveCheck... sensitiveChecks) {
|
||||
if (ArrayUtil.isEmpty(sensitiveChecks)){
|
||||
return none();
|
||||
}
|
||||
|
||||
return new SensitiveCheckInit() {
|
||||
@Override
|
||||
protected void init(Pipeline<ISensitiveCheck> pipeline) {
|
||||
for(ISensitiveCheck check : sensitiveChecks) {
|
||||
pipeline.addLast(check);
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
public static ISensitiveCheck chains(final Collection<ISensitiveCheck> sensitiveChecks) {
|
||||
if (CollectionUtil.isEmpty(sensitiveChecks)){
|
||||
return none();
|
||||
}
|
||||
|
||||
return new SensitiveCheckInit() {
|
||||
@Override
|
||||
protected void init(Pipeline<ISensitiveCheck> pipeline) {
|
||||
for(ISensitiveCheck check : sensitiveChecks) {
|
||||
pipeline.addLast(check);
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
public static ISensitiveCheck email() {
|
||||
return SensitiveCheckEmail.getInstance();
|
||||
}
|
||||
|
||||
public static ISensitiveCheck num() {
|
||||
return SensitiveCheckNum.getInstance();
|
||||
}
|
||||
|
||||
public static ISensitiveCheck url() {
|
||||
return SensitiveCheckUrl.getInstance();
|
||||
}
|
||||
|
||||
public static ISensitiveCheck word() {
|
||||
return SensitiveCheckWord.getInstance();
|
||||
}
|
||||
|
||||
public static ISensitiveCheck none() {
|
||||
return SensitiveCheckNone.getInstance();
|
||||
}
|
||||
|
||||
}
|
||||
@@ -1,53 +0,0 @@
|
||||
package com.github.houbb.sensitive.word.support.data;
|
||||
|
||||
import com.github.houbb.heaven.annotation.ThreadSafe;
|
||||
import com.github.houbb.heaven.util.guava.Guavas;
|
||||
import com.github.houbb.heaven.util.io.StreamUtil;
|
||||
import com.github.houbb.heaven.util.util.CollectionUtil;
|
||||
import com.github.houbb.sensitive.word.api.IWordData;
|
||||
import com.github.houbb.sensitive.word.constant.AppConst;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* 数据加载使用单例的模式,只需要加载一次即可。
|
||||
*
|
||||
* @author binbin.hou
|
||||
* @since 0.0.1
|
||||
*/
|
||||
@ThreadSafe
|
||||
public class SensitiveWordData implements IWordData {
|
||||
|
||||
/**
|
||||
* 默认的内置行
|
||||
*
|
||||
* @since 0.0.1
|
||||
*/
|
||||
private static List<String> defaultLines;
|
||||
|
||||
static {
|
||||
synchronized (SensitiveWordData.class) {
|
||||
long start = System.currentTimeMillis();
|
||||
defaultLines = Guavas.newArrayList(AppConst.DICT_SIZE+AppConst.DICT_EN_SIZE);
|
||||
defaultLines = StreamUtil.readAllLines("/dict.txt");
|
||||
defaultLines.addAll(StreamUtil.readAllLines("/dict_en.txt"));
|
||||
|
||||
// 用户自定义
|
||||
List<String> denyList = StreamUtil.readAllLines("/sensitive_word_deny.txt");
|
||||
defaultLines.addAll(denyList);
|
||||
|
||||
// 移除白名单词语
|
||||
List<String> allowList = StreamUtil.readAllLines("/sensitive_word_allow.txt");
|
||||
defaultLines = CollectionUtil.difference(defaultLines, allowList);
|
||||
|
||||
long end = System.currentTimeMillis();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public List<String> getWordData() {
|
||||
return defaultLines;
|
||||
}
|
||||
|
||||
}
|
||||
@@ -16,6 +16,15 @@ import java.util.List;
|
||||
@ThreadSafe
|
||||
public class WordDenySystem implements IWordDeny {
|
||||
|
||||
/**
|
||||
* @since 0.3.0
|
||||
*/
|
||||
private static final IWordDeny INSTANCE = new WordDenySystem();
|
||||
|
||||
public static IWordDeny getInstance() {
|
||||
return INSTANCE;
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<String> deny() {
|
||||
List<String> results = StreamUtil.readAllLines("/dict.txt");
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
package com.github.houbb.sensitive.word.support.deny;
|
||||
|
||||
import com.github.houbb.heaven.support.instance.impl.Instances;
|
||||
import com.github.houbb.heaven.support.pipeline.Pipeline;
|
||||
import com.github.houbb.heaven.util.util.ArrayUtil;
|
||||
import com.github.houbb.sensitive.word.api.IWordDeny;
|
||||
@@ -43,7 +42,7 @@ public final class WordDenys {
|
||||
* @since 0.0.13
|
||||
*/
|
||||
public static IWordDeny system() {
|
||||
return Instances.singleton(WordDenySystem.class);
|
||||
return WordDenySystem.getInstance();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -1,48 +0,0 @@
|
||||
package com.github.houbb.sensitive.word.support.format;
|
||||
|
||||
import com.github.houbb.heaven.annotation.ThreadSafe;
|
||||
import com.github.houbb.heaven.support.instance.impl.Instances;
|
||||
import com.github.houbb.heaven.util.guava.Guavas;
|
||||
import com.github.houbb.sensitive.word.api.IWordContext;
|
||||
import com.github.houbb.sensitive.word.api.ICharFormat;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* 格式化责任链
|
||||
* @author binbin.hou
|
||||
* @since 0.0.5
|
||||
*/
|
||||
@ThreadSafe
|
||||
public class CharFormatChain implements ICharFormat {
|
||||
|
||||
@Override
|
||||
public char format(char original, IWordContext context) {
|
||||
char result = original;
|
||||
|
||||
List<ICharFormat> charFormats = Guavas.newArrayList();
|
||||
if(context.ignoreEnglishStyle()) {
|
||||
charFormats.add(Instances.singleton(IgnoreEnglishStyleFormat.class));
|
||||
}
|
||||
if(context.ignoreCase()) {
|
||||
charFormats.add(Instances.singleton(IgnoreCaseCharFormat.class));
|
||||
}
|
||||
if(context.ignoreWidth()) {
|
||||
charFormats.add(Instances.singleton(IgnoreWidthCharFormat.class));
|
||||
}
|
||||
if(context.ignoreNumStyle()) {
|
||||
charFormats.add(Instances.singleton(IgnoreNumStyleCharFormat.class));
|
||||
}
|
||||
if(context.ignoreChineseStyle()) {
|
||||
charFormats.add(Instances.singleton(IgnoreChineseStyleFormat.class));
|
||||
}
|
||||
|
||||
// 循环执行
|
||||
for(ICharFormat charFormat : charFormats) {
|
||||
result = charFormat.format(result, context);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,43 @@
|
||||
package com.github.houbb.sensitive.word.support.format;
|
||||
|
||||
import com.github.houbb.heaven.annotation.ThreadSafe;
|
||||
import com.github.houbb.heaven.support.pipeline.Pipeline;
|
||||
import com.github.houbb.heaven.support.pipeline.impl.DefaultPipeline;
|
||||
import com.github.houbb.sensitive.word.api.ICharFormat;
|
||||
import com.github.houbb.sensitive.word.api.IWordContext;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* 格式化责任链
|
||||
* @author binbin.hou
|
||||
* @since 0.0.5
|
||||
*/
|
||||
@ThreadSafe
|
||||
public abstract class CharFormatInit implements ICharFormat {
|
||||
|
||||
/**
|
||||
* 初始化列表
|
||||
*
|
||||
* @param pipeline 当前列表泳道
|
||||
* @since 0.0.13
|
||||
*/
|
||||
protected abstract void init(final Pipeline<ICharFormat> pipeline);
|
||||
|
||||
@Override
|
||||
public char format(char original, IWordContext context) {
|
||||
Pipeline<ICharFormat> pipeline = new DefaultPipeline<>();
|
||||
init(pipeline);
|
||||
|
||||
char result = original;
|
||||
|
||||
// 循环执行
|
||||
List<ICharFormat> charFormats = pipeline.list();
|
||||
for(ICharFormat charFormat : charFormats) {
|
||||
result = charFormat.format(result, context);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,112 @@
|
||||
package com.github.houbb.sensitive.word.support.format;
|
||||
|
||||
import com.github.houbb.heaven.support.pipeline.Pipeline;
|
||||
import com.github.houbb.heaven.util.guava.Guavas;
|
||||
import com.github.houbb.heaven.util.util.ArrayUtil;
|
||||
import com.github.houbb.heaven.util.util.CollectionUtil;
|
||||
import com.github.houbb.sensitive.word.api.ICharFormat;
|
||||
import com.github.houbb.sensitive.word.api.IWordContext;
|
||||
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* 格式化工具类
|
||||
* @author binbin.hou
|
||||
* @since 0.3.5
|
||||
*/
|
||||
public final class CharFormats {
|
||||
|
||||
private CharFormats(){}
|
||||
|
||||
/**
|
||||
* 初始化格式化
|
||||
* @param context 上下文
|
||||
* @return 结果
|
||||
* @since 0.3.0
|
||||
*/
|
||||
public static ICharFormat initCharFormat(final IWordContext context) {
|
||||
List<ICharFormat> charFormats = Guavas.newArrayList();
|
||||
if(context.ignoreEnglishStyle()) {
|
||||
charFormats.add(ignoreEnglishStyle());
|
||||
}
|
||||
if(context.ignoreCase()) {
|
||||
charFormats.add(ignoreCase());
|
||||
}
|
||||
if(context.ignoreWidth()) {
|
||||
charFormats.add(ignoreWidth());
|
||||
}
|
||||
if(context.ignoreNumStyle()) {
|
||||
charFormats.add(ignoreNumStyle());
|
||||
}
|
||||
if(context.ignoreChineseStyle()) {
|
||||
charFormats.add(ignoreChineseStyle());
|
||||
}
|
||||
|
||||
return chains(charFormats);
|
||||
}
|
||||
|
||||
/**
|
||||
* 链式
|
||||
* @param charFormats 列表
|
||||
* @return 结果
|
||||
*/
|
||||
public static ICharFormat chains(final ICharFormat ... charFormats) {
|
||||
if(ArrayUtil.isEmpty(charFormats)) {
|
||||
return none();
|
||||
}
|
||||
|
||||
return new CharFormatInit() {
|
||||
@Override
|
||||
protected void init(Pipeline<ICharFormat> pipeline) {
|
||||
for(ICharFormat charFormat : charFormats) {
|
||||
pipeline.addLast(charFormat);
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* 链式
|
||||
* @param charFormats 列表
|
||||
* @return 结果
|
||||
*/
|
||||
public static ICharFormat chains(final Collection<ICharFormat> charFormats) {
|
||||
if(CollectionUtil.isEmpty(charFormats)) {
|
||||
return none();
|
||||
}
|
||||
|
||||
return new CharFormatInit() {
|
||||
@Override
|
||||
protected void init(Pipeline<ICharFormat> pipeline) {
|
||||
for(ICharFormat charFormat : charFormats) {
|
||||
pipeline.addLast(charFormat);
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
public static ICharFormat none() {
|
||||
return NoneCharFormat.getInstance();
|
||||
}
|
||||
public static ICharFormat ignoreCase() {
|
||||
return IgnoreCaseCharFormat.getInstance();
|
||||
}
|
||||
|
||||
public static ICharFormat ignoreEnglishStyle() {
|
||||
return IgnoreEnglishStyleFormat.getInstance();
|
||||
}
|
||||
|
||||
public static ICharFormat ignoreChineseStyle() {
|
||||
return IgnoreChineseStyleFormat.getInstance();
|
||||
}
|
||||
|
||||
public static ICharFormat ignoreNumStyle() {
|
||||
return IgnoreNumStyleCharFormat.getInstance();
|
||||
}
|
||||
|
||||
public static ICharFormat ignoreWidth() {
|
||||
return IgnoreWidthCharFormat.getInstance();
|
||||
}
|
||||
|
||||
}
|
||||
@@ -12,6 +12,12 @@ import com.github.houbb.sensitive.word.api.IWordContext;
|
||||
@ThreadSafe
|
||||
public class IgnoreCaseCharFormat implements ICharFormat {
|
||||
|
||||
private static final ICharFormat INSTANCE = new IgnoreCaseCharFormat();
|
||||
|
||||
public static ICharFormat getInstance() {
|
||||
return INSTANCE;
|
||||
}
|
||||
|
||||
@Override
|
||||
public char format(char original, IWordContext context) {
|
||||
return Character.toLowerCase(original);
|
||||
|
||||
@@ -7,13 +7,19 @@ import com.github.houbb.sensitive.word.api.ICharFormat;
|
||||
import com.github.houbb.sensitive.word.api.IWordContext;
|
||||
|
||||
/**
|
||||
* 忽略大小写
|
||||
* 忽略中文样式
|
||||
* @author binbin.hou
|
||||
* @since 0.0.5
|
||||
*/
|
||||
@ThreadSafe
|
||||
public class IgnoreChineseStyleFormat implements ICharFormat {
|
||||
|
||||
private static final ICharFormat INSTANCE = new IgnoreChineseStyleFormat();
|
||||
|
||||
public static ICharFormat getInstance() {
|
||||
return INSTANCE;
|
||||
}
|
||||
|
||||
@Override
|
||||
public char format(char original, IWordContext context) {
|
||||
String string = String.valueOf(original);
|
||||
|
||||
@@ -3,7 +3,7 @@ package com.github.houbb.sensitive.word.support.format;
|
||||
import com.github.houbb.heaven.annotation.ThreadSafe;
|
||||
import com.github.houbb.sensitive.word.api.ICharFormat;
|
||||
import com.github.houbb.sensitive.word.api.IWordContext;
|
||||
import com.github.houbb.sensitive.word.utils.CharUtils;
|
||||
import com.github.houbb.sensitive.word.utils.InnerCharUtils;
|
||||
|
||||
/**
|
||||
* 忽略英文的各种格式
|
||||
@@ -13,9 +13,15 @@ import com.github.houbb.sensitive.word.utils.CharUtils;
|
||||
@ThreadSafe
|
||||
public class IgnoreEnglishStyleFormat implements ICharFormat {
|
||||
|
||||
private static final ICharFormat INSTANCE = new IgnoreEnglishStyleFormat();
|
||||
|
||||
public static ICharFormat getInstance() {
|
||||
return INSTANCE;
|
||||
}
|
||||
|
||||
@Override
|
||||
public char format(char original, IWordContext context) {
|
||||
return CharUtils.getMappingChar(original);
|
||||
return InnerCharUtils.getMappingChar(original);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -3,7 +3,7 @@ package com.github.houbb.sensitive.word.support.format;
|
||||
import com.github.houbb.heaven.annotation.ThreadSafe;
|
||||
import com.github.houbb.sensitive.word.api.IWordContext;
|
||||
import com.github.houbb.sensitive.word.api.ICharFormat;
|
||||
import com.github.houbb.sensitive.word.utils.NumUtils;
|
||||
import com.github.houbb.sensitive.word.utils.InnerNumUtils;
|
||||
|
||||
/**
|
||||
* 忽略数字的样式
|
||||
@@ -13,9 +13,15 @@ import com.github.houbb.sensitive.word.utils.NumUtils;
|
||||
@ThreadSafe
|
||||
public class IgnoreNumStyleCharFormat implements ICharFormat {
|
||||
|
||||
private static final ICharFormat INSTANCE = new IgnoreNumStyleCharFormat();
|
||||
|
||||
public static ICharFormat getInstance() {
|
||||
return INSTANCE;
|
||||
}
|
||||
|
||||
@Override
|
||||
public char format(char original, IWordContext context) {
|
||||
return NumUtils.getMappingChar(original);
|
||||
return InnerNumUtils.getMappingChar(original);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -6,13 +6,19 @@ import com.github.houbb.sensitive.word.api.IWordContext;
|
||||
import com.github.houbb.sensitive.word.api.ICharFormat;
|
||||
|
||||
/**
|
||||
* 格式化责任链
|
||||
* 格式化字宽度
|
||||
* @author binbin.hou
|
||||
* @since 0.0.5
|
||||
*/
|
||||
@ThreadSafe
|
||||
public class IgnoreWidthCharFormat implements ICharFormat {
|
||||
|
||||
private static final ICharFormat INSTANCE = new IgnoreWidthCharFormat();
|
||||
|
||||
public static ICharFormat getInstance() {
|
||||
return INSTANCE;
|
||||
}
|
||||
|
||||
@Override
|
||||
public char format(char original, IWordContext context) {
|
||||
return CharUtil.toHalfWidth(original);
|
||||
|
||||
@@ -0,0 +1,27 @@
|
||||
package com.github.houbb.sensitive.word.support.format;
|
||||
|
||||
import com.github.houbb.heaven.annotation.ThreadSafe;
|
||||
import com.github.houbb.sensitive.word.api.ICharFormat;
|
||||
import com.github.houbb.sensitive.word.api.IWordContext;
|
||||
|
||||
/**
|
||||
* 无处理
|
||||
*
|
||||
* @author binbin.hou
|
||||
* @since 0.0.5
|
||||
*/
|
||||
@ThreadSafe
|
||||
public class NoneCharFormat implements ICharFormat {
|
||||
|
||||
private static final ICharFormat INSTANCE = new NoneCharFormat();
|
||||
|
||||
public static ICharFormat getInstance() {
|
||||
return INSTANCE;
|
||||
}
|
||||
|
||||
@Override
|
||||
public char format(char original, IWordContext context) {
|
||||
return original;
|
||||
}
|
||||
|
||||
}
|
||||
@@ -1,19 +1,15 @@
|
||||
package com.github.houbb.sensitive.word.support.map;
|
||||
|
||||
import com.github.houbb.heaven.annotation.ThreadSafe;
|
||||
import com.github.houbb.heaven.support.instance.impl.Instances;
|
||||
import com.github.houbb.heaven.util.guava.Guavas;
|
||||
import com.github.houbb.heaven.util.io.FileUtil;
|
||||
import com.github.houbb.heaven.util.lang.CharUtil;
|
||||
import com.github.houbb.heaven.util.lang.ObjectUtil;
|
||||
import com.github.houbb.heaven.util.lang.StringUtil;
|
||||
import com.github.houbb.heaven.util.util.CollectionUtil;
|
||||
import com.github.houbb.heaven.util.util.MapUtil;
|
||||
import com.github.houbb.sensitive.word.api.*;
|
||||
import com.github.houbb.sensitive.word.constant.AppConst;
|
||||
import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum;
|
||||
import com.github.houbb.sensitive.word.support.check.SensitiveCheckResult;
|
||||
import com.github.houbb.sensitive.word.support.check.impl.SensitiveCheckChain;
|
||||
import com.github.houbb.sensitive.word.support.check.impl.SensitiveCheckUrl;
|
||||
import com.github.houbb.sensitive.word.support.replace.SensitiveWordReplaceContext;
|
||||
import com.github.houbb.sensitive.word.support.result.WordResult;
|
||||
@@ -52,7 +48,6 @@ public class SensitiveWordMap implements IWordMap {
|
||||
@Override
|
||||
@SuppressWarnings("unchecked")
|
||||
public synchronized void initWordMap(Collection<String> collection) {
|
||||
long startTime = System.currentTimeMillis();
|
||||
// 避免扩容带来的消耗
|
||||
Map newInnerWordMap = new HashMap(collection.size());
|
||||
|
||||
@@ -99,8 +94,6 @@ public class SensitiveWordMap implements IWordMap {
|
||||
|
||||
// 最后更新为新的 map,保证更新过程中旧的数据可用
|
||||
this.innerWordMap = newInnerWordMap;
|
||||
|
||||
long endTime = System.currentTimeMillis();
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -154,12 +147,12 @@ public class SensitiveWordMap implements IWordMap {
|
||||
}
|
||||
|
||||
@Override
|
||||
public String replace(String target, final ISensitiveWordReplace replace, final IWordContext context) {
|
||||
public String replace(String target, final IWordContext context) {
|
||||
if(StringUtil.isEmpty(target)) {
|
||||
return target;
|
||||
}
|
||||
|
||||
return this.replaceSensitiveWord(target, replace, context);
|
||||
return this.replaceSensitiveWord(target, context);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -211,13 +204,11 @@ public class SensitiveWordMap implements IWordMap {
|
||||
/**
|
||||
* 直接替换敏感词,返回替换后的结果
|
||||
* @param target 文本信息
|
||||
* @param replace 替换策略
|
||||
* @param context 上下文
|
||||
* @return 脱敏后的字符串
|
||||
* @since 0.0.2
|
||||
*/
|
||||
private String replaceSensitiveWord(final String target,
|
||||
final ISensitiveWordReplace replace,
|
||||
final IWordContext context) {
|
||||
if(StringUtil.isEmpty(target)) {
|
||||
return target;
|
||||
@@ -245,7 +236,7 @@ public class SensitiveWordMap implements IWordMap {
|
||||
ISensitiveWordReplaceContext replaceContext = SensitiveWordReplaceContext.newInstance()
|
||||
.sensitiveWord(string)
|
||||
.wordLength(wordLength);
|
||||
String replaceStr = replace.replace(replaceContext);
|
||||
String replaceStr = context.sensitiveWordReplace().replace(replaceContext);
|
||||
|
||||
resultBuilder.append(replaceStr);
|
||||
}
|
||||
@@ -267,7 +258,7 @@ public class SensitiveWordMap implements IWordMap {
|
||||
context.sensitiveWordMap(innerWordMap);
|
||||
|
||||
// 责任链模式调用
|
||||
return Instances.singleton(SensitiveCheckChain.class)
|
||||
return context.sensitiveCheck()
|
||||
.sensitiveCheck(txt, beginIndex, validModeEnum, context);
|
||||
}
|
||||
|
||||
|
||||
@@ -0,0 +1,24 @@
|
||||
package com.github.houbb.sensitive.word.support.map;
|
||||
|
||||
import com.github.houbb.sensitive.word.api.IWordMap;
|
||||
|
||||
/**
|
||||
* 敏感词 map
|
||||
*
|
||||
* @author binbin.hou
|
||||
* @since 0.3.0
|
||||
*/
|
||||
public final class WordMaps {
|
||||
|
||||
private WordMaps(){}
|
||||
|
||||
/**
|
||||
* 默认策略
|
||||
* @return 策略
|
||||
* @since 0.3.0
|
||||
*/
|
||||
public static IWordMap defaults() {
|
||||
return new SensitiveWordMap();
|
||||
}
|
||||
|
||||
}
|
||||
@@ -1,6 +1,7 @@
|
||||
package com.github.houbb.sensitive.word.support.replace;
|
||||
|
||||
import com.github.houbb.heaven.annotation.ThreadSafe;
|
||||
import com.github.houbb.heaven.constant.CharConst;
|
||||
import com.github.houbb.heaven.util.lang.CharUtil;
|
||||
import com.github.houbb.sensitive.word.api.ISensitiveWordReplace;
|
||||
import com.github.houbb.sensitive.word.api.ISensitiveWordReplaceContext;
|
||||
@@ -13,12 +14,20 @@ import com.github.houbb.sensitive.word.api.ISensitiveWordReplaceContext;
|
||||
@ThreadSafe
|
||||
public class SensitiveWordReplaceChar implements ISensitiveWordReplace {
|
||||
|
||||
/**
|
||||
* 替换的字符
|
||||
* @since 0.3.0
|
||||
*/
|
||||
private final char replaceChar;
|
||||
|
||||
public SensitiveWordReplaceChar(char replaceChar) {
|
||||
this.replaceChar = replaceChar;
|
||||
}
|
||||
|
||||
public SensitiveWordReplaceChar() {
|
||||
this(CharConst.STAR);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String replace(ISensitiveWordReplaceContext context) {
|
||||
int wordLength = context.wordLength();
|
||||
|
||||
@@ -0,0 +1,34 @@
|
||||
package com.github.houbb.sensitive.word.support.replace;
|
||||
|
||||
import com.github.houbb.sensitive.word.api.ISensitiveWordReplace;
|
||||
|
||||
/**
|
||||
* 字符替换策略工具类
|
||||
*
|
||||
* @author binbin.hou
|
||||
* @since 0.3.0
|
||||
*/
|
||||
public final class SensitiveWordReplaces {
|
||||
|
||||
private SensitiveWordReplaces(){}
|
||||
|
||||
/**
|
||||
* 字符
|
||||
* @param c 字符
|
||||
* @return 结果
|
||||
* @since 0.3.0
|
||||
*/
|
||||
public static ISensitiveWordReplace chars(final char c) {
|
||||
return new SensitiveWordReplaceChar(c);
|
||||
}
|
||||
|
||||
/**
|
||||
* 字符,默认为 *
|
||||
* @return 结果
|
||||
* @since 0.3.0
|
||||
*/
|
||||
public static ISensitiveWordReplace chars() {
|
||||
return new SensitiveWordReplaceChar();
|
||||
}
|
||||
|
||||
}
|
||||
@@ -12,6 +12,15 @@ import com.github.houbb.sensitive.word.api.IWordResultHandler;
|
||||
@ThreadSafe
|
||||
public class WordResultHandlerRaw implements IWordResultHandler<IWordResult> {
|
||||
|
||||
/**
|
||||
* @since 0.3.0
|
||||
*/
|
||||
private static final WordResultHandlerRaw INSTANCE = new WordResultHandlerRaw();
|
||||
|
||||
public static WordResultHandlerRaw getInstance() {
|
||||
return INSTANCE;
|
||||
}
|
||||
|
||||
@Override
|
||||
public IWordResult handle(IWordResult wordResult) {
|
||||
return wordResult;
|
||||
|
||||
@@ -13,6 +13,15 @@ import com.github.houbb.sensitive.word.api.IWordResultHandler;
|
||||
@ThreadSafe
|
||||
public class WordResultHandlerWord implements IWordResultHandler<String> {
|
||||
|
||||
/**
|
||||
* @since 0.3.0
|
||||
*/
|
||||
private static final WordResultHandlerWord INSTANCE = new WordResultHandlerWord();
|
||||
|
||||
public static WordResultHandlerWord getInstance() {
|
||||
return INSTANCE;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String handle(IWordResult wordResult) {
|
||||
if(wordResult == null) {
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
package com.github.houbb.sensitive.word.support.result;
|
||||
|
||||
import com.github.houbb.heaven.support.instance.impl.Instances;
|
||||
import com.github.houbb.sensitive.word.api.IWordResult;
|
||||
import com.github.houbb.sensitive.word.api.IWordResultHandler;
|
||||
|
||||
@@ -19,7 +18,7 @@ public final class WordResultHandlers {
|
||||
* @since 0.1.0
|
||||
*/
|
||||
public static IWordResultHandler<IWordResult> raw() {
|
||||
return Instances.singleton(WordResultHandlerRaw.class);
|
||||
return WordResultHandlerRaw.getInstance();
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -28,7 +27,7 @@ public final class WordResultHandlers {
|
||||
* @since 0.1.0
|
||||
*/
|
||||
public static IWordResultHandler<String> word() {
|
||||
return Instances.singleton(WordResultHandlerWord.class);
|
||||
return WordResultHandlerWord.getInstance();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -12,9 +12,9 @@ import java.util.Map;
|
||||
* @author Administrator
|
||||
* @since 0.0.4
|
||||
*/
|
||||
public final class CharUtils {
|
||||
public final class InnerCharUtils {
|
||||
|
||||
private CharUtils() {
|
||||
private InnerCharUtils() {
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -1,10 +1,12 @@
|
||||
package com.github.houbb.sensitive.word.utils;
|
||||
|
||||
import com.github.houbb.heaven.support.instance.impl.Instances;
|
||||
import com.github.houbb.heaven.util.lang.StringUtil;
|
||||
import com.github.houbb.heaven.util.util.CollectionUtil;
|
||||
import com.github.houbb.sensitive.word.api.ICharFormat;
|
||||
import com.github.houbb.sensitive.word.api.IWordContext;
|
||||
import com.github.houbb.sensitive.word.support.format.CharFormatChain;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* 内部格式化工具类
|
||||
@@ -21,13 +23,13 @@ public final class InnerFormatUtils {
|
||||
* @return 结果
|
||||
* @since 0.1.1
|
||||
*/
|
||||
public static String format(String original, IWordContext context) {
|
||||
public static String format(final String original, final IWordContext context) {
|
||||
if(StringUtil.isEmpty(original)) {
|
||||
return original;
|
||||
}
|
||||
|
||||
StringBuilder stringBuilder = new StringBuilder();
|
||||
ICharFormat charFormat = Instances.singleton(CharFormatChain.class);
|
||||
ICharFormat charFormat = context.charFormat();
|
||||
char[] chars = original.toCharArray();
|
||||
for(char c : chars) {
|
||||
char cf = charFormat.format(c, context);
|
||||
@@ -37,4 +39,26 @@ public final class InnerFormatUtils {
|
||||
return stringBuilder.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* 格式化列表
|
||||
* @param list 列表
|
||||
* @param context 上下文
|
||||
* @return 结果
|
||||
* @since 0。3.0
|
||||
*/
|
||||
public static List<String> formatWordList(List<String> list,
|
||||
final IWordContext context) {
|
||||
if(CollectionUtil.isEmpty(list)) {
|
||||
return list;
|
||||
}
|
||||
|
||||
List<String> resultList = new ArrayList<>(list.size());
|
||||
for(String word : list) {
|
||||
String formatWord = InnerFormatUtils.format(word, context);
|
||||
resultList.add(formatWord);
|
||||
}
|
||||
|
||||
return resultList;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -15,9 +15,9 @@ import java.util.Map;
|
||||
* @author Administrator
|
||||
* @since 0.0.4
|
||||
*/
|
||||
public final class NumUtils {
|
||||
public final class InnerNumUtils {
|
||||
|
||||
private NumUtils(){}
|
||||
private InnerNumUtils(){}
|
||||
|
||||
private static final String NUM_ONE = "⓪0零º₀⓿○" +
|
||||
"123456789" +
|
||||
@@ -0,0 +1,48 @@
|
||||
package com.github.houbb.sensitive.word.utils;
|
||||
|
||||
import com.github.houbb.heaven.util.util.CollectionUtil;
|
||||
import com.github.houbb.sensitive.word.api.IWordContext;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* 数据工具包
|
||||
* @since 0.3.0
|
||||
*/
|
||||
public final class InnerWordDataUtils {
|
||||
|
||||
private InnerWordDataUtils(){}
|
||||
|
||||
/**
|
||||
* 获取禁止列表中真正的禁止词汇
|
||||
* @param denyList 禁止
|
||||
* @param allowList 允许
|
||||
* @return 结果
|
||||
* @since 0.3.0
|
||||
*/
|
||||
public static List<String> getActualDenyList(List<String> denyList, List<String> allowList,
|
||||
final IWordContext context) {
|
||||
if(CollectionUtil.isEmpty(denyList)) {
|
||||
return Collections.emptyList();
|
||||
}
|
||||
if(CollectionUtil.isEmpty(allowList)) {
|
||||
return denyList;
|
||||
}
|
||||
|
||||
List<String> formatDenyList = InnerFormatUtils.formatWordList(denyList, context);
|
||||
List<String> formatAllowList = InnerFormatUtils.formatWordList(allowList, context);
|
||||
|
||||
List<String> resultList = new ArrayList<>();
|
||||
// O(1)
|
||||
Set<String> allowSet = new HashSet<>(formatAllowList);
|
||||
|
||||
for(String deny : formatDenyList) {
|
||||
if(allowSet.contains(deny)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
resultList.add(deny);
|
||||
}
|
||||
return resultList;
|
||||
}
|
||||
}
|
||||
@@ -22,7 +22,7 @@ public class SensitiveWordBsChineseTest {
|
||||
public void ignoreChineseStyleTest() {
|
||||
final String text = "我爱我的祖国和五星紅旗。";
|
||||
|
||||
List<String> wordList = SensitiveWordBs.newInstance().findAll(text);
|
||||
List<String> wordList = SensitiveWordBs.newInstance().init().findAll(text);
|
||||
Assert.assertEquals("[祖国, 五星紅旗]", wordList.toString());
|
||||
}
|
||||
|
||||
|
||||
@@ -22,7 +22,7 @@ public class SensitiveWordBsEmailTest {
|
||||
public void emailEnglishTest() {
|
||||
final String text = "楼主好人,邮箱 sensitiveword@xx.com";
|
||||
|
||||
List<String> wordList = SensitiveWordBs.newInstance().findAll(text);
|
||||
List<String> wordList = SensitiveWordBs.newInstance().init().findAll(text);
|
||||
Assert.assertEquals("[邮箱, sensitiveword@xx.com]", wordList.toString());
|
||||
}
|
||||
|
||||
@@ -34,7 +34,7 @@ public class SensitiveWordBsEmailTest {
|
||||
public void emailNumberTest() {
|
||||
final String text = "楼主好人,邮箱 123456789@xx.com";
|
||||
|
||||
List<String> wordList = SensitiveWordBs.newInstance().findAll(text);
|
||||
List<String> wordList = SensitiveWordBs.newInstance().init().findAll(text);
|
||||
Assert.assertEquals("[邮箱, 123456789, xx.com]", wordList.toString());
|
||||
}
|
||||
|
||||
|
||||
@@ -22,7 +22,7 @@ public class SensitiveWordBsEnglishTest {
|
||||
public void ignoreEnglishStyleTest() {
|
||||
final String text = "Ⓕⓤc⒦ the bad words";
|
||||
|
||||
List<String> wordList = SensitiveWordBs.newInstance().findAll(text);
|
||||
List<String> wordList = SensitiveWordBs.newInstance().init().findAll(text);
|
||||
Assert.assertEquals("[Ⓕⓤc⒦]", wordList.toString());
|
||||
}
|
||||
|
||||
|
||||
@@ -23,12 +23,13 @@ public class SensitiveWordBsNumLenTest {
|
||||
final String text = "你懂得:12345678";
|
||||
|
||||
// 默认检测 8 位
|
||||
List<String> wordList = SensitiveWordBs.newInstance().findAll(text);
|
||||
List<String> wordList = SensitiveWordBs.newInstance().init().findAll(text);
|
||||
Assert.assertEquals("[12345678]", wordList.toString());
|
||||
|
||||
// 指定数字的长度,避免误杀
|
||||
List<String> wordList2 = SensitiveWordBs.newInstance()
|
||||
.numCheckLen(9)
|
||||
.init()
|
||||
.findAll(text);
|
||||
Assert.assertEquals("[]", wordList2.toString());
|
||||
}
|
||||
|
||||
@@ -22,7 +22,7 @@ public class SensitiveWordBsNumTest {
|
||||
public void findAllTest() {
|
||||
final String text = "这个是我的微信:9989123456";
|
||||
|
||||
List<String> wordList = SensitiveWordBs.newInstance().findAll(text);
|
||||
List<String> wordList = SensitiveWordBs.newInstance().init().findAll(text);
|
||||
Assert.assertEquals("[微信, 9989123456]", wordList.toString());
|
||||
}
|
||||
|
||||
@@ -34,7 +34,7 @@ public class SensitiveWordBsNumTest {
|
||||
public void ignoreNumStyleTest() {
|
||||
final String text = "这个是我的微信:9⓿二肆⁹₈③⑸⒋➃㈤㊄";
|
||||
|
||||
List<String> wordList = SensitiveWordBs.newInstance().findAll(text);
|
||||
List<String> wordList = SensitiveWordBs.newInstance().init().findAll(text);
|
||||
Assert.assertEquals("[微信, 9⓿二肆⁹₈③⑸⒋➃㈤㊄]", wordList.toString());
|
||||
}
|
||||
|
||||
|
||||
@@ -24,6 +24,7 @@ public class SensitiveWordBsRepeatTest {
|
||||
|
||||
List<String> wordList = SensitiveWordBs.newInstance()
|
||||
.ignoreRepeat(true)
|
||||
.init()
|
||||
.findAll(text);
|
||||
Assert.assertEquals("[ⒻⒻⒻfⓤuⓤ⒰cⓒ⒦]", wordList.toString());
|
||||
}
|
||||
|
||||
@@ -2,6 +2,7 @@ package com.github.houbb.sensitive.word.bs;
|
||||
|
||||
import com.github.houbb.sensitive.word.support.allow.WordAllows;
|
||||
import com.github.houbb.sensitive.word.support.deny.WordDenys;
|
||||
import com.github.houbb.sensitive.word.support.replace.SensitiveWordReplaces;
|
||||
import org.junit.Assert;
|
||||
import org.junit.Test;
|
||||
|
||||
@@ -24,7 +25,7 @@ public class SensitiveWordBsTest {
|
||||
public void containsTest() {
|
||||
final String text = "五星红旗迎风飘扬,毛主席的画像屹立在天安门前。";
|
||||
|
||||
Assert.assertTrue(SensitiveWordBs.newInstance().contains(text));
|
||||
Assert.assertTrue(SensitiveWordBs.newInstance().init().contains(text));
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -35,7 +36,7 @@ public class SensitiveWordBsTest {
|
||||
public void findAllTest() {
|
||||
final String text = "五星红旗迎风飘扬,毛主席的画像屹立在天安门前。";
|
||||
|
||||
List<String> wordList = SensitiveWordBs.newInstance().findAll(text);
|
||||
List<String> wordList = SensitiveWordBs.newInstance().init().findAll(text);
|
||||
Assert.assertEquals("[五星红旗, 毛主席, 天安门]", wordList.toString());
|
||||
}
|
||||
|
||||
@@ -47,7 +48,7 @@ public class SensitiveWordBsTest {
|
||||
public void findFirstTest() {
|
||||
final String text = "五星红旗迎风飘扬,毛主席的画像屹立在天安门前。";
|
||||
|
||||
String word = SensitiveWordBs.newInstance().findFirst(text);
|
||||
String word = SensitiveWordBs.newInstance().init().findFirst(text);
|
||||
Assert.assertEquals("五星红旗", word);
|
||||
}
|
||||
|
||||
@@ -59,7 +60,7 @@ public class SensitiveWordBsTest {
|
||||
public void replaceTest() {
|
||||
final String text = "五星红旗迎风飘扬,毛主席的画像屹立在天安门前。";
|
||||
|
||||
String result = SensitiveWordBs.newInstance().replace(text);
|
||||
String result = SensitiveWordBs.newInstance().init().replace(text);
|
||||
Assert.assertEquals("****迎风飘扬,***的画像屹立在***前。", result);
|
||||
}
|
||||
|
||||
@@ -71,7 +72,10 @@ public class SensitiveWordBsTest {
|
||||
public void replaceCharTest() {
|
||||
final String text = "五星红旗迎风飘扬,毛主席的画像屹立在天安门前。";
|
||||
|
||||
String result = SensitiveWordBs.newInstance().replace(text, '0');
|
||||
String result = SensitiveWordBs.newInstance()
|
||||
.sensitiveWordReplace(SensitiveWordReplaces.chars('0'))
|
||||
.init()
|
||||
.replace(text);
|
||||
Assert.assertEquals("0000迎风飘扬,000的画像屹立在000前。", result);
|
||||
}
|
||||
|
||||
@@ -83,7 +87,7 @@ public class SensitiveWordBsTest {
|
||||
public void ignoreCaseTest() {
|
||||
final String text = "fuCK the bad words.";
|
||||
|
||||
String word = SensitiveWordBs.newInstance().findFirst(text);
|
||||
String word = SensitiveWordBs.newInstance().init().findFirst(text);
|
||||
Assert.assertEquals("fuCK", word);
|
||||
}
|
||||
|
||||
@@ -95,7 +99,7 @@ public class SensitiveWordBsTest {
|
||||
public void ignoreWidthTest() {
|
||||
final String text = "fuck the bad words.";
|
||||
|
||||
String word = SensitiveWordBs.newInstance().findFirst(text);
|
||||
String word = SensitiveWordBs.newInstance().init().findFirst(text);
|
||||
Assert.assertEquals("fuck", word);
|
||||
}
|
||||
|
||||
|
||||
@@ -22,11 +22,13 @@ public class SensitiveWordBsUrlTest {
|
||||
public void commonUrlTest() {
|
||||
final String text = "点击链接 www.baidu.com查看答案";
|
||||
|
||||
List<String> wordList = SensitiveWordBs.newInstance().findAll(text);
|
||||
List<String> wordList = SensitiveWordBs.newInstance().init().findAll(text);
|
||||
Assert.assertEquals("[链接, www.baidu.com]", wordList.toString());
|
||||
|
||||
Assert.assertEquals("点击** *************查看答案", SensitiveWordBs
|
||||
.newInstance().replace(text));
|
||||
.newInstance()
|
||||
.init()
|
||||
.replace(text));
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -41,10 +43,10 @@ public class SensitiveWordBsUrlTest {
|
||||
public void imageUrlTest() {
|
||||
final String text = "双击查看大图 www.big-image.png查看";
|
||||
|
||||
List<String> wordList = SensitiveWordBs.newInstance().findAll(text);
|
||||
List<String> wordList = SensitiveWordBs.newInstance().init().findAll(text);
|
||||
Assert.assertEquals("[www.big-image.png]", wordList.toString());
|
||||
|
||||
Assert.assertEquals(text, SensitiveWordBs.newInstance().replace(text));
|
||||
Assert.assertEquals(text, SensitiveWordBs.newInstance().init().replace(text));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -22,7 +22,7 @@ public class SensitiveWordBsUserDefineTest {
|
||||
public void allowAndDenyTest() {
|
||||
final String text = "gender 我们认为应该通过,自定义敏感词我们认为应该拒绝。";
|
||||
|
||||
List<String> wordList = SensitiveWordBs.newInstance().findAll(text);
|
||||
List<String> wordList = SensitiveWordBs.newInstance().init().findAll(text);
|
||||
Assert.assertEquals("[自定义敏感词]", wordList.toString());
|
||||
}
|
||||
|
||||
|
||||
@@ -1,12 +1,6 @@
|
||||
package com.github.houbb.sensitive.word.data;
|
||||
|
||||
import com.github.houbb.heaven.support.handler.IHandler;
|
||||
import com.github.houbb.heaven.util.io.FileUtil;
|
||||
import com.github.houbb.heaven.util.lang.StringUtil;
|
||||
import com.github.houbb.heaven.util.util.CollectionUtil;
|
||||
import com.github.houbb.opencc4j.core.impl.ZhConvertBootstrap;
|
||||
import com.github.houbb.opencc4j.support.segment.impl.CharSegment;
|
||||
import com.github.houbb.sensitive.word.utils.NumUtils;
|
||||
import org.junit.Ignore;
|
||||
import org.junit.Test;
|
||||
|
||||
|
||||
@@ -3,12 +3,11 @@ package com.github.houbb.sensitive.word.data;
|
||||
import com.github.houbb.heaven.support.filter.IFilter;
|
||||
import com.github.houbb.heaven.support.handler.IHandler;
|
||||
import com.github.houbb.heaven.util.io.FileUtil;
|
||||
import com.github.houbb.heaven.util.lang.NumUtil;
|
||||
import com.github.houbb.heaven.util.lang.StringUtil;
|
||||
import com.github.houbb.heaven.util.util.CollectionUtil;
|
||||
import com.github.houbb.opencc4j.core.impl.ZhConvertBootstrap;
|
||||
import com.github.houbb.opencc4j.support.segment.impl.CharSegment;
|
||||
import com.github.houbb.sensitive.word.utils.NumUtils;
|
||||
import com.github.houbb.sensitive.word.utils.InnerNumUtils;
|
||||
import org.junit.Ignore;
|
||||
import org.junit.Test;
|
||||
|
||||
@@ -128,7 +127,7 @@ public class DictSlimTest {
|
||||
|
||||
// 停顿词语
|
||||
String trim = string.replaceAll("加|否|与|和", "");
|
||||
String mapString = NumUtils.getMappingString(trim);
|
||||
String mapString = InnerNumUtils.getMappingString(trim);
|
||||
boolean result = StringUtil.isDigit(mapString);
|
||||
if(result) {
|
||||
System.out.println(string);
|
||||
|
||||
Reference in New Issue
Block a user