diff --git a/CHANGE_LOG.md b/CHANGE_LOG.md
index 8c40181..46309f5 100644
--- a/CHANGE_LOG.md
+++ b/CHANGE_LOG.md
@@ -140,3 +140,11 @@
|:---|:---|:---|:---|:--|
| 1 | O | 移除日志初始化的控台日志输出 | 2023-02-17 23:51:58 | |
| 2 | A | 支持数字检验的长度指定 | 2022-01-17 23:51:58 | |
+
+# release_0.3.0
+
+| 序号 | 变更类型 | 说明 | 时间 | 备注 |
+|:---|:-----|:-----------------|:--------------------|:--|
+| 1 | O | 移除冗余的耗时统计 | 2023-06-06 23:51:58 | |
+| 2 | A | 优化代码实现方式,添加工具类方法 | 2023-06-06 23:51:58 | |
+
diff --git a/README.md b/README.md
index 89f3a6b..1c8a0ad 100644
--- a/README.md
+++ b/README.md
@@ -46,9 +46,9 @@
[CHANGE_LOG.md](https://github.com/houbb/sensitive-word/blob/master/doc/CHANGE_LOG.md)
-v0.2.1 变更:
+v0.3.0 变更:
-- 支持用户自定义数字检测的长度
+- 代码实现优化
# 快速开始
@@ -64,7 +64,7 @@ v0.2.1 变更:
<groupId>com.github.houbb</groupId>
<artifactId>sensitive-word</artifactId>
- <version>0.2.1</version>
+ <version>0.3.0</version>
```
@@ -285,6 +285,7 @@ final String text = "ⒻⒻⒻfⓤuⓤ⒰cⓒ⒦ the bad words";
List wordList = SensitiveWordBs.newInstance()
.ignoreRepeat(true)
+ .init()
.findAll(text);
Assert.assertEquals("[ⒻⒻⒻfⓤuⓤ⒰cⓒ⒦]", wordList.toString());
```
@@ -308,12 +309,13 @@ V0.2.1 之后,支持通过 `numCheckLen(长度)` 自定义检测的长度。
final String text = "你懂得:12345678";
// 默认检测 8 位
-List wordList = SensitiveWordBs.newInstance().findAll(text);
+List wordList = SensitiveWordBs.newInstance().init().findAll(text);
Assert.assertEquals("[12345678]", wordList.toString());
// 指定数字的长度,避免误杀
List wordList2 = SensitiveWordBs.newInstance()
.numCheckLen(9)
+ .init()
.findAll(text);
Assert.assertEquals("[]", wordList2.toString());
```
@@ -353,18 +355,19 @@ Assert.assertTrue(wordBs.contains(text));
其中各项配置的说明如下:
-| 序号 | 方法 | 说明 | 默认值 |
-|:----|:---|:--------------|:------|
-| 1 | ignoreCase | 忽略大小写 | true |
-| 2 | ignoreWidth | 忽略半角圆角 | true |
-| 3 | ignoreNumStyle | 忽略数字的写法 | true |
-| 4 | ignoreChineseStyle | 忽略中文的书写格式 | true |
-| 5 | ignoreEnglishStyle | 忽略英文的书写格式 | true |
-| 6 | ignoreRepeat | 忽略重复词 | false |
-| 7 | enableNumCheck | 是否启用数字检测。 | true |
-| 8 | enableEmailCheck | 是有启用邮箱检测 | true |
-| 9 | enableUrlCheck | 是否启用链接检测 | true |
-| 10 | numCheckLen | 数字检测,自定义指定长度。| 8 |
+| 序号 | 方法 | 说明 | 默认值 |
+|:---|:---------------------|:--------------|:-------|
+| 1 | ignoreCase | 忽略大小写 | true |
+| 2 | ignoreWidth | 忽略全角半角 | true |
+| 3 | ignoreNumStyle | 忽略数字的写法 | true |
+| 4 | ignoreChineseStyle | 忽略中文的书写格式 | true |
+| 5 | ignoreEnglishStyle | 忽略英文的书写格式 | true |
+| 6 | ignoreRepeat | 忽略重复词 | false |
+| 7 | enableNumCheck | 是否启用数字检测。 | true |
+| 8 | enableEmailCheck | 是否启用邮箱检测 | true |
+| 9 | enableUrlCheck | 是否启用链接检测 | true |
+| 10 | numCheckLen | 数字检测,自定义指定长度。 | 8 |
+| 11 | sensitiveWordReplace | 敏感词替换策略 | `*` 替换 |
# 动态加载(用户自定义)
@@ -612,6 +615,8 @@ public class SensitiveWordService {
# 后期 road-map
+- [ ] wordMap 的抽象,便于拓展
+
- 同音字处理
- 形近字处理
diff --git a/pom.xml b/pom.xml
index 9c934e7..58f1d6b 100644
--- a/pom.xml
+++ b/pom.xml
@@ -6,7 +6,7 @@
<groupId>com.github.houbb</groupId>
<artifactId>sensitive-word</artifactId>
- <version>0.2.1</version>
+ <version>0.3.0</version>
diff --git a/release.bat b/release.bat
index 25c7ada..c267b31 100644
--- a/release.bat
+++ b/release.bat
@@ -10,9 +10,9 @@ ECHO "============================= RELEASE START..."
:: 版本号信息(需要手动指定)
:::: 旧版本名称
-SET version=0.2.1
+SET version=0.3.0
:::: 新版本名称
-SET newVersion=0.2.2
+SET newVersion=0.4.0
:::: 组织名称
SET groupName=com.github.houbb
:::: 项目名称
diff --git a/src/main/java/com/github/houbb/sensitive/word/api/IWordContext.java b/src/main/java/com/github/houbb/sensitive/word/api/IWordContext.java
index fe1a233..2a0da76 100644
--- a/src/main/java/com/github/houbb/sensitive/word/api/IWordContext.java
+++ b/src/main/java/com/github/houbb/sensitive/word/api/IWordContext.java
@@ -1,5 +1,7 @@
package com.github.houbb.sensitive.word.api;
+import com.github.houbb.sensitive.word.support.check.ISensitiveCheck;
+
import java.util.Map;
/**
@@ -173,4 +175,51 @@ public interface IWordContext {
*/
IWordContext sensitiveCheckNumLen(final int sensitiveCheckNumLen);
+ /**
+ * 设置检测策略
+ * @param sensitiveCheck 检测策略
+ * @return this
+ * @since 0.3.0
+ */
+ IWordContext sensitiveCheck(final ISensitiveCheck sensitiveCheck);
+
+ /**
+ * 获取检测策略
+ * @return 检测策略
+ * @since 0.3.0
+ */
+ ISensitiveCheck sensitiveCheck();
+
+ /**
+ * 设置敏感词替换策略
+ * @param sensitiveWordReplace 策略
+ * @return this
+ * @since 0.3.0
+ */
+ IWordContext sensitiveWordReplace(final ISensitiveWordReplace sensitiveWordReplace);
+
+ /**
+ * 敏感词替换策略
+ * @return 替换策略
+ * @since 0.3.0
+ */
+ ISensitiveWordReplace sensitiveWordReplace();
+
+ /**
+ * 设置统一的字符处理
+ *
+ * @param charFormat 字符处理
+ * @return 结果
+ * @since 0.3.0
+ */
+ IWordContext charFormat(final ICharFormat charFormat);
+
+ /**
+ * 获取格式化策略
+ *
+ * @return 策略
+ * @since 0.3.0
+ */
+ ICharFormat charFormat();
+
}
diff --git a/src/main/java/com/github/houbb/sensitive/word/api/IWordData.java b/src/main/java/com/github/houbb/sensitive/word/api/IWordData.java
deleted file mode 100644
index 6413285..0000000
--- a/src/main/java/com/github/houbb/sensitive/word/api/IWordData.java
+++ /dev/null
@@ -1,20 +0,0 @@
-package com.github.houbb.sensitive.word.api;
-
-import java.util.List;
-
-/**
- * 数据词接口
- * @author binbin.hou
- * @since 0.0.1
- */
-@Deprecated
-public interface IWordData {
-
- /**
- * 获取对应的敏感词
- * @return 结果
- * @since 0.0.1
- */
- List getWordData();
-
-}
diff --git a/src/main/java/com/github/houbb/sensitive/word/api/IWordMap.java b/src/main/java/com/github/houbb/sensitive/word/api/IWordMap.java
index 238c15d..31610c6 100644
--- a/src/main/java/com/github/houbb/sensitive/word/api/IWordMap.java
+++ b/src/main/java/com/github/houbb/sensitive/word/api/IWordMap.java
@@ -59,13 +59,11 @@ public interface IWordMap extends ISensitiveCheck {
* ps: 这里可以添加优化。
*
* @param target 目标字符串
- * @param replace 替换策略
* @param context 上下文
* @return 替换后结果
* @since 0.0.2
*/
String replace(final String target,
- final ISensitiveWordReplace replace,
final IWordContext context);
}
diff --git a/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java b/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java
index c2909af..bca90fc 100644
--- a/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java
+++ b/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java
@@ -1,18 +1,20 @@
package com.github.houbb.sensitive.word.bs;
-import com.github.houbb.heaven.constant.CharConst;
import com.github.houbb.heaven.support.handler.IHandler;
import com.github.houbb.heaven.util.common.ArgUtil;
import com.github.houbb.heaven.util.util.CollectionUtil;
import com.github.houbb.sensitive.word.api.*;
import com.github.houbb.sensitive.word.support.allow.WordAllows;
+import com.github.houbb.sensitive.word.support.check.ISensitiveCheck;
+import com.github.houbb.sensitive.word.support.check.impl.SensitiveChecks;
import com.github.houbb.sensitive.word.support.deny.WordDenys;
-import com.github.houbb.sensitive.word.support.map.SensitiveWordMap;
-import com.github.houbb.sensitive.word.support.replace.SensitiveWordReplaceChar;
+import com.github.houbb.sensitive.word.support.format.CharFormats;
+import com.github.houbb.sensitive.word.support.map.WordMaps;
+import com.github.houbb.sensitive.word.support.replace.SensitiveWordReplaces;
import com.github.houbb.sensitive.word.support.result.WordResultHandlers;
-import com.github.houbb.sensitive.word.utils.InnerFormatUtils;
+import com.github.houbb.sensitive.word.utils.InnerWordDataUtils;
-import java.util.*;
+import java.util.List;
/**
* 敏感词引导类
@@ -30,19 +32,63 @@ public class SensitiveWordBs {
private SensitiveWordBs() {
}
+
+ //------------------------------------------------------------- 基本属性 START
+ // 格式统一化
+ /**
+ * 是否忽略大小写
+ */
+ private boolean ignoreCase = true;
+ /**
+ * 是否忽略全角、半角
+ */
+ private boolean ignoreWidth = true;
+ /**
+ * 是否忽略数字样式
+ */
+ private boolean ignoreNumStyle = true;
+ /**
+ * 是否忽略中文样式
+ */
+ private boolean ignoreChineseStyle = true;
+ /**
+ * 是否忽略英文样式
+ */
+ private boolean ignoreEnglishStyle = true;
+ /**
+ * 是否忽略重复
+ */
+ private boolean ignoreRepeat = false;
+
+ // 开启校验
+ /**
+ * 启用数字检测
+ */
+ private boolean sensitiveCheckNum = true;
+ /**
+ * 启用邮箱检测
+ */
+ private boolean sensitiveCheckEmail = true;
+ /**
+ * 启用 URL 检测
+ */
+ private boolean sensitiveCheckUrl = true;
+
+ // 额外配置
+ /**
+ * 检测数字时的长度
+ */
+ private int sensitiveCheckNumLen = 8;
+
+ //------------------------------------------------------------- 基本属性 END
/**
* 敏感词 map
*
+ * TODO: 暂时定义为 final,后续放开抽象。
+ *
* @since 0.0.1
*/
- private IWordMap sensitiveWordMap;
-
- /**
- * 默认的执行上下文
- *
- * @since 0.0.4
- */
- private final IWordContext context = buildDefaultContext();
+ private final IWordMap wordMap = WordMaps.defaults();
/**
* 禁止的单词
@@ -57,76 +103,21 @@ public class SensitiveWordBs {
private IWordAllow wordAllow = WordAllows.system();
/**
- * DCL 初始化 wordMap 信息
- *
- * 注意:map 的构建是一个比较耗时的动作
- * @since 0.0.4
+ * 替换策略
+ * @since 0.3.0
*/
- private synchronized void initWordMap() {
- // 加载配置信息
- List denyList = wordDeny.deny();
- List allowList = wordAllow.allow();
- List results = getActualDenyList(denyList, allowList);
-
- // 初始化 DFA 信息
- if(sensitiveWordMap == null) {
- sensitiveWordMap = new SensitiveWordMap();
- }
- // 便于可以多次初始化
- sensitiveWordMap.initWordMap(results);
- }
+ private ISensitiveWordReplace sensitiveWordReplace = SensitiveWordReplaces.chars();
/**
- * 获取禁止列表中真正的禁止词汇
- * @param denyList 禁止
- * @param allowList 允许
- * @return 结果
- * @since 0.1.1
+ * 上下文
+ * @since 0.3.0
*/
- List getActualDenyList(List denyList,
- List allowList) {
- if(CollectionUtil.isEmpty(denyList)) {
- return Collections.emptyList();
- }
- if(CollectionUtil.isEmpty(allowList)) {
- return denyList;
- }
+ private IWordContext context = SensitiveWordContext.newInstance();
- List formatDenyList = this.formatWordList(denyList);
- List formatAllowList = this.formatWordList(allowList);
-
- List resultList = new ArrayList<>();
- // O(1)
- Set allowSet = new HashSet<>(formatAllowList);
-
- for(String deny : formatDenyList) {
- if(allowSet.contains(deny)) {
- continue;
- }
-
- resultList.add(deny);
- }
- return resultList;
- }
-
- /**
- * 数据格式化处理
- * @param list 列表
- * @return 结果
- * @since 0.1.1
- */
- private List formatWordList(List list) {
- if(CollectionUtil.isEmpty(list)) {
- return list;
- }
-
- List resultList = new ArrayList<>(list.size());
- for(String word : list) {
- String formatWord = InnerFormatUtils.format(word, this.context);
- resultList.add(formatWord);
- }
-
- return resultList;
+ public SensitiveWordBs sensitiveWordReplace(ISensitiveWordReplace sensitiveWordReplace) {
+ ArgUtil.notNull(sensitiveWordReplace, "sensitiveWordReplace");
+ this.sensitiveWordReplace = sensitiveWordReplace;
+ return this;
}
/**
@@ -149,11 +140,68 @@ public class SensitiveWordBs {
* @return this
*/
public SensitiveWordBs init() {
+ // 初始化 context
+ this.initContext();
+
+ // Initialize the character-format strategy and store it on the context
+ final ICharFormat charFormat = CharFormats.initCharFormat(context);
+ context.charFormat(charFormat);
+
+ // 3. 初始化对应的 sensitiveCheck
+ final ISensitiveCheck sensitiveCheck = SensitiveChecks.initSensitiveCheck(context);
+ context.sensitiveCheck(sensitiveCheck);
+
+ // 4. Initialize the word map (runs last: it formats words using the context)
this.initWordMap();
return this;
}
+ /**
+ * Builds a fresh context from the currently configured properties of this instance.
+ *
+ * @return 结果
+ * @since 0.0.4
+ */
+ private IWordContext initContext() {
+ this.context = SensitiveWordContext.newInstance();
+
+ // 格式统一化
+ context.ignoreCase(ignoreCase);
+ context.ignoreWidth(ignoreWidth);
+ context.ignoreNumStyle(ignoreNumStyle);
+ context.ignoreChineseStyle(ignoreChineseStyle);
+ context.ignoreEnglishStyle(ignoreEnglishStyle);
+ context.ignoreRepeat(ignoreRepeat);
+
+ // 开启校验
+ context.sensitiveCheckNum(sensitiveCheckNum);
+ context.sensitiveCheckEmail(sensitiveCheckEmail);
+ context.sensitiveCheckUrl(sensitiveCheckUrl);
+
+ // 额外配置
+ context.sensitiveCheckNumLen(sensitiveCheckNumLen);
+ context.sensitiveWordReplace(sensitiveWordReplace);
+
+ return context;
+ }
+
+ /**
+ * Initializes the word map (synchronized; may be called repeatedly to reload the words).
+ *
+ * 注意:map 的构建是一个比较耗时的动作
+ * @since 0.0.4
+ */
+ private synchronized void initWordMap() {
+ // 加载配置信息
+ List denyList = wordDeny.deny();
+ List allowList = wordAllow.allow();
+ List results = InnerWordDataUtils.getActualDenyList(denyList, allowList, context);
+
+ // 便于可以多次初始化
+ wordMap.initWordMap(results);
+ }
+
/**
* 设置禁止的实现
* @param wordDeny 禁止的实现
@@ -186,7 +234,7 @@ public class SensitiveWordBs {
* @return this
*/
public SensitiveWordBs enableNumCheck(boolean enableNumCheck) {
- this.context.sensitiveCheckNum(enableNumCheck);
+ this.sensitiveCheckNum = enableNumCheck;
return this;
}
@@ -197,7 +245,7 @@ public class SensitiveWordBs {
* @since 0.2.1
*/
public SensitiveWordBs numCheckLen(int numCheckLen) {
- this.context.sensitiveCheckNumLen(numCheckLen);
+ this.sensitiveCheckNumLen = numCheckLen;
return this;
}
@@ -209,7 +257,7 @@ public class SensitiveWordBs {
* @return this
*/
public SensitiveWordBs enableEmailCheck(boolean enableEmailCheck) {
- this.context.sensitiveCheckEmail(enableEmailCheck);
+ this.sensitiveCheckEmail = enableEmailCheck;
return this;
}
@@ -221,7 +269,7 @@ public class SensitiveWordBs {
* @return this
*/
public SensitiveWordBs enableUrlCheck(boolean enableUrlCheck) {
- this.context.sensitiveCheckUrl(enableUrlCheck);
+ this.sensitiveCheckUrl = enableUrlCheck;
return this;
}
@@ -232,7 +280,7 @@ public class SensitiveWordBs {
* @since 0.0.14
*/
public SensitiveWordBs ignoreCase(boolean ignoreCase) {
- this.context.ignoreCase(ignoreCase);
+ this.ignoreCase = ignoreCase;
return this;
}
@@ -243,7 +291,7 @@ public class SensitiveWordBs {
* @since 0.0.14
*/
public SensitiveWordBs ignoreWidth(boolean ignoreWidth) {
- this.context.ignoreWidth(ignoreWidth);
+ this.ignoreWidth = ignoreWidth;
return this;
}
@@ -254,7 +302,7 @@ public class SensitiveWordBs {
* @since 0.0.14
*/
public SensitiveWordBs ignoreNumStyle(boolean ignoreNumStyle) {
- this.context.ignoreNumStyle(ignoreNumStyle);
+ this.ignoreNumStyle = ignoreNumStyle;
return this;
}
@@ -265,7 +313,7 @@ public class SensitiveWordBs {
* @since 0.0.14
*/
public SensitiveWordBs ignoreChineseStyle(boolean ignoreChineseStyle) {
- this.context.ignoreChineseStyle(ignoreChineseStyle);
+ this.ignoreChineseStyle = ignoreChineseStyle;
return this;
}
@@ -276,7 +324,7 @@ public class SensitiveWordBs {
* @since 0.0.14
*/
public SensitiveWordBs ignoreEnglishStyle(boolean ignoreEnglishStyle) {
- this.context.ignoreEnglishStyle(ignoreEnglishStyle);
+ this.ignoreEnglishStyle = ignoreEnglishStyle;
return this;
}
@@ -287,37 +335,11 @@ public class SensitiveWordBs {
* @since 0.0.14
*/
public SensitiveWordBs ignoreRepeat(boolean ignoreRepeat) {
- this.context.ignoreRepeat(ignoreRepeat);
+ this.ignoreRepeat = ignoreRepeat;
return this;
}
- /**
- * 构建默认的上下文
- *
- * @return 结果
- * @since 0.0.4
- */
- private IWordContext buildDefaultContext() {
- IWordContext wordContext = SensitiveWordContext.newInstance();
- // 格式统一化
- wordContext.ignoreCase(true);
- wordContext.ignoreWidth(true);
- wordContext.ignoreNumStyle(true);
- wordContext.ignoreChineseStyle(true);
- wordContext.ignoreEnglishStyle(true);
- wordContext.ignoreRepeat(false);
-
- // 开启校验
- wordContext.sensitiveCheckNum(true);
- wordContext.sensitiveCheckEmail(true);
- wordContext.sensitiveCheckUrl(true);
-
- // 额外配置
- wordContext.sensitiveCheckNumLen(8);
-
- return wordContext;
- }
-
+ //------------------------------------------------------------------------------------ 公开方法 START
/**
* 是否包含敏感词
*
@@ -326,9 +348,7 @@ public class SensitiveWordBs {
* @since 0.0.1
*/
public boolean contains(final String target) {
- statusCheck();
-
- return sensitiveWordMap.contains(target, context);
+ return wordMap.contains(target, context);
}
/**
@@ -369,9 +389,8 @@ public class SensitiveWordBs {
*/
public List findAll(final String target, final IWordResultHandler handler) {
ArgUtil.notNull(handler, "handler");
- statusCheck();
- List wordResults = sensitiveWordMap.findAll(target, context);
+ List wordResults = wordMap.findAll(target, context);
return CollectionUtil.toList(wordResults, new IHandler() {
@Override
public R handle(IWordResult wordResult) {
@@ -392,67 +411,22 @@ public class SensitiveWordBs {
*/
public R findFirst(final String target, final IWordResultHandler handler) {
ArgUtil.notNull(handler, "handler");
- statusCheck();
- IWordResult wordResult = sensitiveWordMap.findFirst(target, context);
+ IWordResult wordResult = wordMap.findFirst(target, context);
return handler.handle(wordResult);
}
-
/**
* 替换所有内容
*
* @param target 目标字符串
- * @param replaceChar 替换为的 char
- * @return 替换后结果
- * @since 0.0.2
- */
- public String replace(final String target, final char replaceChar) {
- ISensitiveWordReplace replace = new SensitiveWordReplaceChar(replaceChar);
-
- return replace(target, replace);
- }
-
- /**
- * 替换所有内容
- *
- * @param target 目标字符串
- * @param replace 替换策略
* @return 替换后结果
* @since 0.2.0
*/
- public String replace(final String target, final ISensitiveWordReplace replace) {
- statusCheck();
-
- return sensitiveWordMap.replace(target, replace, context);
- }
-
- /**
- * 替换所有内容
- * 1. 默认使用空格替换,避免星号改变 md 的格式。
- *
- * @param target 目标字符串
- * @return 替换后结果
- * @since 0.0.2
- */
public String replace(final String target) {
- return this.replace(target, CharConst.STAR);
+ return wordMap.replace(target, context);
}
-
- /**
- * 状态校验
- * @since 0.0.13
- */
- private void statusCheck(){
- //DLC
- if(sensitiveWordMap == null) {
- synchronized (this) {
- if(sensitiveWordMap == null) {
- this.init();
- }
- }
- }
- }
+ //------------------------------------------------------------------------------------ 公开方法 END
}
diff --git a/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordContext.java b/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordContext.java
index 8b2e4c1..18ff5e4 100644
--- a/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordContext.java
+++ b/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordContext.java
@@ -1,6 +1,9 @@
package com.github.houbb.sensitive.word.bs;
+import com.github.houbb.sensitive.word.api.ICharFormat;
+import com.github.houbb.sensitive.word.api.ISensitiveWordReplace;
import com.github.houbb.sensitive.word.api.IWordContext;
+import com.github.houbb.sensitive.word.support.check.ISensitiveCheck;
import java.util.Map;
@@ -77,6 +80,44 @@ public class SensitiveWordContext implements IWordContext {
*/
private int sensitiveCheckNumLen;
+ /**
+ * 检测策略
+ * @since 0.3.0
+ */
+ private ISensitiveCheck sensitiveCheck;
+
+ /**
+ * 替换策略
+ * @since 0.3.0
+ */
+ private ISensitiveWordReplace sensitiveWordReplace;
+
+ /**
+ * 格式化
+ * @since 0.3.0
+ */
+ private ICharFormat charFormat;
+
+ @Override
+ public ISensitiveWordReplace sensitiveWordReplace() {
+ return sensitiveWordReplace;
+ }
+
+ public SensitiveWordContext sensitiveWordReplace(ISensitiveWordReplace sensitiveWordReplace) {
+ this.sensitiveWordReplace = sensitiveWordReplace;
+ return this;
+ }
+
+ @Override
+ public ISensitiveCheck sensitiveCheck() {
+ return sensitiveCheck;
+ }
+
+ public SensitiveWordContext sensitiveCheck(ISensitiveCheck sensitiveCheck) {
+ this.sensitiveCheck = sensitiveCheck;
+ return this;
+ }
+
/**
* 私有化构造器
* @since 0.0.4
@@ -214,4 +255,13 @@ public class SensitiveWordContext implements IWordContext {
return this;
}
+ @Override
+ public ICharFormat charFormat() {
+ return charFormat;
+ }
+
+ public SensitiveWordContext charFormat(ICharFormat charFormat) {
+ this.charFormat = charFormat;
+ return this;
+ }
}
diff --git a/src/main/java/com/github/houbb/sensitive/word/bs/package-info.java b/src/main/java/com/github/houbb/sensitive/word/bs/package-info.java
deleted file mode 100644
index e6f6053..0000000
--- a/src/main/java/com/github/houbb/sensitive/word/bs/package-info.java
+++ /dev/null
@@ -1,5 +0,0 @@
-/**
- * 引导类定义
- * @since 0.0.1
- */
-package com.github.houbb.sensitive.word.bs;
\ No newline at end of file
diff --git a/src/main/java/com/github/houbb/sensitive/word/constant/AppConst.java b/src/main/java/com/github/houbb/sensitive/word/constant/AppConst.java
index 1b239bb..0950534 100644
--- a/src/main/java/com/github/houbb/sensitive/word/constant/AppConst.java
+++ b/src/main/java/com/github/houbb/sensitive/word/constant/AppConst.java
@@ -43,4 +43,10 @@ public final class AppConst {
*/
public static final String SENSITIVE_WORD_ALLOW_PATH = "/sensitive_word_allow.txt";
+ /**
+ * 最长的网址长度
+ * @since 0.3.0
+ */
+ public static final int MAX_WEB_SITE_LEN = 70;
+
}
diff --git a/src/main/java/com/github/houbb/sensitive/word/core/SensitiveWordHelper.java b/src/main/java/com/github/houbb/sensitive/word/core/SensitiveWordHelper.java
index 2fe076b..bad461a 100644
--- a/src/main/java/com/github/houbb/sensitive/word/core/SensitiveWordHelper.java
+++ b/src/main/java/com/github/houbb/sensitive/word/core/SensitiveWordHelper.java
@@ -3,6 +3,7 @@ package com.github.houbb.sensitive.word.core;
import com.github.houbb.sensitive.word.api.ISensitiveWordReplace;
import com.github.houbb.sensitive.word.api.IWordResultHandler;
import com.github.houbb.sensitive.word.bs.SensitiveWordBs;
+import com.github.houbb.sensitive.word.support.replace.SensitiveWordReplaces;
import java.util.List;
@@ -69,7 +70,8 @@ public final class SensitiveWordHelper {
* @since 0.2.0
*/
public static String replace(final String target, final ISensitiveWordReplace replace) {
- return WORD_BS.replace(target, replace);
+ SensitiveWordBs sensitiveWordBs = SensitiveWordBs.newInstance().sensitiveWordReplace(replace).init();
+ return sensitiveWordBs.replace(target);
}
/**
@@ -81,7 +83,9 @@ public final class SensitiveWordHelper {
* @since 0.0.13
*/
public static String replace(final String target, final char replaceChar) {
- return WORD_BS.replace(target, replaceChar);
+ final ISensitiveWordReplace replace = SensitiveWordReplaces.chars(replaceChar);
+
+ return replace(target, replace);
}
/**
diff --git a/src/main/java/com/github/houbb/sensitive/word/support/allow/WordAllowSystem.java b/src/main/java/com/github/houbb/sensitive/word/support/allow/WordAllowSystem.java
index 284da5d..c596776 100644
--- a/src/main/java/com/github/houbb/sensitive/word/support/allow/WordAllowSystem.java
+++ b/src/main/java/com/github/houbb/sensitive/word/support/allow/WordAllowSystem.java
@@ -15,6 +15,15 @@ import java.util.List;
@ThreadSafe
public class WordAllowSystem implements IWordAllow {
+ /**
+ * @since 0.3.0
+ */
+ private static final WordAllowSystem INSTANCE = new WordAllowSystem();
+
+ public static WordAllowSystem getInstance() {
+ return INSTANCE;
+ }
+
@Override
public List allow() {
return StreamUtil.readAllLines("/sensitive_word_allow.txt");
diff --git a/src/main/java/com/github/houbb/sensitive/word/support/allow/WordAllows.java b/src/main/java/com/github/houbb/sensitive/word/support/allow/WordAllows.java
index f5ce28f..48f6cd1 100644
--- a/src/main/java/com/github/houbb/sensitive/word/support/allow/WordAllows.java
+++ b/src/main/java/com/github/houbb/sensitive/word/support/allow/WordAllows.java
@@ -1,6 +1,5 @@
package com.github.houbb.sensitive.word.support.allow;
-import com.github.houbb.heaven.support.instance.impl.Instances;
import com.github.houbb.heaven.support.pipeline.Pipeline;
import com.github.houbb.heaven.util.util.ArrayUtil;
import com.github.houbb.sensitive.word.api.IWordAllow;
@@ -43,7 +42,7 @@ public final class WordAllows {
* @since 0.0.13
*/
public static IWordAllow system() {
- return Instances.singleton(WordAllowSystem.class);
+ return WordAllowSystem.getInstance();
}
}
diff --git a/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckChain.java b/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckChain.java
deleted file mode 100644
index 2d099fa..0000000
--- a/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckChain.java
+++ /dev/null
@@ -1,58 +0,0 @@
-package com.github.houbb.sensitive.word.support.check.impl;
-
-import com.github.houbb.heaven.annotation.ThreadSafe;
-import com.github.houbb.heaven.support.instance.impl.Instances;
-import com.github.houbb.heaven.util.guava.Guavas;
-import com.github.houbb.sensitive.word.api.IWordContext;
-import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum;
-import com.github.houbb.sensitive.word.support.check.ISensitiveCheck;
-import com.github.houbb.sensitive.word.support.check.SensitiveCheckResult;
-
-import java.util.List;
-
-/**
- * 敏感词检测责任链模式
- *
- * 这里可以提供一个公共的父类。
- *
- *
- * DFA 算法的优化可以参考论文:
- * 【DFA 算法】各种论文。
- *
- * @author binbin.hou
- * @since 0.0.5
- */
-@ThreadSafe
-public class SensitiveCheckChain implements ISensitiveCheck {
-
- @Override
- public SensitiveCheckResult sensitiveCheck(String txt, int beginIndex, ValidModeEnum validModeEnum, IWordContext context) {
- // 初始化责任链
- List sensitiveChecks = Guavas.newArrayList();
- // 默认添加敏感词校验
- sensitiveChecks.add(Instances.singleton(SensitiveCheckWord.class));
- if(context.sensitiveCheckNum()) {
- sensitiveChecks.add(Instances.singleton(SensitiveCheckNum.class));
- }
- if(context.sensitiveCheckEmail()) {
- sensitiveChecks.add(Instances.singleton(SensitiveCheckEmail.class));
- }
- if(context.sensitiveCheckUrl()) {
- sensitiveChecks.add(Instances.singleton(SensitiveCheckUrl.class));
- }
-
- // 循环调用
- for(ISensitiveCheck sensitiveCheck : sensitiveChecks) {
- SensitiveCheckResult result = sensitiveCheck.sensitiveCheck(txt, beginIndex, validModeEnum, context);
-
- if(result.index() > 0) {
- return result;
- }
- }
-
- // 这里直接进行正则表达式相关的调用。
- // 默认返回 0
- return SensitiveCheckResult.of(0, SensitiveCheckChain.class);
- }
-
-}
diff --git a/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckEmail.java b/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckEmail.java
index 3718b5c..1429d03 100644
--- a/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckEmail.java
+++ b/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckEmail.java
@@ -1,14 +1,12 @@
package com.github.houbb.sensitive.word.support.check.impl;
import com.github.houbb.heaven.annotation.ThreadSafe;
-import com.github.houbb.heaven.support.instance.impl.Instances;
import com.github.houbb.heaven.util.lang.CharUtil;
import com.github.houbb.heaven.util.util.regex.RegexUtil;
import com.github.houbb.sensitive.word.api.IWordContext;
import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum;
import com.github.houbb.sensitive.word.support.check.ISensitiveCheck;
import com.github.houbb.sensitive.word.support.check.SensitiveCheckResult;
-import com.github.houbb.sensitive.word.support.format.CharFormatChain;
/**
* email 正则表达式检测实现。
@@ -28,6 +26,15 @@ import com.github.houbb.sensitive.word.support.format.CharFormatChain;
@ThreadSafe
public class SensitiveCheckEmail implements ISensitiveCheck {
+ /**
+ * @since 0.3.0
+ */
+ private static final ISensitiveCheck INSTANCE = new SensitiveCheckEmail();
+
+ public static ISensitiveCheck getInstance() {
+ return INSTANCE;
+ }
+
@Override
public SensitiveCheckResult sensitiveCheck(String txt, int beginIndex, ValidModeEnum validModeEnum, IWordContext context) {
// 记录敏感词的长度
@@ -40,7 +47,7 @@ public class SensitiveCheckEmail implements ISensitiveCheck {
// 后期如果有想法,对 DFA 进一步深入学习后,将进行优化。
for(int i = beginIndex; i < txt.length(); i++) {
char currentChar = txt.charAt(i);
- char mappingChar = Instances.singleton(CharFormatChain.class)
+ char mappingChar = context.charFormat()
.format(currentChar, context);
if(CharUtil.isEmilChar(mappingChar)) {
diff --git a/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckInit.java b/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckInit.java
new file mode 100644
index 0000000..4572c12
--- /dev/null
+++ b/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckInit.java
@@ -0,0 +1,51 @@
+package com.github.houbb.sensitive.word.support.check.impl;
+
+import com.github.houbb.heaven.support.pipeline.Pipeline;
+import com.github.houbb.heaven.support.pipeline.impl.DefaultPipeline;
+import com.github.houbb.sensitive.word.api.IWordContext;
+import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum;
+import com.github.houbb.sensitive.word.support.check.ISensitiveCheck;
+import com.github.houbb.sensitive.word.support.check.SensitiveCheckResult;
+
+import java.util.List;
+
+/**
+ * 检测初始化类
+ * @since 0.3.0
+ */
+public abstract class SensitiveCheckInit implements ISensitiveCheck {
+
+ /**
+ * 初始化列表
+ *
+ * @param pipeline 当前列表泳道
+ * @since 0.3.0
+ */
+ protected abstract void init(final Pipeline pipeline);
+
+
+ @Override
+ public SensitiveCheckResult sensitiveCheck(String txt,
+ int beginIndex,
+ ValidModeEnum validModeEnum,
+ IWordContext context) {
+
+ Pipeline pipeline = new DefaultPipeline<>();
+ this.init(pipeline);
+ List sensitiveChecks = pipeline.list();
+
+ // 循环调用
+ for(ISensitiveCheck sensitiveCheck : sensitiveChecks) {
+ SensitiveCheckResult result = sensitiveCheck.sensitiveCheck(txt, beginIndex, validModeEnum, context);
+
+ if(result.index() > 0) {
+ return result;
+ }
+ }
+
+ // No check in the pipeline reported a match (index > 0):
+ // fall back to the shared "none" result, whose index is 0.
+ return SensitiveCheckNone.getNoneResult();
+ }
+
+}
diff --git a/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckNone.java b/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckNone.java
new file mode 100644
index 0000000..3376d7b
--- /dev/null
+++ b/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckNone.java
@@ -0,0 +1,41 @@
+package com.github.houbb.sensitive.word.support.check.impl;
+
+import com.github.houbb.heaven.annotation.ThreadSafe;
+import com.github.houbb.sensitive.word.api.IWordContext;
+import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum;
+import com.github.houbb.sensitive.word.support.check.ISensitiveCheck;
+import com.github.houbb.sensitive.word.support.check.SensitiveCheckResult;
+
+/**
+ * 未匹配
+ *
+ * @author binbin.hou
+ * @since 0.3.0
+ */
+@ThreadSafe
+public class SensitiveCheckNone implements ISensitiveCheck {
+
+ /**
+ * @since 0.3.0
+ */
+ private static final ISensitiveCheck INSTANCE = new SensitiveCheckNone();
+
+ public static ISensitiveCheck getInstance() {
+ return INSTANCE;
+ }
+
+ /**
+ * 只有一个未匹配
+ */
+ private static final SensitiveCheckResult NONE_RESULT = SensitiveCheckResult.of(0, SensitiveCheckNone.class);
+
+ @Override
+ public SensitiveCheckResult sensitiveCheck(String txt, int beginIndex, ValidModeEnum validModeEnum, IWordContext context) {
+ return NONE_RESULT;
+ }
+
+ public static SensitiveCheckResult getNoneResult() {
+ return NONE_RESULT;
+ }
+
+}
diff --git a/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckNum.java b/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckNum.java
index 7facc1f..2824e6a 100644
--- a/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckNum.java
+++ b/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckNum.java
@@ -1,12 +1,10 @@
package com.github.houbb.sensitive.word.support.check.impl;
import com.github.houbb.heaven.annotation.ThreadSafe;
-import com.github.houbb.heaven.support.instance.impl.Instances;
import com.github.houbb.sensitive.word.api.IWordContext;
import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum;
import com.github.houbb.sensitive.word.support.check.ISensitiveCheck;
import com.github.houbb.sensitive.word.support.check.SensitiveCheckResult;
-import com.github.houbb.sensitive.word.support.format.CharFormatChain;
/**
* 敏感词监测实现
@@ -18,6 +16,15 @@ import com.github.houbb.sensitive.word.support.format.CharFormatChain;
@ThreadSafe
public class SensitiveCheckNum implements ISensitiveCheck {
+ /**
+ * @since 0.3.0
+ */
+ private static final ISensitiveCheck INSTANCE = new SensitiveCheckNum();
+
+ public static ISensitiveCheck getInstance() {
+ return INSTANCE;
+ }
+
@Override
public SensitiveCheckResult sensitiveCheck(String txt, int beginIndex, ValidModeEnum validModeEnum, IWordContext context) {
// 记录敏感词的长度
@@ -26,7 +33,7 @@ public class SensitiveCheckNum implements ISensitiveCheck {
for (int i = beginIndex; i < txt.length(); i++) {
char c = txt.charAt(i);
- char charKey = Instances.singleton(CharFormatChain.class).format(c, context);
+ char charKey = context.charFormat().format(c, context);
// 如果是数字
// 满足进入的条件
diff --git a/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckUrl.java b/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckUrl.java
index d5760fb..971b6e9 100644
--- a/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckUrl.java
+++ b/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckUrl.java
@@ -1,15 +1,13 @@
package com.github.houbb.sensitive.word.support.check.impl;
-import com.github.houbb.heaven.annotation.CommonEager;
import com.github.houbb.heaven.annotation.ThreadSafe;
-import com.github.houbb.heaven.support.instance.impl.Instances;
import com.github.houbb.heaven.util.lang.CharUtil;
import com.github.houbb.heaven.util.util.regex.RegexUtil;
import com.github.houbb.sensitive.word.api.IWordContext;
+import com.github.houbb.sensitive.word.constant.AppConst;
import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum;
import com.github.houbb.sensitive.word.support.check.ISensitiveCheck;
import com.github.houbb.sensitive.word.support.check.SensitiveCheckResult;
-import com.github.houbb.sensitive.word.support.format.CharFormatChain;
/**
* URL 正则表达式检测实现。
@@ -27,10 +25,13 @@ import com.github.houbb.sensitive.word.support.format.CharFormatChain;
public class SensitiveCheckUrl implements ISensitiveCheck {
/**
- * 最长的网址长度
- * @since 0.0.12
+ * @since 0.3.0
*/
- private static final int MAX_WEB_SITE_LEN = 70;
+ private static final ISensitiveCheck INSTANCE = new SensitiveCheckUrl();
+
+ public static ISensitiveCheck getInstance() {
+ return INSTANCE;
+ }
@Override
public SensitiveCheckResult sensitiveCheck(String txt, int beginIndex, ValidModeEnum validModeEnum, IWordContext context) {
@@ -44,11 +45,11 @@ public class SensitiveCheckUrl implements ISensitiveCheck {
// 后期如果有想法,对 DFA 进一步深入学习后,将进行优化。
for(int i = beginIndex; i < txt.length(); i++) {
char currentChar = txt.charAt(i);
- char mappingChar = Instances.singleton(CharFormatChain.class)
+ char mappingChar = context.charFormat()
.format(currentChar, context);
if(CharUtil.isWebSiteChar(mappingChar)
- && lengthCount <= MAX_WEB_SITE_LEN) {
+ && lengthCount <= AppConst.MAX_WEB_SITE_LEN) {
lengthCount++;
stringBuilder.append(currentChar);
diff --git a/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckWord.java b/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckWord.java
index 9b6a364..8b91345 100644
--- a/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckWord.java
+++ b/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckWord.java
@@ -1,14 +1,12 @@
package com.github.houbb.sensitive.word.support.check.impl;
import com.github.houbb.heaven.annotation.ThreadSafe;
-import com.github.houbb.heaven.support.instance.impl.Instances;
import com.github.houbb.heaven.util.lang.ObjectUtil;
import com.github.houbb.sensitive.word.api.IWordContext;
import com.github.houbb.sensitive.word.constant.AppConst;
import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum;
import com.github.houbb.sensitive.word.support.check.ISensitiveCheck;
import com.github.houbb.sensitive.word.support.check.SensitiveCheckResult;
-import com.github.houbb.sensitive.word.support.format.CharFormatChain;
import java.util.Map;
@@ -20,6 +18,15 @@ import java.util.Map;
@ThreadSafe
public class SensitiveCheckWord implements ISensitiveCheck {
+ /**
+ * @since 0.3.0
+ */
+ private static final ISensitiveCheck INSTANCE = new SensitiveCheckWord();
+
+ public static ISensitiveCheck getInstance() {
+ return INSTANCE;
+ }
+
@Override
public SensitiveCheckResult sensitiveCheck(String txt, int beginIndex, ValidModeEnum validModeEnum, IWordContext context) {
Map nowMap = context.sensitiveWordMap();
@@ -90,7 +97,7 @@ public class SensitiveCheckWord implements ISensitiveCheck {
final String txt,
final int index) {
char c = txt.charAt(index);
- char mappingChar = Instances.singleton(CharFormatChain.class).format(c, context);
+ char mappingChar = context.charFormat().format(c, context);
// 这里做一次重复词的处理
//TODO: 这里可以优化,是否获取一次。
@@ -99,7 +106,7 @@ public class SensitiveCheckWord implements ISensitiveCheck {
if(context.ignoreRepeat()
&& index > 0) {
char preChar = txt.charAt(index-1);
- char preMappingChar = Instances.singleton(CharFormatChain.class)
+ char preMappingChar = context.charFormat()
.format(preChar, context);
// 直接赋值为上一个 map
diff --git a/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveChecks.java b/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveChecks.java
new file mode 100644
index 0000000..71614e7
--- /dev/null
+++ b/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveChecks.java
@@ -0,0 +1,97 @@
+package com.github.houbb.sensitive.word.support.check.impl;
+
+import com.github.houbb.heaven.support.pipeline.Pipeline;
+import com.github.houbb.heaven.util.util.ArrayUtil;
+import com.github.houbb.heaven.util.util.CollectionUtil;
+import com.github.houbb.sensitive.word.api.IWordContext;
+import com.github.houbb.sensitive.word.support.check.ISensitiveCheck;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+
+/**
+ * 敏感词检测工具
+ * @since 0.3.0
+ */
+public final class SensitiveChecks {
+
+ private SensitiveChecks(){}
+
+ /**
+ * 初始化敏感检测策略
+ * @param context 上下文
+ *
+ * @return 实现
+ * @since 0.3.0
+ */
+ public static ISensitiveCheck initSensitiveCheck(final IWordContext context) {
+ List sensitiveCheckList = new ArrayList<>();
+
+ // 默认添加敏感词校验
+ sensitiveCheckList.add(SensitiveChecks.word());
+
+ if(context.sensitiveCheckNum()) {
+ sensitiveCheckList.add(SensitiveChecks.num());
+ }
+ if(context.sensitiveCheckEmail()) {
+ sensitiveCheckList.add(SensitiveChecks.email());
+ }
+ if(context.sensitiveCheckUrl()) {
+ sensitiveCheckList.add(SensitiveChecks.url());
+ }
+
+ return SensitiveChecks.chains(sensitiveCheckList);
+ }
+
+ public static ISensitiveCheck chains(final ISensitiveCheck... sensitiveChecks) {
+ if (ArrayUtil.isEmpty(sensitiveChecks)){
+ return none();
+ }
+
+ return new SensitiveCheckInit() {
+ @Override
+ protected void init(Pipeline pipeline) {
+ for(ISensitiveCheck check : sensitiveChecks) {
+ pipeline.addLast(check);
+ }
+ }
+ };
+ }
+
+ public static ISensitiveCheck chains(final Collection sensitiveChecks) {
+ if (CollectionUtil.isEmpty(sensitiveChecks)){
+ return none();
+ }
+
+ return new SensitiveCheckInit() {
+ @Override
+ protected void init(Pipeline pipeline) {
+ for(ISensitiveCheck check : sensitiveChecks) {
+ pipeline.addLast(check);
+ }
+ }
+ };
+ }
+
+ public static ISensitiveCheck email() {
+ return SensitiveCheckEmail.getInstance();
+ }
+
+ public static ISensitiveCheck num() {
+ return SensitiveCheckNum.getInstance();
+ }
+
+ public static ISensitiveCheck url() {
+ return SensitiveCheckUrl.getInstance();
+ }
+
+ public static ISensitiveCheck word() {
+ return SensitiveCheckWord.getInstance();
+ }
+
+ public static ISensitiveCheck none() {
+ return SensitiveCheckNone.getInstance();
+ }
+
+}
diff --git a/src/main/java/com/github/houbb/sensitive/word/support/data/SensitiveWordData.java b/src/main/java/com/github/houbb/sensitive/word/support/data/SensitiveWordData.java
deleted file mode 100644
index f0159f0..0000000
--- a/src/main/java/com/github/houbb/sensitive/word/support/data/SensitiveWordData.java
+++ /dev/null
@@ -1,53 +0,0 @@
-package com.github.houbb.sensitive.word.support.data;
-
-import com.github.houbb.heaven.annotation.ThreadSafe;
-import com.github.houbb.heaven.util.guava.Guavas;
-import com.github.houbb.heaven.util.io.StreamUtil;
-import com.github.houbb.heaven.util.util.CollectionUtil;
-import com.github.houbb.sensitive.word.api.IWordData;
-import com.github.houbb.sensitive.word.constant.AppConst;
-
-import java.util.List;
-
-/**
- * 数据加载使用单例的模式,只需要加载一次即可。
- *
- * @author binbin.hou
- * @since 0.0.1
- */
-@ThreadSafe
-public class SensitiveWordData implements IWordData {
-
- /**
- * 默认的内置行
- *
- * @since 0.0.1
- */
- private static List defaultLines;
-
- static {
- synchronized (SensitiveWordData.class) {
- long start = System.currentTimeMillis();
- defaultLines = Guavas.newArrayList(AppConst.DICT_SIZE+AppConst.DICT_EN_SIZE);
- defaultLines = StreamUtil.readAllLines("/dict.txt");
- defaultLines.addAll(StreamUtil.readAllLines("/dict_en.txt"));
-
- // 用户自定义
- List denyList = StreamUtil.readAllLines("/sensitive_word_deny.txt");
- defaultLines.addAll(denyList);
-
- // 移除白名单词语
- List allowList = StreamUtil.readAllLines("/sensitive_word_allow.txt");
- defaultLines = CollectionUtil.difference(defaultLines, allowList);
-
- long end = System.currentTimeMillis();
- }
- }
-
-
- @Override
- public List getWordData() {
- return defaultLines;
- }
-
-}
diff --git a/src/main/java/com/github/houbb/sensitive/word/support/deny/WordDenySystem.java b/src/main/java/com/github/houbb/sensitive/word/support/deny/WordDenySystem.java
index f7282db..0d117b6 100644
--- a/src/main/java/com/github/houbb/sensitive/word/support/deny/WordDenySystem.java
+++ b/src/main/java/com/github/houbb/sensitive/word/support/deny/WordDenySystem.java
@@ -16,6 +16,15 @@ import java.util.List;
@ThreadSafe
public class WordDenySystem implements IWordDeny {
+ /**
+ * @since 0.3.0
+ */
+ private static final IWordDeny INSTANCE = new WordDenySystem();
+
+ public static IWordDeny getInstance() {
+ return INSTANCE;
+ }
+
@Override
public List deny() {
List results = StreamUtil.readAllLines("/dict.txt");
diff --git a/src/main/java/com/github/houbb/sensitive/word/support/deny/WordDenys.java b/src/main/java/com/github/houbb/sensitive/word/support/deny/WordDenys.java
index c15452e..e80be0e 100644
--- a/src/main/java/com/github/houbb/sensitive/word/support/deny/WordDenys.java
+++ b/src/main/java/com/github/houbb/sensitive/word/support/deny/WordDenys.java
@@ -1,6 +1,5 @@
package com.github.houbb.sensitive.word.support.deny;
-import com.github.houbb.heaven.support.instance.impl.Instances;
import com.github.houbb.heaven.support.pipeline.Pipeline;
import com.github.houbb.heaven.util.util.ArrayUtil;
import com.github.houbb.sensitive.word.api.IWordDeny;
@@ -43,7 +42,7 @@ public final class WordDenys {
* @since 0.0.13
*/
public static IWordDeny system() {
- return Instances.singleton(WordDenySystem.class);
+ return WordDenySystem.getInstance();
}
}
diff --git a/src/main/java/com/github/houbb/sensitive/word/support/format/CharFormatChain.java b/src/main/java/com/github/houbb/sensitive/word/support/format/CharFormatChain.java
deleted file mode 100644
index 0b72d8f..0000000
--- a/src/main/java/com/github/houbb/sensitive/word/support/format/CharFormatChain.java
+++ /dev/null
@@ -1,48 +0,0 @@
-package com.github.houbb.sensitive.word.support.format;
-
-import com.github.houbb.heaven.annotation.ThreadSafe;
-import com.github.houbb.heaven.support.instance.impl.Instances;
-import com.github.houbb.heaven.util.guava.Guavas;
-import com.github.houbb.sensitive.word.api.IWordContext;
-import com.github.houbb.sensitive.word.api.ICharFormat;
-
-import java.util.List;
-
-/**
- * 格式化责任链
- * @author binbin.hou
- * @since 0.0.5
- */
-@ThreadSafe
-public class CharFormatChain implements ICharFormat {
-
- @Override
- public char format(char original, IWordContext context) {
- char result = original;
-
- List charFormats = Guavas.newArrayList();
- if(context.ignoreEnglishStyle()) {
- charFormats.add(Instances.singleton(IgnoreEnglishStyleFormat.class));
- }
- if(context.ignoreCase()) {
- charFormats.add(Instances.singleton(IgnoreCaseCharFormat.class));
- }
- if(context.ignoreWidth()) {
- charFormats.add(Instances.singleton(IgnoreWidthCharFormat.class));
- }
- if(context.ignoreNumStyle()) {
- charFormats.add(Instances.singleton(IgnoreNumStyleCharFormat.class));
- }
- if(context.ignoreChineseStyle()) {
- charFormats.add(Instances.singleton(IgnoreChineseStyleFormat.class));
- }
-
- // 循环执行
- for(ICharFormat charFormat : charFormats) {
- result = charFormat.format(result, context);
- }
-
- return result;
- }
-
-}
diff --git a/src/main/java/com/github/houbb/sensitive/word/support/format/CharFormatInit.java b/src/main/java/com/github/houbb/sensitive/word/support/format/CharFormatInit.java
new file mode 100644
index 0000000..31e2f0e
--- /dev/null
+++ b/src/main/java/com/github/houbb/sensitive/word/support/format/CharFormatInit.java
@@ -0,0 +1,43 @@
+package com.github.houbb.sensitive.word.support.format;
+
+import com.github.houbb.heaven.annotation.ThreadSafe;
+import com.github.houbb.heaven.support.pipeline.Pipeline;
+import com.github.houbb.heaven.support.pipeline.impl.DefaultPipeline;
+import com.github.houbb.sensitive.word.api.ICharFormat;
+import com.github.houbb.sensitive.word.api.IWordContext;
+
+import java.util.List;
+
+/**
+ * 格式化责任链
+ * @author binbin.hou
+ * @since 0.3.0
+ */
+@ThreadSafe
+public abstract class CharFormatInit implements ICharFormat {
+
+ /**
+ * 初始化列表
+ *
+ * @param pipeline 当前列表泳道
+ * @since 0.0.13
+ */
+ protected abstract void init(final Pipeline pipeline);
+
+ @Override
+ public char format(char original, IWordContext context) {
+ Pipeline pipeline = new DefaultPipeline<>();
+ init(pipeline);
+
+ char result = original;
+
+ // 循环执行
+ List charFormats = pipeline.list();
+ for(ICharFormat charFormat : charFormats) {
+ result = charFormat.format(result, context);
+ }
+
+ return result;
+ }
+
+}
diff --git a/src/main/java/com/github/houbb/sensitive/word/support/format/CharFormats.java b/src/main/java/com/github/houbb/sensitive/word/support/format/CharFormats.java
new file mode 100644
index 0000000..e4dbccf
--- /dev/null
+++ b/src/main/java/com/github/houbb/sensitive/word/support/format/CharFormats.java
@@ -0,0 +1,112 @@
+package com.github.houbb.sensitive.word.support.format;
+
+import com.github.houbb.heaven.support.pipeline.Pipeline;
+import com.github.houbb.heaven.util.guava.Guavas;
+import com.github.houbb.heaven.util.util.ArrayUtil;
+import com.github.houbb.heaven.util.util.CollectionUtil;
+import com.github.houbb.sensitive.word.api.ICharFormat;
+import com.github.houbb.sensitive.word.api.IWordContext;
+
+import java.util.Collection;
+import java.util.List;
+
+/**
+ * 格式化工具类
+ * @author binbin.hou
+ * @since 0.3.0
+ */
+public final class CharFormats {
+
+ private CharFormats(){}
+
+ /**
+ * 初始化格式化
+ * @param context 上下文
+ * @return 结果
+ * @since 0.3.0
+ */
+ public static ICharFormat initCharFormat(final IWordContext context) {
+ List charFormats = Guavas.newArrayList();
+ if(context.ignoreEnglishStyle()) {
+ charFormats.add(ignoreEnglishStyle());
+ }
+ if(context.ignoreCase()) {
+ charFormats.add(ignoreCase());
+ }
+ if(context.ignoreWidth()) {
+ charFormats.add(ignoreWidth());
+ }
+ if(context.ignoreNumStyle()) {
+ charFormats.add(ignoreNumStyle());
+ }
+ if(context.ignoreChineseStyle()) {
+ charFormats.add(ignoreChineseStyle());
+ }
+
+ return chains(charFormats);
+ }
+
+ /**
+ * 链式
+ * @param charFormats 列表
+ * @return 结果
+ */
+ public static ICharFormat chains(final ICharFormat ... charFormats) {
+ if(ArrayUtil.isEmpty(charFormats)) {
+ return none();
+ }
+
+ return new CharFormatInit() {
+ @Override
+ protected void init(Pipeline pipeline) {
+ for(ICharFormat charFormat : charFormats) {
+ pipeline.addLast(charFormat);
+ }
+ }
+ };
+ }
+
+ /**
+ * 链式
+ * @param charFormats 列表
+ * @return 结果
+ */
+ public static ICharFormat chains(final Collection charFormats) {
+ if(CollectionUtil.isEmpty(charFormats)) {
+ return none();
+ }
+
+ return new CharFormatInit() {
+ @Override
+ protected void init(Pipeline pipeline) {
+ for(ICharFormat charFormat : charFormats) {
+ pipeline.addLast(charFormat);
+ }
+ }
+ };
+ }
+
+ public static ICharFormat none() {
+ return NoneCharFormat.getInstance();
+ }
+ public static ICharFormat ignoreCase() {
+ return IgnoreCaseCharFormat.getInstance();
+ }
+
+ public static ICharFormat ignoreEnglishStyle() {
+ return IgnoreEnglishStyleFormat.getInstance();
+ }
+
+ public static ICharFormat ignoreChineseStyle() {
+ return IgnoreChineseStyleFormat.getInstance();
+ }
+
+ public static ICharFormat ignoreNumStyle() {
+ return IgnoreNumStyleCharFormat.getInstance();
+ }
+
+ public static ICharFormat ignoreWidth() {
+ return IgnoreWidthCharFormat.getInstance();
+ }
+
+}
diff --git a/src/main/java/com/github/houbb/sensitive/word/support/format/IgnoreCaseCharFormat.java b/src/main/java/com/github/houbb/sensitive/word/support/format/IgnoreCaseCharFormat.java
index 9c43d22..bcc69ad 100644
--- a/src/main/java/com/github/houbb/sensitive/word/support/format/IgnoreCaseCharFormat.java
+++ b/src/main/java/com/github/houbb/sensitive/word/support/format/IgnoreCaseCharFormat.java
@@ -12,6 +12,12 @@ import com.github.houbb.sensitive.word.api.IWordContext;
@ThreadSafe
public class IgnoreCaseCharFormat implements ICharFormat {
+ private static final ICharFormat INSTANCE = new IgnoreCaseCharFormat();
+
+ public static ICharFormat getInstance() {
+ return INSTANCE;
+ }
+
@Override
public char format(char original, IWordContext context) {
return Character.toLowerCase(original);
diff --git a/src/main/java/com/github/houbb/sensitive/word/support/format/IgnoreChineseStyleFormat.java b/src/main/java/com/github/houbb/sensitive/word/support/format/IgnoreChineseStyleFormat.java
index 60efcba..fa9e375 100644
--- a/src/main/java/com/github/houbb/sensitive/word/support/format/IgnoreChineseStyleFormat.java
+++ b/src/main/java/com/github/houbb/sensitive/word/support/format/IgnoreChineseStyleFormat.java
@@ -7,13 +7,19 @@ import com.github.houbb.sensitive.word.api.ICharFormat;
import com.github.houbb.sensitive.word.api.IWordContext;
/**
- * 忽略大小写
+ * 忽略中文样式
* @author binbin.hou
* @since 0.0.5
*/
@ThreadSafe
public class IgnoreChineseStyleFormat implements ICharFormat {
+ private static final ICharFormat INSTANCE = new IgnoreChineseStyleFormat();
+
+ public static ICharFormat getInstance() {
+ return INSTANCE;
+ }
+
@Override
public char format(char original, IWordContext context) {
String string = String.valueOf(original);
diff --git a/src/main/java/com/github/houbb/sensitive/word/support/format/IgnoreEnglishStyleFormat.java b/src/main/java/com/github/houbb/sensitive/word/support/format/IgnoreEnglishStyleFormat.java
index e132254..c8b3148 100644
--- a/src/main/java/com/github/houbb/sensitive/word/support/format/IgnoreEnglishStyleFormat.java
+++ b/src/main/java/com/github/houbb/sensitive/word/support/format/IgnoreEnglishStyleFormat.java
@@ -3,7 +3,7 @@ package com.github.houbb.sensitive.word.support.format;
import com.github.houbb.heaven.annotation.ThreadSafe;
import com.github.houbb.sensitive.word.api.ICharFormat;
import com.github.houbb.sensitive.word.api.IWordContext;
-import com.github.houbb.sensitive.word.utils.CharUtils;
+import com.github.houbb.sensitive.word.utils.InnerCharUtils;
/**
* 忽略英文的各种格式
@@ -13,9 +13,15 @@ import com.github.houbb.sensitive.word.utils.CharUtils;
@ThreadSafe
public class IgnoreEnglishStyleFormat implements ICharFormat {
+ private static final ICharFormat INSTANCE = new IgnoreEnglishStyleFormat();
+
+ public static ICharFormat getInstance() {
+ return INSTANCE;
+ }
+
@Override
public char format(char original, IWordContext context) {
- return CharUtils.getMappingChar(original);
+ return InnerCharUtils.getMappingChar(original);
}
}
diff --git a/src/main/java/com/github/houbb/sensitive/word/support/format/IgnoreNumStyleCharFormat.java b/src/main/java/com/github/houbb/sensitive/word/support/format/IgnoreNumStyleCharFormat.java
index 2923626..4fd8511 100644
--- a/src/main/java/com/github/houbb/sensitive/word/support/format/IgnoreNumStyleCharFormat.java
+++ b/src/main/java/com/github/houbb/sensitive/word/support/format/IgnoreNumStyleCharFormat.java
@@ -3,7 +3,7 @@ package com.github.houbb.sensitive.word.support.format;
import com.github.houbb.heaven.annotation.ThreadSafe;
import com.github.houbb.sensitive.word.api.IWordContext;
import com.github.houbb.sensitive.word.api.ICharFormat;
-import com.github.houbb.sensitive.word.utils.NumUtils;
+import com.github.houbb.sensitive.word.utils.InnerNumUtils;
/**
* 忽略数字的样式
@@ -13,9 +13,15 @@ import com.github.houbb.sensitive.word.utils.NumUtils;
@ThreadSafe
public class IgnoreNumStyleCharFormat implements ICharFormat {
+ private static final ICharFormat INSTANCE = new IgnoreNumStyleCharFormat();
+
+ public static ICharFormat getInstance() {
+ return INSTANCE;
+ }
+
@Override
public char format(char original, IWordContext context) {
- return NumUtils.getMappingChar(original);
+ return InnerNumUtils.getMappingChar(original);
}
}
diff --git a/src/main/java/com/github/houbb/sensitive/word/support/format/IgnoreWidthCharFormat.java b/src/main/java/com/github/houbb/sensitive/word/support/format/IgnoreWidthCharFormat.java
index 64f8f38..e34cab3 100644
--- a/src/main/java/com/github/houbb/sensitive/word/support/format/IgnoreWidthCharFormat.java
+++ b/src/main/java/com/github/houbb/sensitive/word/support/format/IgnoreWidthCharFormat.java
@@ -6,13 +6,19 @@ import com.github.houbb.sensitive.word.api.IWordContext;
import com.github.houbb.sensitive.word.api.ICharFormat;
/**
- * 格式化责任链
+ * 格式化字宽度
* @author binbin.hou
* @since 0.0.5
*/
@ThreadSafe
public class IgnoreWidthCharFormat implements ICharFormat {
+ private static final ICharFormat INSTANCE = new IgnoreWidthCharFormat();
+
+ public static ICharFormat getInstance() {
+ return INSTANCE;
+ }
+
@Override
public char format(char original, IWordContext context) {
return CharUtil.toHalfWidth(original);
diff --git a/src/main/java/com/github/houbb/sensitive/word/support/format/NoneCharFormat.java b/src/main/java/com/github/houbb/sensitive/word/support/format/NoneCharFormat.java
new file mode 100644
index 0000000..c1c092d
--- /dev/null
+++ b/src/main/java/com/github/houbb/sensitive/word/support/format/NoneCharFormat.java
@@ -0,0 +1,27 @@
+package com.github.houbb.sensitive.word.support.format;
+
+import com.github.houbb.heaven.annotation.ThreadSafe;
+import com.github.houbb.sensitive.word.api.ICharFormat;
+import com.github.houbb.sensitive.word.api.IWordContext;
+
+/**
+ * 无处理
+ *
+ * @author binbin.hou
+ * @since 0.3.0
+ */
+@ThreadSafe
+public class NoneCharFormat implements ICharFormat {
+
+ private static final ICharFormat INSTANCE = new NoneCharFormat();
+
+ public static ICharFormat getInstance() {
+ return INSTANCE;
+ }
+
+ @Override
+ public char format(char original, IWordContext context) {
+ return original;
+ }
+
+}
diff --git a/src/main/java/com/github/houbb/sensitive/word/support/map/SensitiveWordMap.java b/src/main/java/com/github/houbb/sensitive/word/support/map/SensitiveWordMap.java
index dc7b28f..8b96a52 100644
--- a/src/main/java/com/github/houbb/sensitive/word/support/map/SensitiveWordMap.java
+++ b/src/main/java/com/github/houbb/sensitive/word/support/map/SensitiveWordMap.java
@@ -1,19 +1,15 @@
package com.github.houbb.sensitive.word.support.map;
import com.github.houbb.heaven.annotation.ThreadSafe;
-import com.github.houbb.heaven.support.instance.impl.Instances;
import com.github.houbb.heaven.util.guava.Guavas;
import com.github.houbb.heaven.util.io.FileUtil;
-import com.github.houbb.heaven.util.lang.CharUtil;
import com.github.houbb.heaven.util.lang.ObjectUtil;
import com.github.houbb.heaven.util.lang.StringUtil;
import com.github.houbb.heaven.util.util.CollectionUtil;
-import com.github.houbb.heaven.util.util.MapUtil;
import com.github.houbb.sensitive.word.api.*;
import com.github.houbb.sensitive.word.constant.AppConst;
import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum;
import com.github.houbb.sensitive.word.support.check.SensitiveCheckResult;
-import com.github.houbb.sensitive.word.support.check.impl.SensitiveCheckChain;
import com.github.houbb.sensitive.word.support.check.impl.SensitiveCheckUrl;
import com.github.houbb.sensitive.word.support.replace.SensitiveWordReplaceContext;
import com.github.houbb.sensitive.word.support.result.WordResult;
@@ -52,7 +48,6 @@ public class SensitiveWordMap implements IWordMap {
@Override
@SuppressWarnings("unchecked")
public synchronized void initWordMap(Collection collection) {
- long startTime = System.currentTimeMillis();
// 避免扩容带来的消耗
Map newInnerWordMap = new HashMap(collection.size());
@@ -99,8 +94,6 @@ public class SensitiveWordMap implements IWordMap {
// 最后更新为新的 map,保证更新过程中旧的数据可用
this.innerWordMap = newInnerWordMap;
-
- long endTime = System.currentTimeMillis();
}
/**
@@ -154,12 +147,12 @@ public class SensitiveWordMap implements IWordMap {
}
@Override
- public String replace(String target, final ISensitiveWordReplace replace, final IWordContext context) {
+ public String replace(String target, final IWordContext context) {
if(StringUtil.isEmpty(target)) {
return target;
}
- return this.replaceSensitiveWord(target, replace, context);
+ return this.replaceSensitiveWord(target, context);
}
/**
@@ -211,13 +204,11 @@ public class SensitiveWordMap implements IWordMap {
/**
* 直接替换敏感词,返回替换后的结果
* @param target 文本信息
- * @param replace 替换策略
* @param context 上下文
* @return 脱敏后的字符串
* @since 0.0.2
*/
private String replaceSensitiveWord(final String target,
- final ISensitiveWordReplace replace,
final IWordContext context) {
if(StringUtil.isEmpty(target)) {
return target;
@@ -245,7 +236,7 @@ public class SensitiveWordMap implements IWordMap {
ISensitiveWordReplaceContext replaceContext = SensitiveWordReplaceContext.newInstance()
.sensitiveWord(string)
.wordLength(wordLength);
- String replaceStr = replace.replace(replaceContext);
+ String replaceStr = context.sensitiveWordReplace().replace(replaceContext);
resultBuilder.append(replaceStr);
}
@@ -267,7 +258,7 @@ public class SensitiveWordMap implements IWordMap {
context.sensitiveWordMap(innerWordMap);
// 责任链模式调用
- return Instances.singleton(SensitiveCheckChain.class)
+ return context.sensitiveCheck()
.sensitiveCheck(txt, beginIndex, validModeEnum, context);
}
diff --git a/src/main/java/com/github/houbb/sensitive/word/support/map/WordMaps.java b/src/main/java/com/github/houbb/sensitive/word/support/map/WordMaps.java
new file mode 100644
index 0000000..48a8181
--- /dev/null
+++ b/src/main/java/com/github/houbb/sensitive/word/support/map/WordMaps.java
@@ -0,0 +1,24 @@
+package com.github.houbb.sensitive.word.support.map;
+
+import com.github.houbb.sensitive.word.api.IWordMap;
+
+/**
+ * 敏感词 map
+ *
+ * @author binbin.hou
+ * @since 0.3.0
+ */
+public final class WordMaps {
+
+ private WordMaps(){}
+
+ /**
+ * 默认策略
+ * @return 策略
+ * @since 0.3.0
+ */
+ public static IWordMap defaults() {
+ return new SensitiveWordMap();
+ }
+
+}
diff --git a/src/main/java/com/github/houbb/sensitive/word/support/replace/SensitiveWordReplaceChar.java b/src/main/java/com/github/houbb/sensitive/word/support/replace/SensitiveWordReplaceChar.java
index 4893eee..02de3ec 100644
--- a/src/main/java/com/github/houbb/sensitive/word/support/replace/SensitiveWordReplaceChar.java
+++ b/src/main/java/com/github/houbb/sensitive/word/support/replace/SensitiveWordReplaceChar.java
@@ -1,6 +1,7 @@
package com.github.houbb.sensitive.word.support.replace;
import com.github.houbb.heaven.annotation.ThreadSafe;
+import com.github.houbb.heaven.constant.CharConst;
import com.github.houbb.heaven.util.lang.CharUtil;
import com.github.houbb.sensitive.word.api.ISensitiveWordReplace;
import com.github.houbb.sensitive.word.api.ISensitiveWordReplaceContext;
@@ -13,12 +14,20 @@ import com.github.houbb.sensitive.word.api.ISensitiveWordReplaceContext;
@ThreadSafe
public class SensitiveWordReplaceChar implements ISensitiveWordReplace {
+ /**
+ * 替换的字符
+ * @since 0.3.0
+ */
private final char replaceChar;
public SensitiveWordReplaceChar(char replaceChar) {
this.replaceChar = replaceChar;
}
+ public SensitiveWordReplaceChar() {
+ this(CharConst.STAR);
+ }
+
@Override
public String replace(ISensitiveWordReplaceContext context) {
int wordLength = context.wordLength();
diff --git a/src/main/java/com/github/houbb/sensitive/word/support/replace/SensitiveWordReplaces.java b/src/main/java/com/github/houbb/sensitive/word/support/replace/SensitiveWordReplaces.java
new file mode 100644
index 0000000..552547e
--- /dev/null
+++ b/src/main/java/com/github/houbb/sensitive/word/support/replace/SensitiveWordReplaces.java
@@ -0,0 +1,34 @@
+package com.github.houbb.sensitive.word.support.replace;
+
+import com.github.houbb.sensitive.word.api.ISensitiveWordReplace;
+
+/**
+ * 字符替换策略工具类
+ *
+ * @author binbin.hou
+ * @since 0.3.0
+ */
+public final class SensitiveWordReplaces {
+
+ private SensitiveWordReplaces(){}
+
+ /**
+ * 字符
+ * @param c 字符
+ * @return 结果
+ * @since 0.3.0
+ */
+ public static ISensitiveWordReplace chars(final char c) {
+ return new SensitiveWordReplaceChar(c);
+ }
+
+ /**
+ * 字符,默认为 *
+ * @return 结果
+ * @since 0.3.0
+ */
+ public static ISensitiveWordReplace chars() {
+ return new SensitiveWordReplaceChar();
+ }
+
+}
diff --git a/src/main/java/com/github/houbb/sensitive/word/support/result/WordResultHandlerRaw.java b/src/main/java/com/github/houbb/sensitive/word/support/result/WordResultHandlerRaw.java
index a34b5c2..67f110f 100644
--- a/src/main/java/com/github/houbb/sensitive/word/support/result/WordResultHandlerRaw.java
+++ b/src/main/java/com/github/houbb/sensitive/word/support/result/WordResultHandlerRaw.java
@@ -12,6 +12,15 @@ import com.github.houbb.sensitive.word.api.IWordResultHandler;
@ThreadSafe
public class WordResultHandlerRaw implements IWordResultHandler {
+ /**
+ * @since 0.3.0
+ */
+ private static final WordResultHandlerRaw INSTANCE = new WordResultHandlerRaw();
+
+ public static WordResultHandlerRaw getInstance() {
+ return INSTANCE;
+ }
+
@Override
public IWordResult handle(IWordResult wordResult) {
return wordResult;
diff --git a/src/main/java/com/github/houbb/sensitive/word/support/result/WordResultHandlerWord.java b/src/main/java/com/github/houbb/sensitive/word/support/result/WordResultHandlerWord.java
index 9853c5a..7048759 100644
--- a/src/main/java/com/github/houbb/sensitive/word/support/result/WordResultHandlerWord.java
+++ b/src/main/java/com/github/houbb/sensitive/word/support/result/WordResultHandlerWord.java
@@ -13,6 +13,15 @@ import com.github.houbb.sensitive.word.api.IWordResultHandler;
@ThreadSafe
public class WordResultHandlerWord implements IWordResultHandler {
+ /**
+ * @since 0.3.0
+ */
+ private static final WordResultHandlerWord INSTANCE = new WordResultHandlerWord();
+
+ public static WordResultHandlerWord getInstance() {
+ return INSTANCE;
+ }
+
@Override
public String handle(IWordResult wordResult) {
if(wordResult == null) {
diff --git a/src/main/java/com/github/houbb/sensitive/word/support/result/WordResultHandlers.java b/src/main/java/com/github/houbb/sensitive/word/support/result/WordResultHandlers.java
index 599cb87..b77e6b7 100644
--- a/src/main/java/com/github/houbb/sensitive/word/support/result/WordResultHandlers.java
+++ b/src/main/java/com/github/houbb/sensitive/word/support/result/WordResultHandlers.java
@@ -1,6 +1,5 @@
package com.github.houbb.sensitive.word.support.result;
-import com.github.houbb.heaven.support.instance.impl.Instances;
import com.github.houbb.sensitive.word.api.IWordResult;
import com.github.houbb.sensitive.word.api.IWordResultHandler;
@@ -19,7 +18,7 @@ public final class WordResultHandlers {
* @since 0.1.0
*/
public static IWordResultHandler raw() {
- return Instances.singleton(WordResultHandlerRaw.class);
+ return WordResultHandlerRaw.getInstance();
}
/**
@@ -28,7 +27,7 @@ public final class WordResultHandlers {
* @since 0.1.0
*/
public static IWordResultHandler word() {
- return Instances.singleton(WordResultHandlerWord.class);
+ return WordResultHandlerWord.getInstance();
}
}
diff --git a/src/main/java/com/github/houbb/sensitive/word/utils/CharUtils.java b/src/main/java/com/github/houbb/sensitive/word/utils/InnerCharUtils.java
similarity index 96%
rename from src/main/java/com/github/houbb/sensitive/word/utils/CharUtils.java
rename to src/main/java/com/github/houbb/sensitive/word/utils/InnerCharUtils.java
index c3fcc3a..f4b9e8b 100644
--- a/src/main/java/com/github/houbb/sensitive/word/utils/CharUtils.java
+++ b/src/main/java/com/github/houbb/sensitive/word/utils/InnerCharUtils.java
@@ -12,9 +12,9 @@ import java.util.Map;
* @author Administrator
* @since 0.0.4
*/
-public final class CharUtils {
+public final class InnerCharUtils {
- private CharUtils() {
+ private InnerCharUtils() {
}
/**
diff --git a/src/main/java/com/github/houbb/sensitive/word/utils/InnerFormatUtils.java b/src/main/java/com/github/houbb/sensitive/word/utils/InnerFormatUtils.java
index 295b401..d289e5a 100644
--- a/src/main/java/com/github/houbb/sensitive/word/utils/InnerFormatUtils.java
+++ b/src/main/java/com/github/houbb/sensitive/word/utils/InnerFormatUtils.java
@@ -1,10 +1,12 @@
package com.github.houbb.sensitive.word.utils;
-import com.github.houbb.heaven.support.instance.impl.Instances;
import com.github.houbb.heaven.util.lang.StringUtil;
+import com.github.houbb.heaven.util.util.CollectionUtil;
import com.github.houbb.sensitive.word.api.ICharFormat;
import com.github.houbb.sensitive.word.api.IWordContext;
-import com.github.houbb.sensitive.word.support.format.CharFormatChain;
+
+import java.util.ArrayList;
+import java.util.List;
/**
* 内部格式化工具类
@@ -21,13 +23,13 @@ public final class InnerFormatUtils {
* @return 结果
* @since 0.1.1
*/
- public static String format(String original, IWordContext context) {
+ public static String format(final String original, final IWordContext context) {
if(StringUtil.isEmpty(original)) {
return original;
}
StringBuilder stringBuilder = new StringBuilder();
- ICharFormat charFormat = Instances.singleton(CharFormatChain.class);
+ ICharFormat charFormat = context.charFormat();
char[] chars = original.toCharArray();
for(char c : chars) {
char cf = charFormat.format(c, context);
@@ -37,4 +39,26 @@ public final class InnerFormatUtils {
return stringBuilder.toString();
}
+    /**
+     * Formats every word of a list with {@link #format(String, IWordContext)}.
+     * @param list words to format; a list that CollectionUtil.isEmpty reports as empty is returned unchanged
+     * @param context word context whose character format is applied to each word
+     * @return a new list holding the formatted words in the same order, or {@code list} itself when it is empty
+     * @since 0.3.0
+     */
+    public static List<String> formatWordList(List<String> list,
+                                              final IWordContext context) {
+        if(CollectionUtil.isEmpty(list)) {
+            // Nothing to format: hand the caller's list back as-is.
+            return list;
+        }
+        List<String> resultList = new ArrayList<>(list.size());
+        for(String word : list) {
+            String formatWord = InnerFormatUtils.format(word, context);
+            resultList.add(formatWord);
+        }
+
+        return resultList;
+    }
+
}
diff --git a/src/main/java/com/github/houbb/sensitive/word/utils/NumUtils.java b/src/main/java/com/github/houbb/sensitive/word/utils/InnerNumUtils.java
similarity index 98%
rename from src/main/java/com/github/houbb/sensitive/word/utils/NumUtils.java
rename to src/main/java/com/github/houbb/sensitive/word/utils/InnerNumUtils.java
index 486bbe5..62b40c1 100644
--- a/src/main/java/com/github/houbb/sensitive/word/utils/NumUtils.java
+++ b/src/main/java/com/github/houbb/sensitive/word/utils/InnerNumUtils.java
@@ -15,9 +15,9 @@ import java.util.Map;
* @author Administrator
* @since 0.0.4
*/
-public final class NumUtils {
+public final class InnerNumUtils {
- private NumUtils(){}
+ private InnerNumUtils(){}
private static final String NUM_ONE = "⓪0零º₀⓿○" +
"123456789" +
diff --git a/src/main/java/com/github/houbb/sensitive/word/utils/InnerWordDataUtils.java b/src/main/java/com/github/houbb/sensitive/word/utils/InnerWordDataUtils.java
new file mode 100644
index 0000000..3df64f6
--- /dev/null
+++ b/src/main/java/com/github/houbb/sensitive/word/utils/InnerWordDataUtils.java
@@ -0,0 +1,48 @@
+package com.github.houbb.sensitive.word.utils;
+
+import com.github.houbb.heaven.util.util.CollectionUtil;
+import com.github.houbb.sensitive.word.api.IWordContext;
+
+import java.util.*;
+
+/**
+ * Utilities for preparing the sensitive-word data.
+ * @since 0.3.0
+ */
+public final class InnerWordDataUtils {
+
+    private InnerWordDataUtils(){}
+
+    /**
+     * Computes the deny words that remain once the allowed words are removed.
+     * When both lists are non-empty, each is formatted with the context before the comparison.
+     * @param denyList deny words; when empty, an empty list is returned
+     * @param allowList allow words; when empty, {@code denyList} itself is returned unformatted (NOTE(review): confirm callers expect raw words here)
+     * @param context word context used to format both lists
+     * @return the formatted deny words that are absent from the formatted allow list, in deny-list order
+     * @since 0.3.0
+     */
+    public static List<String> getActualDenyList(List<String> denyList, List<String> allowList,
+                                                 final IWordContext context) {
+        if(CollectionUtil.isEmpty(denyList)) {
+            return Collections.emptyList();
+        }
+        if(CollectionUtil.isEmpty(allowList)) {
+            return denyList;
+        }
+
+        List<String> formatDenyList = InnerFormatUtils.formatWordList(denyList, context);
+        List<String> formatAllowList = InnerFormatUtils.formatWordList(allowList, context);
+
+        // A hash set keeps each allow-list membership check O(1).
+        Set<String> allowSet = new HashSet<>(formatAllowList);
+        List<String> resultList = new ArrayList<>();
+        for(String deny : formatDenyList) {
+            if(allowSet.contains(deny)) {
+                continue;
+            }
+            resultList.add(deny);
+        }
+        return resultList;
+    }
+}
diff --git a/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsChineseTest.java b/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsChineseTest.java
index 5fa0485..b157b04 100644
--- a/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsChineseTest.java
+++ b/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsChineseTest.java
@@ -22,7 +22,7 @@ public class SensitiveWordBsChineseTest {
public void ignoreChineseStyleTest() {
final String text = "我爱我的祖国和五星紅旗。";
- List wordList = SensitiveWordBs.newInstance().findAll(text);
+ List wordList = SensitiveWordBs.newInstance().init().findAll(text);
Assert.assertEquals("[祖国, 五星紅旗]", wordList.toString());
}
diff --git a/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsEmailTest.java b/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsEmailTest.java
index c930774..b800fc1 100644
--- a/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsEmailTest.java
+++ b/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsEmailTest.java
@@ -22,7 +22,7 @@ public class SensitiveWordBsEmailTest {
public void emailEnglishTest() {
final String text = "楼主好人,邮箱 sensitiveword@xx.com";
- List wordList = SensitiveWordBs.newInstance().findAll(text);
+ List wordList = SensitiveWordBs.newInstance().init().findAll(text);
Assert.assertEquals("[邮箱, sensitiveword@xx.com]", wordList.toString());
}
@@ -34,7 +34,7 @@ public class SensitiveWordBsEmailTest {
public void emailNumberTest() {
final String text = "楼主好人,邮箱 123456789@xx.com";
- List wordList = SensitiveWordBs.newInstance().findAll(text);
+ List wordList = SensitiveWordBs.newInstance().init().findAll(text);
Assert.assertEquals("[邮箱, 123456789, xx.com]", wordList.toString());
}
diff --git a/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsEnglishTest.java b/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsEnglishTest.java
index 78ccb9a..b9c59e4 100644
--- a/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsEnglishTest.java
+++ b/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsEnglishTest.java
@@ -22,7 +22,7 @@ public class SensitiveWordBsEnglishTest {
public void ignoreEnglishStyleTest() {
final String text = "Ⓕⓤc⒦ the bad words";
- List wordList = SensitiveWordBs.newInstance().findAll(text);
+ List wordList = SensitiveWordBs.newInstance().init().findAll(text);
Assert.assertEquals("[Ⓕⓤc⒦]", wordList.toString());
}
diff --git a/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsNumLenTest.java b/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsNumLenTest.java
index 8383834..738591b 100644
--- a/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsNumLenTest.java
+++ b/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsNumLenTest.java
@@ -23,12 +23,13 @@ public class SensitiveWordBsNumLenTest {
final String text = "你懂得:12345678";
// 默认检测 8 位
- List wordList = SensitiveWordBs.newInstance().findAll(text);
+ List wordList = SensitiveWordBs.newInstance().init().findAll(text);
Assert.assertEquals("[12345678]", wordList.toString());
// 指定数字的长度,避免误杀
List wordList2 = SensitiveWordBs.newInstance()
.numCheckLen(9)
+ .init()
.findAll(text);
Assert.assertEquals("[]", wordList2.toString());
}
diff --git a/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsNumTest.java b/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsNumTest.java
index c269b71..e7a9e0e 100644
--- a/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsNumTest.java
+++ b/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsNumTest.java
@@ -22,7 +22,7 @@ public class SensitiveWordBsNumTest {
public void findAllTest() {
final String text = "这个是我的微信:9989123456";
- List wordList = SensitiveWordBs.newInstance().findAll(text);
+ List wordList = SensitiveWordBs.newInstance().init().findAll(text);
Assert.assertEquals("[微信, 9989123456]", wordList.toString());
}
@@ -34,7 +34,7 @@ public class SensitiveWordBsNumTest {
public void ignoreNumStyleTest() {
final String text = "这个是我的微信:9⓿二肆⁹₈③⑸⒋➃㈤㊄";
- List wordList = SensitiveWordBs.newInstance().findAll(text);
+ List wordList = SensitiveWordBs.newInstance().init().findAll(text);
Assert.assertEquals("[微信, 9⓿二肆⁹₈③⑸⒋➃㈤㊄]", wordList.toString());
}
diff --git a/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsRepeatTest.java b/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsRepeatTest.java
index 5621f4f..82ac54f 100644
--- a/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsRepeatTest.java
+++ b/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsRepeatTest.java
@@ -24,6 +24,7 @@ public class SensitiveWordBsRepeatTest {
List wordList = SensitiveWordBs.newInstance()
.ignoreRepeat(true)
+ .init()
.findAll(text);
Assert.assertEquals("[ⒻⒻⒻfⓤuⓤ⒰cⓒ⒦]", wordList.toString());
}
diff --git a/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsTest.java b/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsTest.java
index 7067d5c..979e25d 100644
--- a/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsTest.java
+++ b/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsTest.java
@@ -2,6 +2,7 @@ package com.github.houbb.sensitive.word.bs;
import com.github.houbb.sensitive.word.support.allow.WordAllows;
import com.github.houbb.sensitive.word.support.deny.WordDenys;
+import com.github.houbb.sensitive.word.support.replace.SensitiveWordReplaces;
import org.junit.Assert;
import org.junit.Test;
@@ -24,7 +25,7 @@ public class SensitiveWordBsTest {
public void containsTest() {
final String text = "五星红旗迎风飘扬,毛主席的画像屹立在天安门前。";
- Assert.assertTrue(SensitiveWordBs.newInstance().contains(text));
+ Assert.assertTrue(SensitiveWordBs.newInstance().init().contains(text));
}
/**
@@ -35,7 +36,7 @@ public class SensitiveWordBsTest {
public void findAllTest() {
final String text = "五星红旗迎风飘扬,毛主席的画像屹立在天安门前。";
- List wordList = SensitiveWordBs.newInstance().findAll(text);
+ List wordList = SensitiveWordBs.newInstance().init().findAll(text);
Assert.assertEquals("[五星红旗, 毛主席, 天安门]", wordList.toString());
}
@@ -47,7 +48,7 @@ public class SensitiveWordBsTest {
public void findFirstTest() {
final String text = "五星红旗迎风飘扬,毛主席的画像屹立在天安门前。";
- String word = SensitiveWordBs.newInstance().findFirst(text);
+ String word = SensitiveWordBs.newInstance().init().findFirst(text);
Assert.assertEquals("五星红旗", word);
}
@@ -59,7 +60,7 @@ public class SensitiveWordBsTest {
public void replaceTest() {
final String text = "五星红旗迎风飘扬,毛主席的画像屹立在天安门前。";
- String result = SensitiveWordBs.newInstance().replace(text);
+ String result = SensitiveWordBs.newInstance().init().replace(text);
Assert.assertEquals("****迎风飘扬,***的画像屹立在***前。", result);
}
@@ -71,7 +72,10 @@ public class SensitiveWordBsTest {
public void replaceCharTest() {
final String text = "五星红旗迎风飘扬,毛主席的画像屹立在天安门前。";
- String result = SensitiveWordBs.newInstance().replace(text, '0');
+ String result = SensitiveWordBs.newInstance()
+ .sensitiveWordReplace(SensitiveWordReplaces.chars('0'))
+ .init()
+ .replace(text);
Assert.assertEquals("0000迎风飘扬,000的画像屹立在000前。", result);
}
@@ -83,7 +87,7 @@ public class SensitiveWordBsTest {
public void ignoreCaseTest() {
final String text = "fuCK the bad words.";
- String word = SensitiveWordBs.newInstance().findFirst(text);
+ String word = SensitiveWordBs.newInstance().init().findFirst(text);
Assert.assertEquals("fuCK", word);
}
@@ -95,7 +99,7 @@ public class SensitiveWordBsTest {
public void ignoreWidthTest() {
final String text = "fuck the bad words.";
- String word = SensitiveWordBs.newInstance().findFirst(text);
+ String word = SensitiveWordBs.newInstance().init().findFirst(text);
Assert.assertEquals("fuck", word);
}
diff --git a/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsUrlTest.java b/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsUrlTest.java
index e13a1c7..8f67774 100644
--- a/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsUrlTest.java
+++ b/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsUrlTest.java
@@ -22,11 +22,13 @@ public class SensitiveWordBsUrlTest {
public void commonUrlTest() {
final String text = "点击链接 www.baidu.com查看答案";
- List wordList = SensitiveWordBs.newInstance().findAll(text);
+ List wordList = SensitiveWordBs.newInstance().init().findAll(text);
Assert.assertEquals("[链接, www.baidu.com]", wordList.toString());
Assert.assertEquals("点击** *************查看答案", SensitiveWordBs
- .newInstance().replace(text));
+ .newInstance()
+ .init()
+ .replace(text));
}
/**
@@ -41,10 +43,10 @@ public class SensitiveWordBsUrlTest {
public void imageUrlTest() {
final String text = "双击查看大图 www.big-image.png查看";
- List wordList = SensitiveWordBs.newInstance().findAll(text);
+ List wordList = SensitiveWordBs.newInstance().init().findAll(text);
Assert.assertEquals("[www.big-image.png]", wordList.toString());
- Assert.assertEquals(text, SensitiveWordBs.newInstance().replace(text));
+ Assert.assertEquals(text, SensitiveWordBs.newInstance().init().replace(text));
}
}
diff --git a/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsUserDefineTest.java b/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsUserDefineTest.java
index 4978c34..b5c94ee 100644
--- a/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsUserDefineTest.java
+++ b/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsUserDefineTest.java
@@ -22,7 +22,7 @@ public class SensitiveWordBsUserDefineTest {
public void allowAndDenyTest() {
final String text = "gender 我们认为应该通过,自定义敏感词我们认为应该拒绝。";
- List wordList = SensitiveWordBs.newInstance().findAll(text);
+ List wordList = SensitiveWordBs.newInstance().init().findAll(text);
Assert.assertEquals("[自定义敏感词]", wordList.toString());
}
diff --git a/src/test/java/com/github/houbb/sensitive/word/data/DictNumTest.java b/src/test/java/com/github/houbb/sensitive/word/data/DictNumTest.java
index a4dbb7a..07e56e8 100644
--- a/src/test/java/com/github/houbb/sensitive/word/data/DictNumTest.java
+++ b/src/test/java/com/github/houbb/sensitive/word/data/DictNumTest.java
@@ -1,12 +1,6 @@
package com.github.houbb.sensitive.word.data;
-import com.github.houbb.heaven.support.handler.IHandler;
import com.github.houbb.heaven.util.io.FileUtil;
-import com.github.houbb.heaven.util.lang.StringUtil;
-import com.github.houbb.heaven.util.util.CollectionUtil;
-import com.github.houbb.opencc4j.core.impl.ZhConvertBootstrap;
-import com.github.houbb.opencc4j.support.segment.impl.CharSegment;
-import com.github.houbb.sensitive.word.utils.NumUtils;
import org.junit.Ignore;
import org.junit.Test;
diff --git a/src/test/java/com/github/houbb/sensitive/word/data/DictSlimTest.java b/src/test/java/com/github/houbb/sensitive/word/data/DictSlimTest.java
index c67e901..a6844ef 100644
--- a/src/test/java/com/github/houbb/sensitive/word/data/DictSlimTest.java
+++ b/src/test/java/com/github/houbb/sensitive/word/data/DictSlimTest.java
@@ -3,12 +3,11 @@ package com.github.houbb.sensitive.word.data;
import com.github.houbb.heaven.support.filter.IFilter;
import com.github.houbb.heaven.support.handler.IHandler;
import com.github.houbb.heaven.util.io.FileUtil;
-import com.github.houbb.heaven.util.lang.NumUtil;
import com.github.houbb.heaven.util.lang.StringUtil;
import com.github.houbb.heaven.util.util.CollectionUtil;
import com.github.houbb.opencc4j.core.impl.ZhConvertBootstrap;
import com.github.houbb.opencc4j.support.segment.impl.CharSegment;
-import com.github.houbb.sensitive.word.utils.NumUtils;
+import com.github.houbb.sensitive.word.utils.InnerNumUtils;
import org.junit.Ignore;
import org.junit.Test;
@@ -128,7 +127,7 @@ public class DictSlimTest {
// 停顿词语
String trim = string.replaceAll("加|否|与|和", "");
- String mapString = NumUtils.getMappingString(trim);
+ String mapString = InnerNumUtils.getMappingString(trim);
boolean result = StringUtil.isDigit(mapString);
if(result) {
System.out.println(string);