diff --git a/CHANGE_LOG.md b/CHANGE_LOG.md
index ba3fa56..f045cfc 100644
--- a/CHANGE_LOG.md
+++ b/CHANGE_LOG.md
@@ -126,3 +126,10 @@
| 序号 | 变更类型 | 说明 | 时间 | 备注 |
|:---|:---|:---|:---|:--|
| 1 | F | 自定义敏感词 allow/deny 进行格式化处理 | 2021-12-11 23:51:58 | |
+
+# release_0.2.0
+
+| 序号 | 变更类型 | 说明 | 时间 | 备注 |
+|:---|:---|:---|:---|:--|
+| 1 | A | 允许用户自定义替换策略 | 2022-01-15 23:51:58 | |
+| 2 | U | 升级二方数据库依赖 | 2022-01-15 23:51:58 | |
diff --git a/README.md b/README.md
index c792156..1d8746e 100644
--- a/README.md
+++ b/README.md
@@ -44,9 +44,9 @@
[CHANGE_LOG.md](https://github.com/houbb/sensitive-word/blob/master/doc/CHANGE_LOG.md)
-v0.1.1 变更:
+v0.2.0 变更:
-- 敏感词自定义 Allow/Deny 进行格式化处理
+- 支持用户自定义替换策略
# 快速开始
@@ -62,7 +62,7 @@ v0.1.1 变更:
com.github.houbb
sensitive-word
- 0.1.1
+ 0.2.0
```
@@ -73,6 +73,7 @@ v0.1.1 变更:
| 方法 | 参数 | 返回值| 说明 |
|:---|:---|:---|:---|
| contains(String) | 待验证的字符串 | 布尔值 | 验证字符串是否包含敏感词 |
+| replace(String, ISensitiveWordReplace) | 使用指定的替换策略替换敏感词 | 字符串 | 返回脱敏后的字符串 |
| replace(String, char) | 使用指定的 char 替换敏感词 | 字符串 | 返回脱敏后的字符串 |
| replace(String) | 使用 `*` 替换敏感词 | 字符串 | 返回脱敏后的字符串 |
| findAll(String) | 待验证的字符串 | 字符串列表 | 返回字符串中所有敏感词 |
@@ -170,6 +171,58 @@ String result = SensitiveWordHelper.replace(text, '0');
Assert.assertEquals("0000迎风飘扬,000的画像屹立在000前。", result);
```
+### 自定义替换策略
+
+V0.2.0 支持该特性。
+
+场景说明:有时候我们希望不同的敏感词有不同的替换结果。比如【游戏】替换为【电子竞技】,【失业】替换为【灵活就业】。
+
+诚然,提前使用字符串的正则替换也可以,不过性能一般。
+
+使用例子:
+
+```java
+/**
+ * 自定义替换策略
+ * @since 0.2.0
+ */
+@Test
+public void defineReplaceTest() {
+ final String text = "五星红旗迎风飘扬,毛主席的画像屹立在天安门前。";
+
+ ISensitiveWordReplace replace = new MySensitiveWordReplace();
+ String result = SensitiveWordHelper.replace(text, replace);
+
+ Assert.assertEquals("国家旗帜迎风飘扬,教员的画像屹立在***前。", result);
+}
+```
+
+其中 `MySensitiveWordReplace` 是我们自定义的替换策略,实现如下:
+
+```java
+public class MySensitiveWordReplace implements ISensitiveWordReplace {
+
+ @Override
+ public String replace(ISensitiveWordReplaceContext context) {
+ String sensitiveWord = context.sensitiveWord();
+ // 自定义不同的敏感词替换策略,可以从数据库等地方读取
+ if("五星红旗".equals(sensitiveWord)) {
+ return "国家旗帜";
+ }
+ if("毛主席".equals(sensitiveWord)) {
+ return "教员";
+ }
+
+ // 其他默认使用 * 代替
+ int wordLength = context.wordLength();
+ return CharUtil.repeat('*', wordLength);
+ }
+
+}
+```
+
+我们针对其中的部分词做固定映射处理,其他的默认转换为 `*`。
+
# 更多特性
后续的诸多特性,主要是针对各种针对各种情况的处理,尽可能的提升敏感词命中率。
@@ -530,8 +583,6 @@ public class SensitiveWordService {
# 后期 road-map
-- 停顿词
-
- 同音字处理
- 形近字处理
@@ -542,7 +593,7 @@ public class SensitiveWordService {
- 敏感词标签支持
-- 邮箱后缀检测
+- [ ] DFA 数据结构的另一种实现
# 拓展阅读
@@ -552,4 +603,16 @@ public class SensitiveWordService {
[敏感词库优化流程](https://houbb.github.io/2020/01/07/sensitive-word-slim)
-[停止词的思考记录](https://houbb.github.io/2020/01/07/sensitive-word-stopword)
+[java 如何实现开箱即用的敏感词控台服务?](https://mp.weixin.qq.com/s/rQo75cfMU_OEbTJa0JGMGg)
+
+
+
+# 相关开源库
+
+[heaven 基础工具包](https://github.com/houbb/heaven)
+
+[opencc4j 繁简体转换](https://github.com/houbb/opencc4j)
+
+[pinyin 拼音工具](https://github.com/houbb/pinyin)
+
+[nlp-hanzi-similar 汉字相似度工具](https://github.com/houbb/nlp-hanzi-similar)
diff --git a/WECHAT.png b/WECHAT.png
new file mode 100644
index 0000000..ff0aedb
Binary files /dev/null and b/WECHAT.png differ
diff --git a/pom.xml b/pom.xml
index 2faadb4..66eb642 100644
--- a/pom.xml
+++ b/pom.xml
@@ -6,7 +6,7 @@
com.github.houbb
sensitive-word
- 0.1.1
+ 0.2.0
@@ -25,8 +25,8 @@
1.7
- 0.1.148
- 1.7.1
+ 0.1.154
+ 1.7.2
4.13.1
@@ -104,7 +104,7 @@
${project.compiler.level}
${project.compiler.level}
${project.build.sourceEncoding}
- -proc:none
+
@@ -255,4 +255,4 @@
-
\ No newline at end of file
+
diff --git a/release.bat b/release.bat
index fb5391f..463f219 100644
--- a/release.bat
+++ b/release.bat
@@ -10,9 +10,9 @@ ECHO "============================= RELEASE START..."
:: 版本号信息(需要手动指定)
:::: 旧版本名称
-SET version=0.1.1
+SET version=0.2.0
:::: 新版本名称
-SET newVersion=0.1.2
+SET newVersion=0.2.1
:::: 组织名称
SET groupName=com.github.houbb
:::: 项目名称
diff --git a/src/main/java/com/github/houbb/sensitive/word/api/ISensitiveWordReplace.java b/src/main/java/com/github/houbb/sensitive/word/api/ISensitiveWordReplace.java
new file mode 100644
index 0000000..429e435
--- /dev/null
+++ b/src/main/java/com/github/houbb/sensitive/word/api/ISensitiveWordReplace.java
@@ -0,0 +1,19 @@
+package com.github.houbb.sensitive.word.api;
+
+/**
+ * Replacement strategy for sensitive words.
+ *
+ * @author binbin.hou
+ * @since 0.2.0
+ */
+public interface ISensitiveWordReplace {
+
+ /**
+ * Produces the replacement text for the sensitive word described by the context.
+ * @param context replacement context (exposes the sensitive word and its length)
+ * @return the replacement result
+ * @since 0.2.0
+ */
+ String replace(ISensitiveWordReplaceContext context);
+
+}
diff --git a/src/main/java/com/github/houbb/sensitive/word/api/ISensitiveWordReplaceContext.java b/src/main/java/com/github/houbb/sensitive/word/api/ISensitiveWordReplaceContext.java
new file mode 100644
index 0000000..fce6da0
--- /dev/null
+++ b/src/main/java/com/github/houbb/sensitive/word/api/ISensitiveWordReplaceContext.java
@@ -0,0 +1,25 @@
+package com.github.houbb.sensitive.word.api;
+
+/**
+ * Context handed to a sensitive-word replacement strategy.
+ *
+ * @author binbin.hou
+ * @since 0.2.0
+ */
+public interface ISensitiveWordReplaceContext {
+
+ /**
+ * The sensitive word.
+ * @return the sensitive word
+ * @since 0.2.0
+ */
+ String sensitiveWord();
+
+ /**
+ * The word length.
+ * @return the word length
+ * @since 0.2.0
+ */
+ int wordLength();
+
+}
diff --git a/src/main/java/com/github/houbb/sensitive/word/api/IWordMap.java b/src/main/java/com/github/houbb/sensitive/word/api/IWordMap.java
index 9bf2140..238c15d 100644
--- a/src/main/java/com/github/houbb/sensitive/word/api/IWordMap.java
+++ b/src/main/java/com/github/houbb/sensitive/word/api/IWordMap.java
@@ -59,12 +59,13 @@ public interface IWordMap extends ISensitiveCheck {
* ps: 这里可以添加优化。
*
* @param target 目标字符串
- * @param replaceChar 替换为的 char
+ * @param replace 替换策略
* @param context 上下文
* @return 替换后结果
* @since 0.0.2
*/
- String replace(final String target, final char replaceChar,
+ String replace(final String target,
+ final ISensitiveWordReplace replace,
final IWordContext context);
}
diff --git a/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java b/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java
index 4d7c627..33cb4be 100644
--- a/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java
+++ b/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java
@@ -8,6 +8,7 @@ import com.github.houbb.sensitive.word.api.*;
import com.github.houbb.sensitive.word.support.allow.WordAllows;
import com.github.houbb.sensitive.word.support.deny.WordDenys;
import com.github.houbb.sensitive.word.support.map.SensitiveWordMap;
+import com.github.houbb.sensitive.word.support.replace.SensitiveWordReplaceChar;
import com.github.houbb.sensitive.word.support.result.WordResultHandlers;
import com.github.houbb.sensitive.word.utils.InnerFormatUtils;
@@ -393,9 +394,23 @@ public class SensitiveWordBs {
* @since 0.0.2
*/
public String replace(final String target, final char replaceChar) {
+ ISensitiveWordReplace replace = new SensitiveWordReplaceChar(replaceChar);
+
+ return replace(target, replace);
+ }
+
+ /**
+ * Replaces all sensitive content in the target, delegating to the word map with the given strategy.
+ *
+ * @param target target string
+ * @param replace replacement strategy
+ * @return the result after replacement
+ * @since 0.2.0
+ */
+ public String replace(final String target, final ISensitiveWordReplace replace) {
statusCheck();
- return sensitiveWordMap.replace(target, replaceChar, context);
+ return sensitiveWordMap.replace(target, replace, context);
}
/**
diff --git a/src/main/java/com/github/houbb/sensitive/word/core/SensitiveWordHelper.java b/src/main/java/com/github/houbb/sensitive/word/core/SensitiveWordHelper.java
index c435efe..2fe076b 100644
--- a/src/main/java/com/github/houbb/sensitive/word/core/SensitiveWordHelper.java
+++ b/src/main/java/com/github/houbb/sensitive/word/core/SensitiveWordHelper.java
@@ -1,5 +1,6 @@
package com.github.houbb.sensitive.word.core;
+import com.github.houbb.sensitive.word.api.ISensitiveWordReplace;
import com.github.houbb.sensitive.word.api.IWordResultHandler;
import com.github.houbb.sensitive.word.bs.SensitiveWordBs;
@@ -59,6 +60,18 @@ public final class SensitiveWordHelper {
return WORD_BS.findFirst(target);
}
+ /**
+ * Replaces all sensitive content in the target by delegating to the shared WORD_BS instance.
+ *
+ * @param target target string
+ * @param replace replacement strategy
+ * @return the result after replacement
+ * @since 0.2.0
+ */
+ public static String replace(final String target, final ISensitiveWordReplace replace) {
+ return WORD_BS.replace(target, replace);
+ }
+
/**
* 替换所有内容
*
diff --git a/src/main/java/com/github/houbb/sensitive/word/support/map/SensitiveWordMap.java b/src/main/java/com/github/houbb/sensitive/word/support/map/SensitiveWordMap.java
index 683ee10..18e34da 100644
--- a/src/main/java/com/github/houbb/sensitive/word/support/map/SensitiveWordMap.java
+++ b/src/main/java/com/github/houbb/sensitive/word/support/map/SensitiveWordMap.java
@@ -9,14 +9,13 @@ import com.github.houbb.heaven.util.lang.ObjectUtil;
import com.github.houbb.heaven.util.lang.StringUtil;
import com.github.houbb.heaven.util.util.CollectionUtil;
import com.github.houbb.heaven.util.util.MapUtil;
-import com.github.houbb.sensitive.word.api.IWordContext;
-import com.github.houbb.sensitive.word.api.IWordMap;
-import com.github.houbb.sensitive.word.api.IWordResult;
+import com.github.houbb.sensitive.word.api.*;
import com.github.houbb.sensitive.word.constant.AppConst;
import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum;
import com.github.houbb.sensitive.word.support.check.SensitiveCheckResult;
import com.github.houbb.sensitive.word.support.check.impl.SensitiveCheckChain;
import com.github.houbb.sensitive.word.support.check.impl.SensitiveCheckUrl;
+import com.github.houbb.sensitive.word.support.replace.SensitiveWordReplaceContext;
import com.github.houbb.sensitive.word.support.result.WordResult;
import java.util.Collection;
@@ -156,12 +155,12 @@ public class SensitiveWordMap implements IWordMap {
}
@Override
- public String replace(String target, char replaceChar, final IWordContext context) {
+ public String replace(String target, final ISensitiveWordReplace replace, final IWordContext context) {
if(StringUtil.isEmpty(target)) {
return target;
}
- return this.replaceSensitiveWord(target, replaceChar, context);
+ return this.replaceSensitiveWord(target, replace, context);
}
/**
@@ -213,11 +212,13 @@ public class SensitiveWordMap implements IWordMap {
/**
* 直接替换敏感词,返回替换后的结果
* @param target 文本信息
+ * @param replace 替换策略
+ * @param context 上下文
* @return 脱敏后的字符串
* @since 0.0.2
*/
private String replaceSensitiveWord(final String target,
- final char replaceChar,
+ final ISensitiveWordReplace replace,
final IWordContext context) {
if(StringUtil.isEmpty(target)) {
return target;
@@ -241,7 +242,12 @@ public class SensitiveWordMap implements IWordMap {
// 直接使用原始内容,避免 markdown 图片转换失败
resultBuilder.append(string);
} else {
- String replaceStr = CharUtil.repeat(replaceChar, wordLength);
+ // 创建上下文
+ ISensitiveWordReplaceContext replaceContext = SensitiveWordReplaceContext.newInstance()
+ .sensitiveWord(string)
+ .wordLength(wordLength);
+ String replaceStr = replace.replace(replaceContext);
+
resultBuilder.append(replaceStr);
}
diff --git a/src/main/java/com/github/houbb/sensitive/word/support/replace/SensitiveWordReplaceChar.java b/src/main/java/com/github/houbb/sensitive/word/support/replace/SensitiveWordReplaceChar.java
new file mode 100644
index 0000000..4893eee
--- /dev/null
+++ b/src/main/java/com/github/houbb/sensitive/word/support/replace/SensitiveWordReplaceChar.java
@@ -0,0 +1,29 @@
+package com.github.houbb.sensitive.word.support.replace;
+
+import com.github.houbb.heaven.annotation.ThreadSafe;
+import com.github.houbb.heaven.util.lang.CharUtil;
+import com.github.houbb.sensitive.word.api.ISensitiveWordReplace;
+import com.github.houbb.sensitive.word.api.ISensitiveWordReplaceContext;
+
+/**
+ * Replacement strategy that builds the replacement from a single specified character.
+ * @author binbin.hou
+ * @since 0.2.0
+ */
+@ThreadSafe
+public class SensitiveWordReplaceChar implements ISensitiveWordReplace {
+
+ private final char replaceChar; // character used to build the replacement text
+
+ public SensitiveWordReplaceChar(char replaceChar) {
+ this.replaceChar = replaceChar;
+ }
+
+ @Override
+ public String replace(ISensitiveWordReplaceContext context) {
+ int wordLength = context.wordLength();
+
+ return CharUtil.repeat(replaceChar, wordLength); // presumably replaceChar repeated wordLength times; confirm against CharUtil.repeat
+ }
+
+}
diff --git a/src/main/java/com/github/houbb/sensitive/word/support/replace/SensitiveWordReplaceContext.java b/src/main/java/com/github/houbb/sensitive/word/support/replace/SensitiveWordReplaceContext.java
new file mode 100644
index 0000000..31c67b1
--- /dev/null
+++ b/src/main/java/com/github/houbb/sensitive/word/support/replace/SensitiveWordReplaceContext.java
@@ -0,0 +1,57 @@
+package com.github.houbb.sensitive.word.support.replace;
+
+import com.github.houbb.sensitive.word.api.ISensitiveWordReplaceContext;
+
+/**
+ * Default implementation of the sensitive-word replacement context, populated through fluent setters.
+ *
+ * @author binbin.hou
+ * @since 0.2.0
+ */
+public class SensitiveWordReplaceContext implements ISensitiveWordReplaceContext {
+
+ public static SensitiveWordReplaceContext newInstance() { // static factory: returns a new, unpopulated context
+ return new SensitiveWordReplaceContext();
+ }
+
+ /**
+ * The sensitive word.
+ * @since 0.2.0
+ */
+ private String sensitiveWord;
+
+ /**
+ * The word length.
+ * @since 0.2.0
+ */
+ private int wordLength;
+
+ @Override
+ public String sensitiveWord() {
+ return sensitiveWord;
+ }
+
+ public SensitiveWordReplaceContext sensitiveWord(String sensitiveWord) { // fluent setter: returns this for chaining
+ this.sensitiveWord = sensitiveWord;
+ return this;
+ }
+
+ @Override
+ public int wordLength() {
+ return wordLength;
+ }
+
+ public SensitiveWordReplaceContext wordLength(int wordLength) { // fluent setter: returns this for chaining
+ this.wordLength = wordLength;
+ return this;
+ }
+
+ @Override
+ public String toString() {
+ return "SensitiveWordReplaceContext{" +
+ "sensitiveWord='" + sensitiveWord + '\'' +
+ ", wordLength=" + wordLength +
+ '}';
+ }
+
+}
diff --git a/src/test/java/com/github/houbb/sensitive/word/core/SensitiveWordHelperTest.java b/src/test/java/com/github/houbb/sensitive/word/core/SensitiveWordHelperTest.java
index 978b009..9d82823 100644
--- a/src/test/java/com/github/houbb/sensitive/word/core/SensitiveWordHelperTest.java
+++ b/src/test/java/com/github/houbb/sensitive/word/core/SensitiveWordHelperTest.java
@@ -1,6 +1,8 @@
package com.github.houbb.sensitive.word.core;
+import com.github.houbb.sensitive.word.api.ISensitiveWordReplace;
import com.github.houbb.sensitive.word.api.IWordResult;
+import com.github.houbb.sensitive.word.replace.MySensitiveWordReplace;
import com.github.houbb.sensitive.word.support.result.WordResultHandlers;
import org.junit.Assert;
import org.junit.Test;
@@ -148,4 +150,18 @@ public class SensitiveWordHelperTest {
Assert.assertEquals("fuck", word);
}
+ /**
+ * Custom replacement strategy: mapped words get fixed substitutes, other hits become asterisks.
+ * @since 0.2.0
+ */
+ @Test
+ public void defineReplaceTest() {
+ final String text = "五星红旗迎风飘扬,毛主席的画像屹立在天安门前。";
+
+ ISensitiveWordReplace replace = new MySensitiveWordReplace();
+ String result = SensitiveWordHelper.replace(text, replace);
+
+ Assert.assertEquals("国家旗帜迎风飘扬,教员的画像屹立在***前。", result);
+ }
+
}
diff --git a/src/test/java/com/github/houbb/sensitive/word/replace/MySensitiveWordReplace.java b/src/test/java/com/github/houbb/sensitive/word/replace/MySensitiveWordReplace.java
new file mode 100644
index 0000000..32d6a8e
--- /dev/null
+++ b/src/test/java/com/github/houbb/sensitive/word/replace/MySensitiveWordReplace.java
@@ -0,0 +1,31 @@
+package com.github.houbb.sensitive.word.replace;
+
+import com.github.houbb.heaven.util.lang.CharUtil;
+import com.github.houbb.sensitive.word.api.ISensitiveWordReplace;
+import com.github.houbb.sensitive.word.api.ISensitiveWordReplaceContext;
+
+/**
+ * Custom sensitive-word replacement strategy used by the tests.
+ *
+ * @author binbin.hou
+ * @since 0.2.0
+ */
+public class MySensitiveWordReplace implements ISensitiveWordReplace {
+
+ @Override
+ public String replace(ISensitiveWordReplaceContext context) {
+ String sensitiveWord = context.sensitiveWord();
+ // Custom per-word replacement rules; these could be loaded from a database or similar source
+ if("五星红旗".equals(sensitiveWord)) {
+ return "国家旗帜";
+ }
+ if("毛主席".equals(sensitiveWord)) {
+ return "教员";
+ }
+
+ // Every other word falls back to '*' as the replacement character
+ int wordLength = context.wordLength();
+ return CharUtil.repeat('*', wordLength);
+ }
+
+}