mirror of
https://github.com/houbb/sensitive-word.git
synced 2026-03-22 00:17:35 +08:00
release branch 0.0.13
This commit is contained in:
@@ -93,3 +93,10 @@
|
||||
| 序号 | 变更类型 | 说明 | 时间 | 备注 |
|
||||
|:---|:---|:---|:---|:--|
|
||||
| 1 | A | 添加对于网址的过滤 | 2020-1-16 20:51:58 | |
|
||||
|
||||
# release_0.0.13
|
||||
|
||||
| 序号 | 变更类型 | 说明 | 时间 | 备注 |
|
||||
|:---|:---|:---|:---|:--|
|
||||
| 1 | A | 新增 Helper 工具类 | 2021-5-12 20:51:58 | |
|
||||
| 2 | A | 新增动态词库初始化支持 | 2021-5-12 20:51:58 | |
|
||||
109
README.md
109
README.md
@@ -3,7 +3,7 @@
|
||||
[sensitive-word](https://github.com/houbb/sensitive-word) 基于 DFA 算法实现的高性能敏感词工具。
|
||||
|
||||
[Maven Central](http://mvnrepository.com/artifact/com.github.houbb/sensitive-word)
|
||||
|
||||
[GitHub](https://github.com/houbb/sensitive-word)
|
||||
[License](https://github.com/houbb/sensitive-word/blob/master/LICENSE.txt)
|
||||
|
||||
## 创作目的
|
||||
@@ -56,17 +56,16 @@
|
||||
<dependency>
|
||||
<groupId>com.github.houbb</groupId>
|
||||
<artifactId>sensitive-word</artifactId>
|
||||
<version>0.0.12</version>
|
||||
<version>0.0.13</version>
|
||||
</dependency>
|
||||
```
|
||||
|
||||
## api 概览
|
||||
|
||||
`SensitiveWordBs` 作为敏感词的引导类,核心方法如下:
|
||||
`SensitiveWordHelper` 作为敏感词的工具类,核心方法如下:
|
||||
|
||||
| 方法 | 参数 | 返回值| 说明 |
|
||||
|:---|:---|:---|:---|
|
||||
| newInstance() | 无 | 引导类 | 初始化引导类 |
|
||||
| contains(String) | 待验证的字符串 | 布尔值 | 验证字符串是否包含敏感词 |
|
||||
| findAll(String) | 待验证的字符串 | 字符串列表 | 返回字符串中所有敏感词 |
|
||||
| replace(String, char) | 使用指定的 char 替换敏感词 | 字符串 | 返回脱敏后的字符串 |
|
||||
@@ -74,14 +73,14 @@
|
||||
|
||||
## 使用实例
|
||||
|
||||
所有测试案例参见 [SensitiveWordBsTest](https://github.com/houbb/sensitive-word/blob/master/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsTest.java)
|
||||
所有测试案例参见 [SensitiveWordHelperTest](https://github.com/houbb/sensitive-word/blob/master/src/test/java/com/github/houbb/sensitive/word/core/SensitiveWordHelperTest.java)
|
||||
|
||||
### 判断是否包含敏感词
|
||||
|
||||
```java
|
||||
final String text = "五星红旗迎风飘扬,毛主席的画像屹立在天安门前。";
|
||||
|
||||
Assert.assertTrue(SensitiveWordBs.newInstance().contains(text));
|
||||
Assert.assertTrue(SensitiveWordHelper.contains(text));
|
||||
```
|
||||
|
||||
### 返回第一个敏感词
|
||||
@@ -89,7 +88,7 @@ Assert.assertTrue(SensitiveWordBs.newInstance().contains(text));
|
||||
```java
|
||||
final String text = "五星红旗迎风飘扬,毛主席的画像屹立在天安门前。";
|
||||
|
||||
String word = SensitiveWordBs.newInstance().findFirst(text);
|
||||
String word = SensitiveWordHelper.findFirst(text);
|
||||
Assert.assertEquals("五星红旗", word);
|
||||
```
|
||||
|
||||
@@ -98,7 +97,7 @@ Assert.assertEquals("五星红旗", word);
|
||||
```java
|
||||
final String text = "五星红旗迎风飘扬,毛主席的画像屹立在天安门前。";
|
||||
|
||||
List<String> wordList = SensitiveWordBs.newInstance().findAll(text);
|
||||
List<String> wordList = SensitiveWordHelper.findAll(text);
|
||||
Assert.assertEquals("[五星红旗, 毛主席, 天安门]", wordList.toString());
|
||||
```
|
||||
|
||||
@@ -106,7 +105,7 @@ Assert.assertEquals("[五星红旗, 毛主席, 天安门]", wordList.toString())
|
||||
|
||||
```java
|
||||
final String text = "五星红旗迎风飘扬,毛主席的画像屹立在天安门前。";
|
||||
String result = SensitiveWordBs.newInstance().replace(text);
|
||||
String result = SensitiveWordHelper.replace(text);
|
||||
Assert.assertEquals("****迎风飘扬,***的画像屹立在***前。", result);
|
||||
```
|
||||
|
||||
@@ -114,7 +113,7 @@ Assert.assertEquals("****迎风飘扬,***的画像屹立在***前。", result)
|
||||
|
||||
```java
|
||||
final String text = "五星红旗迎风飘扬,毛主席的画像屹立在天安门前。";
|
||||
String result = SensitiveWordBs.newInstance().replace(text, '0');
|
||||
String result = SensitiveWordHelper.replace(text, '0');
|
||||
Assert.assertEquals("0000迎风飘扬,000的画像屹立在000前。", result);
|
||||
```
|
||||
|
||||
@@ -129,7 +128,7 @@ Assert.assertEquals("0000迎风飘扬,000的画像屹立在000前。", result)
|
||||
```java
|
||||
final String text = "fuCK the bad words.";
|
||||
|
||||
String word = SensitiveWordBs.newInstance().findFirst(text);
|
||||
String word = SensitiveWordHelper.findFirst(text);
|
||||
Assert.assertEquals("fuCK", word);
|
||||
```
|
||||
|
||||
@@ -138,7 +137,7 @@ Assert.assertEquals("fuCK", word);
|
||||
```java
|
||||
final String text = "fuck the bad words.";
|
||||
|
||||
String word = SensitiveWordBs.newInstance().findFirst(text);
|
||||
String word = SensitiveWordHelper.findFirst(text);
|
||||
Assert.assertEquals("fuck", word);
|
||||
```
|
||||
|
||||
@@ -149,7 +148,7 @@ Assert.assertEquals("fuck", word);
|
||||
```java
|
||||
final String text = "这个是我的微信:9⓿二肆⁹₈③⑸⒋➃㈤㊄";
|
||||
|
||||
List<String> wordList = SensitiveWordBs.newInstance().findAll(text);
|
||||
List<String> wordList = SensitiveWordHelper.findAll(text);
|
||||
Assert.assertEquals("[9⓿二肆⁹₈③⑸⒋➃㈤㊄]", wordList.toString());
|
||||
```
|
||||
|
||||
@@ -158,7 +157,7 @@ Assert.assertEquals("[9⓿二肆⁹₈③⑸⒋➃㈤㊄]", wordList.toString())
|
||||
```java
|
||||
final String text = "我爱我的祖国和五星紅旗。";
|
||||
|
||||
List<String> wordList = SensitiveWordBs.newInstance().findAll(text);
|
||||
List<String> wordList = SensitiveWordHelper.findAll(text);
|
||||
Assert.assertEquals("[五星紅旗]", wordList.toString());
|
||||
```
|
||||
|
||||
@@ -167,7 +166,7 @@ Assert.assertEquals("[五星紅旗]", wordList.toString());
|
||||
```java
|
||||
final String text = "Ⓕⓤc⒦ the bad words";
|
||||
|
||||
List<String> wordList = SensitiveWordBs.newInstance().findAll(text);
|
||||
List<String> wordList = SensitiveWordHelper.findAll(text);
|
||||
Assert.assertEquals("[Ⓕⓤc⒦]", wordList.toString());
|
||||
```
|
||||
|
||||
@@ -176,7 +175,7 @@ Assert.assertEquals("[Ⓕⓤc⒦]", wordList.toString());
|
||||
```java
|
||||
final String text = "ⒻⒻⒻfⓤuⓤ⒰cⓒ⒦ the bad words";
|
||||
|
||||
List<String> wordList = SensitiveWordBs.newInstance().findAll(text);
|
||||
List<String> wordList = SensitiveWordHelper.findAll(text);
|
||||
Assert.assertEquals("[ⒻⒻⒻfⓤuⓤ⒰cⓒ⒦]", wordList.toString());
|
||||
```
|
||||
|
||||
@@ -185,7 +184,7 @@ Assert.assertEquals("[ⒻⒻⒻfⓤuⓤ⒰cⓒ⒦]", wordList.toString());
|
||||
```java
|
||||
final String text = "楼主好人,邮箱 sensitiveword@xx.com";
|
||||
|
||||
List<String> wordList = SensitiveWordBs.newInstance().findAll(text);
|
||||
List<String> wordList = SensitiveWordHelper.findAll(text);
|
||||
Assert.assertEquals("[sensitiveword@xx.com]", wordList.toString());
|
||||
```
|
||||
|
||||
@@ -209,10 +208,84 @@ Assert.assertEquals("[sensitiveword@xx.com]", wordList.toString());
|
||||
```java
|
||||
final String text = "gender 我们认为应该通过,自定义敏感词我们认为应该拒绝。";
|
||||
|
||||
List<String> wordList = SensitiveWordBs.newInstance().findAll(text);
|
||||
List<String> wordList = SensitiveWordHelper.findAll(text);
|
||||
Assert.assertEquals("[自定义敏感词]", wordList.toString());
|
||||
```
|
||||
|
||||
# 动态加载
|
||||
|
||||
## 情景说明
|
||||
|
||||
有时候我们希望将敏感词的加载设计成动态的,比如控台修改,然后可以实时生效。
|
||||
|
||||
v0.0.13 支持了这种特性。
|
||||
|
||||
## 接口说明
|
||||
|
||||
为了实现这个特性,并且兼容以前的功能,我们定义了两个接口。
|
||||
|
||||
### IWordDeny
|
||||
|
||||
接口如下,可以自定义自己的实现。
|
||||
|
||||
```java
|
||||
/**
|
||||
* 拒绝出现的数据-返回的内容被当做是敏感词
|
||||
* @author binbin.hou
|
||||
* @since 0.0.13
|
||||
*/
|
||||
public interface IWordDeny {
|
||||
|
||||
/**
|
||||
* 获取结果
|
||||
* @return 结果
|
||||
* @since 0.0.13
|
||||
*/
|
||||
List<String> deny();
|
||||
|
||||
}
|
||||
```
|
||||
|
||||
### IWordAllow
|
||||
|
||||
接口如下,可以自定义自己的实现。
|
||||
|
||||
```java
|
||||
/**
|
||||
* 允许的内容-返回的内容不被当做敏感词
|
||||
* @author binbin.hou
|
||||
* @since 0.0.13
|
||||
*/
|
||||
public interface IWordAllow {
|
||||
|
||||
/**
|
||||
* 获取结果
|
||||
* @return 结果
|
||||
* @since 0.0.13
|
||||
*/
|
||||
List<String> allow();
|
||||
|
||||
}
|
||||
```
|
||||
|
||||
## 配置使用
|
||||
|
||||
为了让使用更加优雅,我们设计了引导类 `SensitiveWordBs`。
|
||||
|
||||
可以通过 wordDeny() 指定敏感词,wordAllow() 指定非敏感词,通过 init() 初始化敏感词字典。
|
||||
|
||||
```java
|
||||
SensitiveWordBs wordBs = SensitiveWordBs.newInstance()
|
||||
.wordDeny(WordDenys.system())
|
||||
.wordAllow(WordAllows.system())
|
||||
.init();
|
||||
|
||||
final String text = "五星红旗迎风飘扬,毛主席的画像屹立在天安门前。";
|
||||
Assert.assertTrue(wordBs.contains(text));
|
||||
```
|
||||
|
||||
备注:init() 需要构建敏感词 DFA,比较耗时,建议在应用初始化的时候**只初始化一次**,而不是重复初始化!
|
||||
|
||||
# 后期 road-map
|
||||
|
||||
- 停顿词
|
||||
|
||||
2
pom.xml
2
pom.xml
@@ -6,7 +6,7 @@
|
||||
|
||||
<groupId>com.github.houbb</groupId>
|
||||
<artifactId>sensitive-word</artifactId>
|
||||
<version>0.0.13-SNAPSHOT</version>
|
||||
<version>0.0.13</version>
|
||||
|
||||
<properties>
|
||||
<!--============================== All Plugins START ==============================-->
|
||||
|
||||
@@ -10,9 +10,9 @@ ECHO "============================= RELEASE START..."
|
||||
|
||||
:: 版本号信息(需要手动指定)
|
||||
:::: 旧版本名称
|
||||
SET version=0.0.12
|
||||
SET version=0.0.13
|
||||
:::: 新版本名称
|
||||
SET newVersion=0.0.13
|
||||
SET newVersion=0.0.14
|
||||
:::: 组织名称
|
||||
SET groupName=com.github.houbb
|
||||
:::: 项目名称
|
||||
|
||||
@@ -0,0 +1,19 @@
|
||||
package com.github.houbb.sensitive.word.api;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
 * Source of allow-listed words: the words returned by an implementation are
 * not treated as sensitive words.
 *
 * @author binbin.hou
 * @since 0.0.13
 */
public interface IWordAllow {

    /**
     * Returns the allow-listed words.
     *
     * @return the words that must not be treated as sensitive
     * @since 0.0.13
     */
    List<String> allow();

}
|
||||
@@ -7,6 +7,7 @@ import java.util.List;
|
||||
* @author binbin.hou
|
||||
* @since 0.0.1
|
||||
*/
|
||||
@Deprecated
|
||||
public interface IWordData {
|
||||
|
||||
/**
|
||||
|
||||
@@ -0,0 +1,19 @@
|
||||
package com.github.houbb.sensitive.word.api;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
 * Source of deny-listed words: the words returned by an implementation are
 * treated as sensitive words.
 *
 * @author binbin.hou
 * @since 0.0.13
 */
public interface IWordDeny {

    /**
     * Returns the deny-listed words.
     *
     * @return the words that are treated as sensitive
     * @since 0.0.13
     */
    List<String> deny();

}
|
||||
@@ -1,10 +1,13 @@
|
||||
package com.github.houbb.sensitive.word.bs;
|
||||
|
||||
import com.github.houbb.heaven.constant.CharConst;
|
||||
import com.github.houbb.sensitive.word.api.IWordContext;
|
||||
import com.github.houbb.sensitive.word.api.IWordData;
|
||||
import com.github.houbb.sensitive.word.api.IWordMap;
|
||||
import com.github.houbb.heaven.util.common.ArgUtil;
|
||||
import com.github.houbb.heaven.util.util.CollectionUtil;
|
||||
import com.github.houbb.sensitive.word.api.*;
|
||||
import com.github.houbb.sensitive.word.exception.SensitiveWordException;
|
||||
import com.github.houbb.sensitive.word.support.allow.WordAllows;
|
||||
import com.github.houbb.sensitive.word.support.data.SensitiveWordData;
|
||||
import com.github.houbb.sensitive.word.support.deny.WordDenys;
|
||||
import com.github.houbb.sensitive.word.support.map.SensitiveWordMap;
|
||||
|
||||
import java.util.List;
|
||||
@@ -30,37 +33,42 @@ public class SensitiveWordBs {
|
||||
*
|
||||
* @since 0.0.1
|
||||
*/
|
||||
private static volatile IWordMap sensitiveWordMap;
|
||||
private IWordMap sensitiveWordMap;
|
||||
|
||||
/**
|
||||
* 默认的执行上下文
|
||||
*
|
||||
* @since 0.0.4
|
||||
*/
|
||||
private volatile IWordContext context;
|
||||
private final IWordContext context = buildDefaultContext();
|
||||
|
||||
/**
|
||||
* 禁止的单词
|
||||
* @since 0.0.13
|
||||
*/
|
||||
private IWordDeny wordDeny = WordDenys.system();
|
||||
|
||||
/**
|
||||
* 允许的单词
|
||||
* @since 0.0.13
|
||||
*/
|
||||
private IWordAllow wordAllow = WordAllows.system();
|
||||
|
||||
/**
|
||||
* DCL 初始化 wordMap 信息
|
||||
*
|
||||
* @return 初始化后的结果
|
||||
* 注意:map 的构建是一个比较耗时的动作
|
||||
* @since 0.0.4
|
||||
*/
|
||||
private static IWordMap initWordMap() {
|
||||
if (sensitiveWordMap == null) {
|
||||
synchronized (IWordMap.class) {
|
||||
if (sensitiveWordMap == null) {
|
||||
// 加载配置信息
|
||||
IWordData wordData = new SensitiveWordData();
|
||||
List<String> lines = wordData.getWordData();
|
||||
private synchronized void initWordMap() {
|
||||
// 加载配置信息
|
||||
List<String> denyList = wordDeny.deny();
|
||||
List<String> allowList = wordAllow.allow();
|
||||
List<String> results = CollectionUtil.difference(denyList, allowList);
|
||||
|
||||
// 初始化 DFA 信息
|
||||
sensitiveWordMap = new SensitiveWordMap();
|
||||
sensitiveWordMap.initWordMap(lines);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return sensitiveWordMap;
|
||||
// 初始化 DFA 信息
|
||||
sensitiveWordMap = new SensitiveWordMap();
|
||||
sensitiveWordMap.initWordMap(results);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -72,12 +80,44 @@ public class SensitiveWordBs {
|
||||
* @since 0.0.1
|
||||
*/
|
||||
public static SensitiveWordBs newInstance() {
|
||||
initWordMap();
|
||||
return new SensitiveWordBs();
|
||||
}
|
||||
|
||||
SensitiveWordBs bs = new SensitiveWordBs();
|
||||
bs.context = buildDefaultContext();
|
||||
/**
|
||||
* 初始化
|
||||
*
|
||||
* 1. 根据配置,初始化对应的 map。比较消耗性能。
|
||||
* @since 0.0.13
|
||||
* @return this
|
||||
*/
|
||||
public SensitiveWordBs init() {
|
||||
this.initWordMap();
|
||||
|
||||
return bs;
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* 设置禁止的实现
|
||||
* @param wordDeny 禁止的实现
|
||||
* @return this
|
||||
* @since 0.0.13
|
||||
*/
|
||||
public SensitiveWordBs wordDeny(IWordDeny wordDeny) {
|
||||
ArgUtil.notNull(wordDeny, "wordDeny");
|
||||
this.wordDeny = wordDeny;
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* 设置允许的实现
|
||||
* @param wordAllow 允许的实现
|
||||
* @return this
|
||||
* @since 0.0.13
|
||||
*/
|
||||
public SensitiveWordBs wordAllow(IWordAllow wordAllow) {
|
||||
ArgUtil.notNull(wordAllow, "wordAllow");
|
||||
this.wordAllow = wordAllow;
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -122,7 +162,7 @@ public class SensitiveWordBs {
|
||||
* @return 结果
|
||||
* @since 0.0.4
|
||||
*/
|
||||
private static IWordContext buildDefaultContext() {
|
||||
private IWordContext buildDefaultContext() {
|
||||
IWordContext wordContext = SensitiveWordContext.newInstance();
|
||||
// 格式统一化
|
||||
wordContext.ignoreCase(true);
|
||||
@@ -148,6 +188,8 @@ public class SensitiveWordBs {
|
||||
* @since 0.0.1
|
||||
*/
|
||||
public boolean contains(final String target) {
|
||||
statusCheck();
|
||||
|
||||
return sensitiveWordMap.contains(target, context);
|
||||
}
|
||||
|
||||
@@ -161,6 +203,8 @@ public class SensitiveWordBs {
|
||||
* @since 0.0.1
|
||||
*/
|
||||
public List<String> findAll(final String target) {
|
||||
statusCheck();
|
||||
|
||||
return sensitiveWordMap.findAll(target, context);
|
||||
}
|
||||
|
||||
@@ -173,6 +217,8 @@ public class SensitiveWordBs {
|
||||
* @since 0.0.1
|
||||
*/
|
||||
public String findFirst(final String target) {
|
||||
statusCheck();
|
||||
|
||||
return sensitiveWordMap.findFirst(target, context);
|
||||
}
|
||||
|
||||
@@ -185,6 +231,8 @@ public class SensitiveWordBs {
|
||||
* @since 0.0.2
|
||||
*/
|
||||
public String replace(final String target, final char replaceChar) {
|
||||
statusCheck();
|
||||
|
||||
return sensitiveWordMap.replace(target, replaceChar, context);
|
||||
}
|
||||
|
||||
@@ -200,4 +248,15 @@ public class SensitiveWordBs {
|
||||
return this.replace(target, CharConst.STAR);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* 状态校验
|
||||
* @since 0.0.13
|
||||
*/
|
||||
private void statusCheck(){
|
||||
if(sensitiveWordMap == null) {
|
||||
this.init();
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -0,0 +1,82 @@
|
||||
package com.github.houbb.sensitive.word.core;
|
||||
|
||||
import com.github.houbb.sensitive.word.bs.SensitiveWordBs;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* 敏感词工具类
|
||||
* @author binbin.hou
|
||||
* @since 0.0.13
|
||||
*/
|
||||
public final class SensitiveWordHelper {
|
||||
|
||||
private SensitiveWordHelper(){}
|
||||
|
||||
/**
|
||||
* 默认的实现
|
||||
* @since 0.0.13
|
||||
*/
|
||||
private static final SensitiveWordBs WORD_BS = SensitiveWordBs.newInstance().init();
|
||||
|
||||
/**
|
||||
* 是否包含敏感词
|
||||
*
|
||||
* @param target 目标字符串
|
||||
* @return 是否
|
||||
* @since 0.0.13
|
||||
*/
|
||||
public static boolean contains(final String target) {
|
||||
return WORD_BS.contains(target);
|
||||
}
|
||||
|
||||
/**
|
||||
* 返回所有的敏感词
|
||||
* 1. 这里是默认去重的,且是有序的。
|
||||
* 2. 如果不存在,返回空列表
|
||||
*
|
||||
* @param target 目标字符串
|
||||
* @return 敏感词列表
|
||||
* @since 0.0.1
|
||||
*/
|
||||
public static List<String> findAll(final String target) {
|
||||
return WORD_BS.findAll(target);
|
||||
}
|
||||
|
||||
/**
|
||||
* 返回第一个敏感词
|
||||
* (1)如果不存在,则返回 {@code null}
|
||||
*
|
||||
* @param target 目标字符串
|
||||
* @return 敏感词
|
||||
* @since 0.0.13
|
||||
*/
|
||||
public static String findFirst(final String target) {
|
||||
return WORD_BS.findFirst(target);
|
||||
}
|
||||
|
||||
/**
|
||||
* 替换所有内容
|
||||
*
|
||||
* @param target 目标字符串
|
||||
* @param replaceChar 替换为的 char
|
||||
* @return 替换后结果
|
||||
* @since 0.0.13
|
||||
*/
|
||||
public static String replace(final String target, final char replaceChar) {
|
||||
return WORD_BS.replace(target, replaceChar);
|
||||
}
|
||||
|
||||
/**
|
||||
* 替换所有内容
|
||||
* 1. 默认使用空格替换,避免星号改变 md 的格式。
|
||||
*
|
||||
* @param target 目标字符串
|
||||
* @return 替换后结果
|
||||
* @since 0.0.13
|
||||
*/
|
||||
public static String replace(final String target) {
|
||||
return WORD_BS.replace(target);
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,43 @@
|
||||
package com.github.houbb.sensitive.word.support.allow;
|
||||
|
||||
import com.github.houbb.heaven.annotation.ThreadSafe;
|
||||
import com.github.houbb.heaven.support.pipeline.Pipeline;
|
||||
import com.github.houbb.heaven.support.pipeline.impl.DefaultPipeline;
|
||||
import com.github.houbb.sensitive.word.api.IWordAllow;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* 初始化类
|
||||
*
|
||||
* @author binbin.hou
|
||||
* @since 0.0.13
|
||||
*/
|
||||
@ThreadSafe
|
||||
public abstract class WordAllowInit implements IWordAllow {
|
||||
|
||||
/**
|
||||
* 初始化列表
|
||||
*
|
||||
* @param pipeline 当前列表泳道
|
||||
* @since 0.0.13
|
||||
*/
|
||||
protected abstract void init(final Pipeline<IWordAllow> pipeline);
|
||||
|
||||
@Override
|
||||
public List<String> allow() {
|
||||
Pipeline<IWordAllow> pipeline = new DefaultPipeline<>();
|
||||
this.init(pipeline);
|
||||
|
||||
List<String> results = new ArrayList<>();
|
||||
List<IWordAllow> wordAllows = pipeline.list();
|
||||
for (IWordAllow wordAllow : wordAllows) {
|
||||
List<String> allowList = wordAllow.allow();
|
||||
results.addAll(allowList);
|
||||
}
|
||||
|
||||
return results;
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,23 @@
|
||||
package com.github.houbb.sensitive.word.support.allow;
|
||||
|
||||
import com.github.houbb.heaven.annotation.ThreadSafe;
|
||||
import com.github.houbb.heaven.util.io.StreamUtil;
|
||||
import com.github.houbb.sensitive.word.api.IWordAllow;
|
||||
import com.github.houbb.sensitive.word.api.IWordDeny;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* 系统默认的信息
|
||||
* @author binbin.hou
|
||||
* @since 0.0.13
|
||||
*/
|
||||
@ThreadSafe
|
||||
public class WordAllowSystem implements IWordAllow {
|
||||
|
||||
@Override
|
||||
public List<String> allow() {
|
||||
return StreamUtil.readAllLines("/sensitive_word_allow.txt");
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,49 @@
|
||||
package com.github.houbb.sensitive.word.support.allow;
|
||||
|
||||
import com.github.houbb.heaven.support.instance.impl.Instances;
|
||||
import com.github.houbb.heaven.support.pipeline.Pipeline;
|
||||
import com.github.houbb.heaven.util.util.ArrayUtil;
|
||||
import com.github.houbb.sensitive.word.api.IWordAllow;
|
||||
|
||||
/**
|
||||
* 所有允许的结果
|
||||
* @author binbin.hou
|
||||
* @since 0.0.13
|
||||
*/
|
||||
public final class WordAllows {
|
||||
|
||||
private WordAllows(){}
|
||||
|
||||
/**
|
||||
* 责任链
|
||||
* @param wordAllow 允许
|
||||
* @param others 其他
|
||||
* @return 结果
|
||||
* @since 0.0.13
|
||||
*/
|
||||
public static IWordAllow chains(final IWordAllow wordAllow,
|
||||
final IWordAllow... others) {
|
||||
return new WordAllowInit() {
|
||||
@Override
|
||||
protected void init(Pipeline<IWordAllow> pipeline) {
|
||||
pipeline.addLast(wordAllow);
|
||||
|
||||
if(ArrayUtil.isNotEmpty(others)) {
|
||||
for(IWordAllow other : others) {
|
||||
pipeline.addLast(other);
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* 系统实现
|
||||
* @return 结果
|
||||
* @since 0.0.13
|
||||
*/
|
||||
public static IWordAllow system() {
|
||||
return Instances.singleton(WordAllowSystem.class);
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,44 @@
|
||||
package com.github.houbb.sensitive.word.support.deny;
|
||||
|
||||
import com.github.houbb.heaven.annotation.ThreadSafe;
|
||||
import com.github.houbb.heaven.support.pipeline.Pipeline;
|
||||
import com.github.houbb.heaven.support.pipeline.impl.DefaultPipeline;
|
||||
import com.github.houbb.heaven.util.io.StreamUtil;
|
||||
import com.github.houbb.sensitive.word.api.IWordDeny;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* 初始化类
|
||||
*
|
||||
* @author binbin.hou
|
||||
* @since 0.0.13
|
||||
*/
|
||||
@ThreadSafe
|
||||
public abstract class WordDenyInit implements IWordDeny {
|
||||
|
||||
/**
|
||||
* 初始化列表
|
||||
*
|
||||
* @param pipeline 当前列表泳道
|
||||
* @since 0.0.13
|
||||
*/
|
||||
protected abstract void init(final Pipeline<IWordDeny> pipeline);
|
||||
|
||||
@Override
|
||||
public List<String> deny() {
|
||||
Pipeline<IWordDeny> pipeline = new DefaultPipeline<>();
|
||||
this.init(pipeline);
|
||||
|
||||
List<String> results = new ArrayList<>();
|
||||
List<IWordDeny> wordDenies = pipeline.list();
|
||||
for (IWordDeny wordDeny : wordDenies) {
|
||||
List<String> denyList = wordDeny.deny();
|
||||
results.addAll(denyList);
|
||||
}
|
||||
|
||||
return results;
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,27 @@
|
||||
package com.github.houbb.sensitive.word.support.deny;
|
||||
|
||||
import com.github.houbb.heaven.annotation.ThreadSafe;
|
||||
import com.github.houbb.heaven.util.guava.Guavas;
|
||||
import com.github.houbb.heaven.util.io.StreamUtil;
|
||||
import com.github.houbb.sensitive.word.api.IWordDeny;
|
||||
import com.github.houbb.sensitive.word.constant.AppConst;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* 系统默认的信息
|
||||
* @author binbin.hou
|
||||
* @since 0.0.13
|
||||
*/
|
||||
@ThreadSafe
|
||||
public class WordDenySystem implements IWordDeny {
|
||||
|
||||
@Override
|
||||
public List<String> deny() {
|
||||
List<String> results = StreamUtil.readAllLines("/dict.txt");
|
||||
results.addAll(StreamUtil.readAllLines("/dict_en.txt"));
|
||||
results.addAll(StreamUtil.readAllLines("/sensitive_word_deny.txt"));
|
||||
return results;
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,49 @@
|
||||
package com.github.houbb.sensitive.word.support.deny;
|
||||
|
||||
import com.github.houbb.heaven.support.instance.impl.Instances;
|
||||
import com.github.houbb.heaven.support.pipeline.Pipeline;
|
||||
import com.github.houbb.heaven.util.util.ArrayUtil;
|
||||
import com.github.houbb.sensitive.word.api.IWordDeny;
|
||||
|
||||
/**
|
||||
* 所有拒绝的结果
|
||||
* @author binbin.hou
|
||||
* @since 0.0.13
|
||||
*/
|
||||
public final class WordDenys {
|
||||
|
||||
private WordDenys(){}
|
||||
|
||||
/**
|
||||
* 责任链
|
||||
* @param wordDeny 拒绝
|
||||
* @param others 其他
|
||||
* @return 结果
|
||||
* @since 0.0.13
|
||||
*/
|
||||
public static IWordDeny chains(final IWordDeny wordDeny,
|
||||
final IWordDeny... others) {
|
||||
return new WordDenyInit() {
|
||||
@Override
|
||||
protected void init(Pipeline<IWordDeny> pipeline) {
|
||||
pipeline.addLast(wordDeny);
|
||||
|
||||
if(ArrayUtil.isNotEmpty(others)) {
|
||||
for(IWordDeny other : others) {
|
||||
pipeline.addLast(other);
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* 系统实现
|
||||
* @return 结果
|
||||
* @since 0.0.13
|
||||
*/
|
||||
public static IWordDeny system() {
|
||||
return Instances.singleton(WordDenySystem.class);
|
||||
}
|
||||
|
||||
}
|
||||
@@ -1,4 +1,3 @@
|
||||
|
||||
001工程
|
||||
007手机防盗软件任意显软件
|
||||
007间谍专业版
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
package com.github.houbb.sensitive.word.bs;
|
||||
|
||||
import com.github.houbb.sensitive.word.support.allow.WordAllows;
|
||||
import com.github.houbb.sensitive.word.support.deny.WordDenys;
|
||||
import org.junit.Assert;
|
||||
import org.junit.Test;
|
||||
|
||||
@@ -97,4 +99,15 @@ public class SensitiveWordBsTest {
|
||||
Assert.assertEquals("fuck", word);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void configTest() {
|
||||
SensitiveWordBs wordBs = SensitiveWordBs.newInstance()
|
||||
.wordDeny(WordDenys.system())
|
||||
.wordAllow(WordAllows.system())
|
||||
.init();
|
||||
|
||||
final String text = "五星红旗迎风飘扬,毛主席的画像屹立在天安门前。";
|
||||
Assert.assertTrue(wordBs.contains(text));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -0,0 +1,100 @@
|
||||
package com.github.houbb.sensitive.word.core;
|
||||
|
||||
import org.junit.Assert;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* <p> project: sensitive-word-SensitiveWordBsTest </p>
|
||||
* <p> create on 2020/1/7 23:43 </p>
|
||||
*
|
||||
* @author Administrator
|
||||
* @since 0.0.13
|
||||
*/
|
||||
public class SensitiveWordHelperTest {
|
||||
|
||||
/**
|
||||
* 是否包含
|
||||
* @since 0.0.1
|
||||
*/
|
||||
@Test
|
||||
public void containsTest() {
|
||||
final String text = "五星红旗迎风飘扬,毛主席的画像屹立在天安门前。";
|
||||
|
||||
Assert.assertTrue(SensitiveWordHelper.contains(text));
|
||||
}
|
||||
|
||||
/**
|
||||
* 返回所有敏感词
|
||||
* @since 0.0.1
|
||||
*/
|
||||
@Test
|
||||
public void findAllTest() {
|
||||
final String text = "五星红旗迎风飘扬,毛主席的画像屹立在天安门前。";
|
||||
|
||||
List<String> wordList = SensitiveWordHelper.findAll(text);
|
||||
Assert.assertEquals("[五星红旗, 毛主席, 天安门]", wordList.toString());
|
||||
}
|
||||
|
||||
/**
|
||||
* 返回所有第一个匹配的敏感词
|
||||
* @since 0.0.1
|
||||
*/
|
||||
@Test
|
||||
public void findFirstTest() {
|
||||
final String text = "五星红旗迎风飘扬,毛主席的画像屹立在天安门前。";
|
||||
|
||||
String word = SensitiveWordHelper.findFirst(text);
|
||||
Assert.assertEquals("五星红旗", word);
|
||||
}
|
||||
|
||||
/**
|
||||
* 默认的替换策略
|
||||
* @since 0.0.2
|
||||
*/
|
||||
@Test
|
||||
public void replaceTest() {
|
||||
final String text = "五星红旗迎风飘扬,毛主席的画像屹立在天安门前。";
|
||||
|
||||
String result = SensitiveWordHelper.replace(text);
|
||||
Assert.assertEquals("****迎风飘扬,***的画像屹立在***前。", result);
|
||||
}
|
||||
|
||||
/**
|
||||
* 自定义字符的替换策略
|
||||
* @since 0.0.2
|
||||
*/
|
||||
@Test
|
||||
public void replaceCharTest() {
|
||||
final String text = "五星红旗迎风飘扬,毛主席的画像屹立在天安门前。";
|
||||
|
||||
String result = SensitiveWordHelper.replace(text, '0');
|
||||
Assert.assertEquals("0000迎风飘扬,000的画像屹立在000前。", result);
|
||||
}
|
||||
|
||||
/**
|
||||
* 忽略大小写
|
||||
* @since 0.0.4
|
||||
*/
|
||||
@Test
|
||||
public void ignoreCaseTest() {
|
||||
final String text = "fuCK the bad words.";
|
||||
|
||||
String word = SensitiveWordHelper.findFirst(text);
|
||||
Assert.assertEquals("fuCK", word);
|
||||
}
|
||||
|
||||
/**
|
||||
* 忽略半角圆角
|
||||
* @since 0.0.4
|
||||
*/
|
||||
@Test
|
||||
public void ignoreWidthTest() {
|
||||
final String text = "fuck the bad words.";
|
||||
|
||||
String word = SensitiveWordHelper.findFirst(text);
|
||||
Assert.assertEquals("fuck", word);
|
||||
}
|
||||
|
||||
}
|
||||
Reference in New Issue
Block a user