mirror of
https://github.com/houbb/sensitive-word.git
synced 2026-03-22 08:27:36 +08:00
release branch 0.1.0
This commit is contained in:
@@ -112,3 +112,11 @@
|
|||||||
| 序号 | 变更类型 | 说明 | 时间 | 备注 |
|
| 序号 | 变更类型 | 说明 | 时间 | 备注 |
|
||||||
|:---|:---|:---|:---|:--|
|
|:---|:---|:---|:---|:--|
|
||||||
| 1 | A | 优化 init 方式 | 2021-7-16 20:51:58 | |
|
| 1 | A | 优化 init 方式 | 2021-7-16 20:51:58 | |
|
||||||
|
|
||||||
|
# release_0.1.0
|
||||||
|
|
||||||
|
| 序号 | 变更类型 | 说明 | 时间 | 备注 |
|
||||||
|
|:---|:---|:---|:---|:--|
|
||||||
|
| 1 | A | 返回敏感词对应的下标范围 | 2021-8-8 20:51:58 | |
|
||||||
|
| 2 | U | ignoreRepeat 默认为 false | 2021-8-8 20:51:58 | |
|
||||||
|
| 3 | U | 把测试、系统、买卖、彩票等常用词移出敏感词库 | 2021-8-8 20:51:58 | |
|
||||||
|
|||||||
57
README.md
57
README.md
@@ -44,6 +44,12 @@
|
|||||||
|
|
||||||
[CHANGE_LOG.md](https://github.com/houbb/sensitive-word/blob/master/doc/CHANGE_LOG.md)
|
[CHANGE_LOG.md](https://github.com/houbb/sensitive-word/blob/master/doc/CHANGE_LOG.md)
|
||||||
|
|
||||||
|
v0.1.0 变更:
|
||||||
|
|
||||||
|
- 返回敏感词对应的开始结束下标信息
|
||||||
|
|
||||||
|
- 优化词库
|
||||||
|
|
||||||
# 快速开始
|
# 快速开始
|
||||||
|
|
||||||
## 准备
|
## 准备
|
||||||
@@ -58,7 +64,7 @@
|
|||||||
<dependency>
|
<dependency>
|
||||||
<groupId>com.github.houbb</groupId>
|
<groupId>com.github.houbb</groupId>
|
||||||
<artifactId>sensitive-word</artifactId>
|
<artifactId>sensitive-word</artifactId>
|
||||||
<version>0.0.15</version>
|
<version>0.1.0</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
```
|
```
|
||||||
|
|
||||||
@@ -69,9 +75,24 @@
|
|||||||
| 方法 | 参数 | 返回值| 说明 |
|
| 方法 | 参数 | 返回值| 说明 |
|
||||||
|:---|:---|:---|:---|
|
|:---|:---|:---|:---|
|
||||||
| contains(String) | 待验证的字符串 | 布尔值 | 验证字符串是否包含敏感词 |
|
| contains(String) | 待验证的字符串 | 布尔值 | 验证字符串是否包含敏感词 |
|
||||||
| findAll(String) | 待验证的字符串 | 字符串列表 | 返回字符串中所有敏感词 |
|
|
||||||
| replace(String, char) | 使用指定的 char 替换敏感词 | 字符串 | 返回脱敏后的字符串 |
|
| replace(String, char) | 使用指定的 char 替换敏感词 | 字符串 | 返回脱敏后的字符串 |
|
||||||
| replace(String) | 使用 `*` 替换敏感词 | 字符串 | 返回脱敏后的字符串 |
|
| replace(String) | 使用 `*` 替换敏感词 | 字符串 | 返回脱敏后的字符串 |
|
||||||
|
| findAll(String) | 待验证的字符串 | 字符串列表 | 返回字符串中所有敏感词 |
|
||||||
|
| findFirst(String) | 待验证的字符串 | 字符串 | 返回字符串中第一个敏感词 |
|
||||||
|
| findAll(String, IWordResultHandler) | 待验证的字符串,IWordResultHandler 结果处理类 | 处理结果列表(类型由 IWordResultHandler 决定) | 返回字符串中所有敏感词 |
|
||||||
|
| findFirst(String, IWordResultHandler) | 待验证的字符串,IWordResultHandler 结果处理类 | 处理结果(类型由 IWordResultHandler 决定) | 返回字符串中第一个敏感词 |
|
||||||
|
|
||||||
|
IWordResultHandler 可以对敏感词的结果进行处理,允许用户自定义。
|
||||||
|
|
||||||
|
内置实现见 `WordResultHandlers` 工具类:
|
||||||
|
|
||||||
|
- WordResultHandlers.word()
|
||||||
|
|
||||||
|
只保留敏感词单词本身。
|
||||||
|
|
||||||
|
- WordResultHandlers.raw()
|
||||||
|
|
||||||
|
保留敏感词相关信息,包含敏感词,开始和结束下标。
|
||||||
|
|
||||||
## 使用实例
|
## 使用实例
|
||||||
|
|
||||||
@@ -94,6 +115,21 @@ String word = SensitiveWordHelper.findFirst(text);
|
|||||||
Assert.assertEquals("五星红旗", word);
|
Assert.assertEquals("五星红旗", word);
|
||||||
```
|
```
|
||||||
|
|
||||||
|
SensitiveWordHelper.findFirst(text) 等价于:
|
||||||
|
|
||||||
|
```java
|
||||||
|
String word = SensitiveWordHelper.findFirst(text, WordResultHandlers.word());
|
||||||
|
```
|
||||||
|
|
||||||
|
WordResultHandlers.raw() 可以保留对应的下标信息:
|
||||||
|
|
||||||
|
```java
|
||||||
|
final String text = "五星红旗迎风飘扬,毛主席的画像屹立在天安门前。";
|
||||||
|
|
||||||
|
IWordResult word = SensitiveWordHelper.findFirst(text, WordResultHandlers.raw());
|
||||||
|
Assert.assertEquals("WordResult{word='五星红旗', startIndex=0, endIndex=4}", word.toString());
|
||||||
|
```
|
||||||
|
|
||||||
### 返回所有敏感词
|
### 返回所有敏感词
|
||||||
|
|
||||||
```java
|
```java
|
||||||
@@ -103,6 +139,23 @@ List<String> wordList = SensitiveWordHelper.findAll(text);
|
|||||||
Assert.assertEquals("[五星红旗, 毛主席, 天安门]", wordList.toString());
|
Assert.assertEquals("[五星红旗, 毛主席, 天安门]", wordList.toString());
|
||||||
```
|
```
|
||||||
|
|
||||||
|
返回所有敏感词用法上类似于 SensitiveWordHelper.findFirst(),同样也支持指定结果处理类。
|
||||||
|
|
||||||
|
SensitiveWordHelper.findAll(text) 等价于:
|
||||||
|
|
||||||
|
```java
|
||||||
|
List<String> wordList = SensitiveWordHelper.findAll(text, WordResultHandlers.word());
|
||||||
|
```
|
||||||
|
|
||||||
|
WordResultHandlers.raw() 可以保留对应的下标信息:
|
||||||
|
|
||||||
|
```java
|
||||||
|
final String text = "五星红旗迎风飘扬,毛主席的画像屹立在天安门前。";
|
||||||
|
|
||||||
|
List<IWordResult> wordList = SensitiveWordHelper.findAll(text, WordResultHandlers.raw());
|
||||||
|
Assert.assertEquals("[WordResult{word='五星红旗', startIndex=0, endIndex=4}, WordResult{word='毛主席', startIndex=9, endIndex=12}, WordResult{word='天安门', startIndex=18, endIndex=21}]", wordList.toString());
|
||||||
|
```
|
||||||
|
|
||||||
### 默认的替换策略
|
### 默认的替换策略
|
||||||
|
|
||||||
```java
|
```java
|
||||||
|
|||||||
2
pom.xml
2
pom.xml
@@ -6,7 +6,7 @@
|
|||||||
|
|
||||||
<groupId>com.github.houbb</groupId>
|
<groupId>com.github.houbb</groupId>
|
||||||
<artifactId>sensitive-word</artifactId>
|
<artifactId>sensitive-word</artifactId>
|
||||||
<version>0.0.15</version>
|
<version>0.1.0</version>
|
||||||
|
|
||||||
<properties>
|
<properties>
|
||||||
<!--============================== All Plugins START ==============================-->
|
<!--============================== All Plugins START ==============================-->
|
||||||
|
|||||||
@@ -10,9 +10,9 @@ ECHO "============================= RELEASE START..."
|
|||||||
|
|
||||||
:: 版本号信息(需要手动指定)
|
:: 版本号信息(需要手动指定)
|
||||||
:::: 旧版本名称
|
:::: 旧版本名称
|
||||||
SET version=0.0.15
|
SET version=0.1.0
|
||||||
:::: 新版本名称
|
:::: 新版本名称
|
||||||
SET newVersion=0.0.16
|
SET newVersion=0.2.0
|
||||||
:::: 组织名称
|
:::: 组织名称
|
||||||
SET groupName=com.github.houbb
|
SET groupName=com.github.houbb
|
||||||
:::: 项目名称
|
:::: 项目名称
|
||||||
|
|||||||
@@ -40,7 +40,7 @@ public interface IWordMap extends ISensitiveCheck {
|
|||||||
* @since 0.0.1
|
* @since 0.0.1
|
||||||
* @see ValidModeEnum#FAIL_OVER 建议使用全部检测返回模式
|
* @see ValidModeEnum#FAIL_OVER 建议使用全部检测返回模式
|
||||||
*/
|
*/
|
||||||
List<String> findAll(final String string,
|
List<IWordResult> findAll(final String string,
|
||||||
final IWordContext context);
|
final IWordContext context);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -50,7 +50,7 @@ public interface IWordMap extends ISensitiveCheck {
|
|||||||
* @return 结果
|
* @return 结果
|
||||||
* @since 0.0.1
|
* @since 0.0.1
|
||||||
*/
|
*/
|
||||||
String findFirst(final String string,
|
IWordResult findFirst(final String string,
|
||||||
final IWordContext context);
|
final IWordContext context);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|||||||
@@ -0,0 +1,31 @@
|
|||||||
|
package com.github.houbb.sensitive.word.api;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 敏感词的结果
|
||||||
|
* @author binbin.hou
|
||||||
|
* @since 0.1.0
|
||||||
|
*/
|
||||||
|
public interface IWordResult {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 敏感词
|
||||||
|
* @return 敏感词
|
||||||
|
* @since 0.1.0
|
||||||
|
*/
|
||||||
|
String word();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 开始下标
|
||||||
|
* @return 开始下标
|
||||||
|
* @since 0.1.0
|
||||||
|
*/
|
||||||
|
int startIndex();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 结束下标
|
||||||
|
* @return 结束下标
|
||||||
|
* @since 0.1.0
|
||||||
|
*/
|
||||||
|
int endIndex();
|
||||||
|
|
||||||
|
}
|
||||||
@@ -0,0 +1,18 @@
|
|||||||
|
package com.github.houbb.sensitive.word.api;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 敏感词的结果处理
|
||||||
|
* @author binbin.hou
|
||||||
|
* @since 0.1.0
|
||||||
|
*/
|
||||||
|
public interface IWordResultHandler<R> {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 对于结果的处理
|
||||||
|
* @param wordResult 结果
|
||||||
|
* @return 处理结果
|
||||||
|
* @since 0.1.0
|
||||||
|
*/
|
||||||
|
R handle(final IWordResult wordResult);
|
||||||
|
|
||||||
|
}
|
||||||
@@ -1,15 +1,14 @@
|
|||||||
package com.github.houbb.sensitive.word.bs;
|
package com.github.houbb.sensitive.word.bs;
|
||||||
|
|
||||||
import com.github.houbb.heaven.constant.CharConst;
|
import com.github.houbb.heaven.constant.CharConst;
|
||||||
|
import com.github.houbb.heaven.support.handler.IHandler;
|
||||||
import com.github.houbb.heaven.util.common.ArgUtil;
|
import com.github.houbb.heaven.util.common.ArgUtil;
|
||||||
import com.github.houbb.heaven.util.util.CollectionUtil;
|
import com.github.houbb.heaven.util.util.CollectionUtil;
|
||||||
import com.github.houbb.sensitive.word.api.IWordAllow;
|
import com.github.houbb.sensitive.word.api.*;
|
||||||
import com.github.houbb.sensitive.word.api.IWordContext;
|
|
||||||
import com.github.houbb.sensitive.word.api.IWordDeny;
|
|
||||||
import com.github.houbb.sensitive.word.api.IWordMap;
|
|
||||||
import com.github.houbb.sensitive.word.support.allow.WordAllows;
|
import com.github.houbb.sensitive.word.support.allow.WordAllows;
|
||||||
import com.github.houbb.sensitive.word.support.deny.WordDenys;
|
import com.github.houbb.sensitive.word.support.deny.WordDenys;
|
||||||
import com.github.houbb.sensitive.word.support.map.SensitiveWordMap;
|
import com.github.houbb.sensitive.word.support.map.SensitiveWordMap;
|
||||||
|
import com.github.houbb.sensitive.word.support.result.WordResultHandlers;
|
||||||
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
@@ -240,7 +239,7 @@ public class SensitiveWordBs {
|
|||||||
wordContext.ignoreNumStyle(true);
|
wordContext.ignoreNumStyle(true);
|
||||||
wordContext.ignoreChineseStyle(true);
|
wordContext.ignoreChineseStyle(true);
|
||||||
wordContext.ignoreEnglishStyle(true);
|
wordContext.ignoreEnglishStyle(true);
|
||||||
wordContext.ignoreRepeat(true);
|
wordContext.ignoreRepeat(false);
|
||||||
|
|
||||||
// 开启校验
|
// 开启校验
|
||||||
wordContext.sensitiveCheckNum(true);
|
wordContext.sensitiveCheckNum(true);
|
||||||
@@ -273,9 +272,7 @@ public class SensitiveWordBs {
|
|||||||
* @since 0.0.1
|
* @since 0.0.1
|
||||||
*/
|
*/
|
||||||
public List<String> findAll(final String target) {
|
public List<String> findAll(final String target) {
|
||||||
statusCheck();
|
return findAll(target, WordResultHandlers.word());
|
||||||
|
|
||||||
return sensitiveWordMap.findAll(target, context);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -287,11 +284,48 @@ public class SensitiveWordBs {
|
|||||||
* @since 0.0.1
|
* @since 0.0.1
|
||||||
*/
|
*/
|
||||||
public String findFirst(final String target) {
|
public String findFirst(final String target) {
|
||||||
|
return findFirst(target, WordResultHandlers.word());
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 返回所有的敏感词
|
||||||
|
* 1. 结果是有序的;为了保留所有的下标,从 v0.1.0 起不再去重。
|
||||||
|
* 2. 如果不存在,返回空列表
|
||||||
|
*
|
||||||
|
* @param target 目标字符串
|
||||||
|
* @return 敏感词列表
|
||||||
|
* @since 0.1.0
|
||||||
|
*/
|
||||||
|
public <R> List<R> findAll(final String target, final IWordResultHandler<R> handler) {
|
||||||
|
ArgUtil.notNull(handler, "handler");
|
||||||
statusCheck();
|
statusCheck();
|
||||||
|
|
||||||
return sensitiveWordMap.findFirst(target, context);
|
List<IWordResult> wordResults = sensitiveWordMap.findAll(target, context);
|
||||||
|
return CollectionUtil.toList(wordResults, new IHandler<IWordResult, R>() {
|
||||||
|
@Override
|
||||||
|
public R handle(IWordResult wordResult) {
|
||||||
|
return handler.handle(wordResult);
|
||||||
|
}
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 返回第一个敏感词
|
||||||
|
* (1)如果不存在,则返回 {@code null}
|
||||||
|
*
|
||||||
|
* @param target 目标字符串
|
||||||
|
* @return 敏感词
|
||||||
|
* @since 0.1.0
|
||||||
|
*/
|
||||||
|
public <R> R findFirst(final String target, final IWordResultHandler<R> handler) {
|
||||||
|
ArgUtil.notNull(handler, "handler");
|
||||||
|
statusCheck();
|
||||||
|
|
||||||
|
IWordResult wordResult = sensitiveWordMap.findFirst(target, context);
|
||||||
|
return handler.handle(wordResult);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 替换所有内容
|
* 替换所有内容
|
||||||
*
|
*
|
||||||
|
|||||||
@@ -1,20 +1,24 @@
|
|||||||
package com.github.houbb.sensitive.word.core;
|
package com.github.houbb.sensitive.word.core;
|
||||||
|
|
||||||
|
import com.github.houbb.sensitive.word.api.IWordResultHandler;
|
||||||
import com.github.houbb.sensitive.word.bs.SensitiveWordBs;
|
import com.github.houbb.sensitive.word.bs.SensitiveWordBs;
|
||||||
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 敏感词工具类
|
* 敏感词工具类
|
||||||
|
*
|
||||||
* @author binbin.hou
|
* @author binbin.hou
|
||||||
* @since 0.0.13
|
* @since 0.0.13
|
||||||
*/
|
*/
|
||||||
public final class SensitiveWordHelper {
|
public final class SensitiveWordHelper {
|
||||||
|
|
||||||
private SensitiveWordHelper(){}
|
private SensitiveWordHelper() {
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 默认的实现
|
* 默认的实现
|
||||||
|
*
|
||||||
* @since 0.0.13
|
* @since 0.0.13
|
||||||
*/
|
*/
|
||||||
private static final SensitiveWordBs WORD_BS = SensitiveWordBs.newInstance().init();
|
private static final SensitiveWordBs WORD_BS = SensitiveWordBs.newInstance().init();
|
||||||
@@ -79,4 +83,31 @@ public final class SensitiveWordHelper {
|
|||||||
return WORD_BS.replace(target);
|
return WORD_BS.replace(target);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 返回所有的敏感词
|
||||||
|
*
|
||||||
|
* @param target 目标字符串
|
||||||
|
* @param handler 结果处理类
|
||||||
|
* @return 敏感词列表
|
||||||
|
* @since 0.1.0
|
||||||
|
*/
|
||||||
|
public static <R> List<R> findAll(final String target,
|
||||||
|
final IWordResultHandler<R> handler) {
|
||||||
|
return WORD_BS.findAll(target, handler);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 返回第一个敏感词
|
||||||
|
* (1)如果不存在,则返回 {@code null}
|
||||||
|
*
|
||||||
|
* @param target 目标字符串
|
||||||
|
* @param handler 结果处理类
|
||||||
|
* @return 敏感词
|
||||||
|
* @since 0.1.0
|
||||||
|
*/
|
||||||
|
public static <R> R findFirst(final String target,
|
||||||
|
final IWordResultHandler<R> handler) {
|
||||||
|
return WORD_BS.findFirst(target, handler);
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -11,11 +11,13 @@ import com.github.houbb.heaven.util.util.CollectionUtil;
|
|||||||
import com.github.houbb.heaven.util.util.MapUtil;
|
import com.github.houbb.heaven.util.util.MapUtil;
|
||||||
import com.github.houbb.sensitive.word.api.IWordContext;
|
import com.github.houbb.sensitive.word.api.IWordContext;
|
||||||
import com.github.houbb.sensitive.word.api.IWordMap;
|
import com.github.houbb.sensitive.word.api.IWordMap;
|
||||||
|
import com.github.houbb.sensitive.word.api.IWordResult;
|
||||||
import com.github.houbb.sensitive.word.constant.AppConst;
|
import com.github.houbb.sensitive.word.constant.AppConst;
|
||||||
import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum;
|
import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum;
|
||||||
import com.github.houbb.sensitive.word.support.check.SensitiveCheckResult;
|
import com.github.houbb.sensitive.word.support.check.SensitiveCheckResult;
|
||||||
import com.github.houbb.sensitive.word.support.check.impl.SensitiveCheckChain;
|
import com.github.houbb.sensitive.word.support.check.impl.SensitiveCheckChain;
|
||||||
import com.github.houbb.sensitive.word.support.check.impl.SensitiveCheckUrl;
|
import com.github.houbb.sensitive.word.support.check.impl.SensitiveCheckUrl;
|
||||||
|
import com.github.houbb.sensitive.word.support.result.WordResult;
|
||||||
|
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
@@ -131,20 +133,20 @@ public class SensitiveWordMap implements IWordMap {
|
|||||||
/**
|
/**
|
||||||
* 返回所有对应的敏感词
|
* 返回所有对应的敏感词
|
||||||
* (1)结果是有序的
|
* (1)结果是有序的
|
||||||
* (2)结果是默认去重的
|
* (2)为了保留所有的下标,结果从 v0.1.0 之后不再去重。
|
||||||
*
|
*
|
||||||
* @param string 原始字符串
|
* @param string 原始字符串
|
||||||
* @return 结果
|
* @return 结果
|
||||||
* @since 0.0.1
|
* @since 0.0.1
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
public List<String> findAll(String string, final IWordContext context) {
|
public List<IWordResult> findAll(String string, final IWordContext context) {
|
||||||
return getSensitiveWords(string, ValidModeEnum.FAIL_OVER, context);
|
return getSensitiveWords(string, ValidModeEnum.FAIL_OVER, context);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String findFirst(String string, final IWordContext context) {
|
public IWordResult findFirst(String string, final IWordContext context) {
|
||||||
List<String> stringList = getSensitiveWords(string, ValidModeEnum.FAIL_FAST, context);
|
List<IWordResult> stringList = getSensitiveWords(string, ValidModeEnum.FAIL_FAST, context);
|
||||||
|
|
||||||
if (CollectionUtil.isEmpty(stringList)) {
|
if (CollectionUtil.isEmpty(stringList)) {
|
||||||
return null;
|
return null;
|
||||||
@@ -170,14 +172,14 @@ public class SensitiveWordMap implements IWordMap {
|
|||||||
* @return 结果列表
|
* @return 结果列表
|
||||||
* @since 0.0.1
|
* @since 0.0.1
|
||||||
*/
|
*/
|
||||||
private List<String> getSensitiveWords(final String text, final ValidModeEnum modeEnum,
|
private List<IWordResult> getSensitiveWords(final String text, final ValidModeEnum modeEnum,
|
||||||
final IWordContext context) {
|
final IWordContext context) {
|
||||||
//1. 是否存在敏感词,如果不存在,直接返回空列表
|
//1. 是否存在敏感词,如果不存在,直接返回空列表
|
||||||
if (StringUtil.isEmpty(text)) {
|
if (StringUtil.isEmpty(text)) {
|
||||||
return Guavas.newArrayList();
|
return Guavas.newArrayList();
|
||||||
}
|
}
|
||||||
|
|
||||||
List<String> resultList = Guavas.newArrayList();
|
List<IWordResult> resultList = Guavas.newArrayList();
|
||||||
for (int i = 0; i < text.length(); i++) {
|
for (int i = 0; i < text.length(); i++) {
|
||||||
SensitiveCheckResult checkResult = sensitiveCheck(text, i, ValidModeEnum.FAIL_OVER, context);
|
SensitiveCheckResult checkResult = sensitiveCheck(text, i, ValidModeEnum.FAIL_OVER, context);
|
||||||
// 命中
|
// 命中
|
||||||
@@ -187,9 +189,11 @@ public class SensitiveWordMap implements IWordMap {
|
|||||||
String sensitiveWord = text.substring(i, i + wordLength);
|
String sensitiveWord = text.substring(i, i + wordLength);
|
||||||
|
|
||||||
// 添加去重
|
// 记录命中结果(v0.1.0 起不再去重,以保留所有下标)
|
||||||
if (!resultList.contains(sensitiveWord)) {
|
WordResult wordResult = WordResult.newInstance()
|
||||||
resultList.add(sensitiveWord);
|
.startIndex(i)
|
||||||
}
|
.endIndex(i+wordLength)
|
||||||
|
.word(sensitiveWord);
|
||||||
|
resultList.add(wordResult);
|
||||||
|
|
||||||
// 快速返回
|
// 快速返回
|
||||||
if (ValidModeEnum.FAIL_FAST.equals(modeEnum)) {
|
if (ValidModeEnum.FAIL_FAST.equals(modeEnum)) {
|
||||||
|
|||||||
@@ -0,0 +1,60 @@
|
|||||||
|
package com.github.houbb.sensitive.word.support.result;
|
||||||
|
|
||||||
|
import com.github.houbb.sensitive.word.api.IWordResult;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author binbin.hou
|
||||||
|
* @since 0.1.0
|
||||||
|
*/
|
||||||
|
public class WordResult implements IWordResult {
|
||||||
|
|
||||||
|
private String word;
|
||||||
|
|
||||||
|
private int startIndex;
|
||||||
|
|
||||||
|
private int endIndex;
|
||||||
|
|
||||||
|
public static WordResult newInstance() {
|
||||||
|
return new WordResult();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String word() {
|
||||||
|
return word;
|
||||||
|
}
|
||||||
|
|
||||||
|
public WordResult word(String word) {
|
||||||
|
this.word = word;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int startIndex() {
|
||||||
|
return startIndex;
|
||||||
|
}
|
||||||
|
|
||||||
|
public WordResult startIndex(int startIndex) {
|
||||||
|
this.startIndex = startIndex;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int endIndex() {
|
||||||
|
return endIndex;
|
||||||
|
}
|
||||||
|
|
||||||
|
public WordResult endIndex(int endIndex) {
|
||||||
|
this.endIndex = endIndex;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return "WordResult{" +
|
||||||
|
"word='" + word + '\'' +
|
||||||
|
", startIndex=" + startIndex +
|
||||||
|
", endIndex=" + endIndex +
|
||||||
|
'}';
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
@@ -0,0 +1,20 @@
|
|||||||
|
package com.github.houbb.sensitive.word.support.result;
|
||||||
|
|
||||||
|
import com.github.houbb.heaven.annotation.ThreadSafe;
|
||||||
|
import com.github.houbb.sensitive.word.api.IWordResult;
|
||||||
|
import com.github.houbb.sensitive.word.api.IWordResultHandler;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 不做任何处理
|
||||||
|
* @author binbin.hou
|
||||||
|
* @since 0.1.0
|
||||||
|
*/
|
||||||
|
@ThreadSafe
|
||||||
|
public class WordResultHandlerRaw implements IWordResultHandler<IWordResult> {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public IWordResult handle(IWordResult wordResult) {
|
||||||
|
return wordResult;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
@@ -0,0 +1,24 @@
|
|||||||
|
package com.github.houbb.sensitive.word.support.result;
|
||||||
|
|
||||||
|
import com.github.houbb.heaven.annotation.ThreadSafe;
|
||||||
|
import com.github.houbb.sensitive.word.api.IWordResult;
|
||||||
|
import com.github.houbb.sensitive.word.api.IWordResultHandler;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 只保留单词
|
||||||
|
*
|
||||||
|
* @author binbin.hou
|
||||||
|
* @since 0.1.0
|
||||||
|
*/
|
||||||
|
@ThreadSafe
|
||||||
|
public class WordResultHandlerWord implements IWordResultHandler<String> {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String handle(IWordResult wordResult) {
|
||||||
|
if(wordResult == null) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
return wordResult.word();
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
@@ -0,0 +1,34 @@
|
|||||||
|
package com.github.houbb.sensitive.word.support.result;
|
||||||
|
|
||||||
|
import com.github.houbb.heaven.support.instance.impl.Instances;
|
||||||
|
import com.github.houbb.sensitive.word.api.IWordResult;
|
||||||
|
import com.github.houbb.sensitive.word.api.IWordResultHandler;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 敏感词的结果处理
|
||||||
|
* @author binbin.hou
|
||||||
|
* @since 0.1.0
|
||||||
|
*/
|
||||||
|
public final class WordResultHandlers {
|
||||||
|
|
||||||
|
private WordResultHandlers(){}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 不做任何处理
|
||||||
|
* @return 结果
|
||||||
|
* @since 0.1.0
|
||||||
|
*/
|
||||||
|
public static IWordResultHandler<IWordResult> raw() {
|
||||||
|
return Instances.singleton(WordResultHandlerRaw.class);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 只保留单词
|
||||||
|
* @return 结果
|
||||||
|
* @since 0.1.0
|
||||||
|
*/
|
||||||
|
public static IWordResultHandler<String> word() {
|
||||||
|
return Instances.singleton(WordResultHandlerWord.class);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
@@ -39,7 +39,6 @@
|
|||||||
106bb成人小电影
|
106bb成人小电影
|
||||||
1074套豪华住宅
|
1074套豪华住宅
|
||||||
107bb成人小电影
|
107bb成人小电影
|
||||||
108
|
|
||||||
10bet娱乐城
|
10bet娱乐城
|
||||||
10littleasiansvol1410人微乳素人娘
|
10littleasiansvol1410人微乳素人娘
|
||||||
10种肮脏的职业
|
10种肮脏的职业
|
||||||
@@ -74,10 +73,8 @@
|
|||||||
14se淫色图片
|
14se淫色图片
|
||||||
14se淫色图片免费vagaa电影免费电影
|
14se淫色图片免费vagaa电影免费电影
|
||||||
14岁幼女b嫩阴
|
14岁幼女b嫩阴
|
||||||
14我
|
|
||||||
14色se成人站
|
14色se成人站
|
||||||
15年前的共和国卫士
|
15年前的共和国卫士
|
||||||
15年纪念日
|
|
||||||
1608请牢记
|
1608请牢记
|
||||||
163网址宽频影院网
|
163网址宽频影院网
|
||||||
16474勼55121
|
16474勼55121
|
||||||
@@ -10808,7 +10805,6 @@ z以留吧以其以武
|
|||||||
买凶杀人
|
买凶杀人
|
||||||
买到冰毒
|
买到冰毒
|
||||||
买到枪
|
买到枪
|
||||||
买卖
|
|
||||||
买卖16号双管猎枪
|
买卖16号双管猎枪
|
||||||
买卖1元假硬币
|
买卖1元假硬币
|
||||||
买卖1元假硬币qq
|
买卖1元假硬币qq
|
||||||
@@ -44047,9 +44043,7 @@ z以留吧以其以武
|
|||||||
测拉式气枪
|
测拉式气枪
|
||||||
测拿
|
测拿
|
||||||
测绘局
|
测绘局
|
||||||
测试
|
|
||||||
测试inter
|
测试inter
|
||||||
测试员
|
|
||||||
测试哈哈哈哈啊
|
测试哈哈哈哈啊
|
||||||
测试测试数据817
|
测试测试数据817
|
||||||
测试额81719792825477
|
测试额81719792825477
|
||||||
@@ -50977,7 +50971,6 @@ z以留吧以其以武
|
|||||||
简鸿章
|
简鸿章
|
||||||
管制刀具
|
管制刀具
|
||||||
管猎
|
管猎
|
||||||
管理
|
|
||||||
管理人员
|
管理人员
|
||||||
管理员
|
管理员
|
||||||
管理就是收费
|
管理就是收费
|
||||||
@@ -51139,7 +51132,6 @@ z以留吧以其以武
|
|||||||
糖舒宁胶囊
|
糖舒宁胶囊
|
||||||
系列户改措施
|
系列户改措施
|
||||||
系列送礼活动
|
系列送礼活动
|
||||||
系统
|
|
||||||
系统中心
|
系统中心
|
||||||
系统信息
|
系统信息
|
||||||
系统公告
|
系统公告
|
||||||
@@ -65355,5 +65347,3 @@ z以留吧以其以武
|
|||||||
龟毛
|
龟毛
|
||||||
𨰾
|
𨰾
|
||||||
𫔰苞价咯
|
𫔰苞价咯
|
||||||
彩票
|
|
||||||
机票
|
|
||||||
|
|||||||
@@ -1,5 +1,7 @@
|
|||||||
package com.github.houbb.sensitive.word.core;
|
package com.github.houbb.sensitive.word.core;
|
||||||
|
|
||||||
|
import com.github.houbb.sensitive.word.api.IWordResult;
|
||||||
|
import com.github.houbb.sensitive.word.support.result.WordResultHandlers;
|
||||||
import org.junit.Assert;
|
import org.junit.Assert;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
|
|
||||||
@@ -37,6 +39,31 @@ public class SensitiveWordHelperTest {
|
|||||||
Assert.assertEquals("[五星红旗, 毛主席, 天安门]", wordList.toString());
|
Assert.assertEquals("[五星红旗, 毛主席, 天安门]", wordList.toString());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 返回所有敏感词
|
||||||
|
* @since 0.1.0
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void findAllWordTest() {
|
||||||
|
final String text = "五星红旗迎风飘扬,毛主席的画像屹立在天安门前。";
|
||||||
|
|
||||||
|
List<String> wordList = SensitiveWordHelper.findAll(text, WordResultHandlers.word());
|
||||||
|
Assert.assertEquals("[五星红旗, 毛主席, 天安门]", wordList.toString());
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 返回所有敏感词-包含下标信息
|
||||||
|
* @since 0.1.0
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void findAllRawTest() {
|
||||||
|
final String text = "五星红旗迎风飘扬,毛主席的画像屹立在天安门前。";
|
||||||
|
|
||||||
|
List<IWordResult> wordList = SensitiveWordHelper.findAll(text, WordResultHandlers.raw());
|
||||||
|
Assert.assertEquals("[WordResult{word='五星红旗', startIndex=0, endIndex=4}, WordResult{word='毛主席', startIndex=9, endIndex=12}, WordResult{word='天安门', startIndex=18, endIndex=21}]", wordList.toString());
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 返回所有第一个匹配的敏感词
|
* 返回所有第一个匹配的敏感词
|
||||||
* @since 0.0.1
|
* @since 0.0.1
|
||||||
@@ -49,6 +76,30 @@ public class SensitiveWordHelperTest {
|
|||||||
Assert.assertEquals("五星红旗", word);
|
Assert.assertEquals("五星红旗", word);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 返回所有第一个匹配的敏感词
|
||||||
|
* @since 0.1.0
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void findFirstWordTest() {
|
||||||
|
final String text = "五星红旗迎风飘扬,毛主席的画像屹立在天安门前。";
|
||||||
|
|
||||||
|
String word = SensitiveWordHelper.findFirst(text, WordResultHandlers.word());
|
||||||
|
Assert.assertEquals("五星红旗", word);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 返回第一个匹配的敏感词-包含下标信息
|
||||||
|
* @since 0.1.0
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void findFirstRawTest() {
|
||||||
|
final String text = "五星红旗迎风飘扬,毛主席的画像屹立在天安门前。";
|
||||||
|
|
||||||
|
IWordResult word = SensitiveWordHelper.findFirst(text, WordResultHandlers.raw());
|
||||||
|
Assert.assertEquals("WordResult{word='五星红旗', startIndex=0, endIndex=4}", word.toString());
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 默认的替换策略
|
* 默认的替换策略
|
||||||
* @since 0.0.2
|
* @since 0.0.2
|
||||||
|
|||||||
Reference in New Issue
Block a user