mirror of
https://github.com/houbb/sensitive-word.git
synced 2026-03-22 08:27:36 +08:00
release branch 0.14.0
This commit is contained in:
@@ -285,3 +285,10 @@
|
||||
| 5 | O | 移除 涿州、中国联通、中国网通、李老师、写字楼、人民银行、卡拉、牛魔王、张杰、哪里找、爱因斯坦、天地之间、不玩了、里根、监听器、监听 | 2024-4-9 21:04:18 | |
|
||||
| 6 | O | 移除 选举、登陆、中国移动、运营者、客户服务、精益求精、下载速度、好朋友、拦截器、账号、无界、深入浅出、腾讯、解码器、管理员、白皮书 | 2024-4-9 21:04:18 | |
|
||||
| 7 | O | 移除 监听、运营商、一起玩、转化、超市 | 2024-4-9 21:04:18 | |
|
||||
|
||||
|
||||
# release_0.14.0
|
||||
|
||||
| 序号 | 变更类型 | 说明 | 时间 | 备注 |
|
||||
|:---|:-----|------------|:-------------------|:-------------------------------------------------|
|
||||
| 1 | A | 结果添加敏感词的类别 | 2024-4-11 15:02:25 | |
|
||||
|
||||
22
README.md
22
README.md
@@ -52,6 +52,8 @@
|
||||
|
||||
[CHANGE_LOG.md](https://github.com/houbb/sensitive-word/blob/master/CHANGE_LOG.md)
|
||||
|
||||
V0.14.0: WordResultHandlers.raw() 的返回结果新增敏感词类别(type)。
|
||||
|
||||
## 更多资料
|
||||
|
||||
### 敏感词控台
|
||||
@@ -82,7 +84,7 @@
|
||||
<dependency>
|
||||
<groupId>com.github.houbb</groupId>
|
||||
<artifactId>sensitive-word</artifactId>
|
||||
<version>0.13.4</version>
|
||||
<version>0.14.0</version>
|
||||
</dependency>
|
||||
```
|
||||
|
||||
@@ -127,15 +129,6 @@ SensitiveWordHelper.findFirst(text) 等价于:
|
||||
String word = SensitiveWordHelper.findFirst(text, WordResultHandlers.word());
|
||||
```
|
||||
|
||||
WordResultHandlers.raw() 可以保留对应的下标信息:
|
||||
|
||||
```java
|
||||
final String text = "五星红旗迎风飘扬,毛主席的画像屹立在天安门前。";
|
||||
|
||||
IWordResult word = SensitiveWordHelper.findFirst(text, WordResultHandlers.raw());
|
||||
Assert.assertEquals("WordResult{startIndex=0, endIndex=4}", word.toString());
|
||||
```
|
||||
|
||||
### 返回所有敏感词
|
||||
|
||||
```java
|
||||
@@ -153,13 +146,12 @@ SensitiveWordHelper.findAll(text) 等价于:
|
||||
List<String> wordList = SensitiveWordHelper.findAll(text, WordResultHandlers.word());
|
||||
```
|
||||
|
||||
WordResultHandlers.raw() 可以保留对应的下标信息:
|
||||
WordResultHandlers.raw() 可以保留对应的下标信息、类别信息:
|
||||
|
||||
```java
|
||||
final String text = "五星红旗迎风飘扬,毛主席的画像屹立在天安门前。";
|
||||
|
||||
List<IWordResult> wordList = SensitiveWordHelper.findAll(text, WordResultHandlers.raw());
|
||||
Assert.assertEquals("[WordResult{startIndex=0, endIndex=4}, WordResult{startIndex=9, endIndex=12}, WordResult{startIndex=18, endIndex=21}]", wordList.toString());
|
||||
final String text = "骂人:你他妈; 邮箱:123@qq.com; mobile: 13088889999; 网址:https://www.baidu.com";
|
||||
List<IWordResult> wordList3 = SensitiveWordHelper.findAll(text, WordResultHandlers.raw());
|
||||
Assert.assertEquals("[WordResult{startIndex=3, endIndex=6, type='WORD'}, WordResult{startIndex=11, endIndex=21, type='EMAIL'}, WordResult{startIndex=31, endIndex=42, type='NUM'}, WordResult{startIndex=55, endIndex=68, type='URL'}]", wordList3.toString());
|
||||
```
|
||||
|
||||
### 默认的替换策略
|
||||
|
||||
2
pom.xml
2
pom.xml
@@ -6,7 +6,7 @@
|
||||
|
||||
<groupId>com.github.houbb</groupId>
|
||||
<artifactId>sensitive-word</artifactId>
|
||||
<version>0.13.4</version>
|
||||
<version>0.14.0</version>
|
||||
|
||||
<properties>
|
||||
<!--============================== All Plugins START ==============================-->
|
||||
|
||||
@@ -10,9 +10,9 @@ ECHO "============================= RELEASE START..."
|
||||
|
||||
:: 版本号信息(需要手动指定)
|
||||
:::: 旧版本名称
|
||||
SET version=0.13.4
|
||||
SET version=0.14.0
|
||||
:::: 新版本名称
|
||||
SET newVersion=0.14.0
|
||||
SET newVersion=0.15.0
|
||||
:::: 组织名称
|
||||
SET groupName=com.github.houbb
|
||||
:::: 项目名称
|
||||
|
||||
@@ -21,4 +21,11 @@ public interface IWordResult {
|
||||
*/
|
||||
int endIndex();
|
||||
|
||||
/**
|
||||
* 类别
|
||||
* @return 类别
|
||||
* @since 0.14.0
|
||||
*/
|
||||
String type();
|
||||
|
||||
}
|
||||
|
||||
@@ -0,0 +1,31 @@
|
||||
package com.github.houbb.sensitive.word.constant.enums;
|
||||
|
||||
/**
 * Categories that can be attached to a word match.
 *
 * <p>Each constant carries a string {@code code} (identical to the constant
 * name) and a short human-readable description.</p>
 *
 * @since 0.14.0
 */
public enum WordTypeEnum {

    /** A sensitive word. */
    WORD("WORD", "敏感词"),

    /** An e-mail address. */
    EMAIL("EMAIL", "邮箱"),

    /** A URL / link. */
    URL("URL", "链接"),

    /** A number. */
    NUM("NUM", "数字"),

    /** Default category. */
    DEFAULTS("DEFAULTS", "默认");

    /** Category code. */
    private final String code;

    /** Human-readable description of the category. */
    private final String desc;

    WordTypeEnum(String code, String desc) {
        this.code = code;
        this.desc = desc;
    }

    /**
     * @return the category code
     */
    public String getCode() {
        return code;
    }

    /**
     * @return the human-readable description of the category
     */
    public String getDesc() {
        return desc;
    }

}
|
||||
@@ -3,6 +3,7 @@ package com.github.houbb.sensitive.word.core;
|
||||
import com.github.houbb.heaven.util.guava.Guavas;
|
||||
import com.github.houbb.sensitive.word.api.*;
|
||||
import com.github.houbb.sensitive.word.api.context.InnerSensitiveWordContext;
|
||||
import com.github.houbb.sensitive.word.constant.enums.WordTypeEnum;
|
||||
import com.github.houbb.sensitive.word.constant.enums.WordValidModeEnum;
|
||||
import com.github.houbb.sensitive.word.support.check.WordCheckResult;
|
||||
import com.github.houbb.sensitive.word.support.result.WordResult;
|
||||
@@ -67,7 +68,8 @@ public class SensitiveWord extends AbstractSensitiveWord {
|
||||
// 保存敏感词
|
||||
WordResult wordResult = WordResult.newInstance()
|
||||
.startIndex(i)
|
||||
.endIndex(i+wordLength);
|
||||
.endIndex(i+wordLength)
|
||||
.type(checkResult.type());
|
||||
//v0.13.0 添加判断
|
||||
if(wordResultCondition.match(wordResult, text, modeEnum, context)) {
|
||||
resultList.add(wordResult);
|
||||
|
||||
@@ -30,18 +30,31 @@ public abstract class AbstractWordCheck implements IWordCheck {
|
||||
*/
|
||||
protected abstract int getActualLength(int beginIndex, final InnerSensitiveWordContext checkContext);
|
||||
|
||||
/**
|
||||
* 获取类别
|
||||
* @return 类别
|
||||
* @since 0.14.0
|
||||
*/
|
||||
protected abstract String getType();
|
||||
|
||||
@Override
|
||||
public WordCheckResult sensitiveCheck(int beginIndex,
|
||||
final InnerSensitiveWordContext checkContext) {
|
||||
Class<? extends IWordCheck> clazz = getSensitiveCheckClass();
|
||||
final String txt = checkContext.originalText();
|
||||
if(StringUtil.isEmpty(txt)) {
|
||||
return WordCheckResult.of(0, clazz);
|
||||
return WordCheckResult.newInstance()
|
||||
.index(0)
|
||||
.type(getType())
|
||||
.checkClass(clazz);
|
||||
}
|
||||
|
||||
int actualLength = getActualLength(beginIndex, checkContext);
|
||||
|
||||
return WordCheckResult.of(actualLength, clazz);
|
||||
return WordCheckResult.newInstance()
|
||||
.index(actualLength)
|
||||
.type(getType())
|
||||
.checkClass(clazz);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -6,6 +6,7 @@ import com.github.houbb.heaven.util.util.regex.RegexUtil;
|
||||
import com.github.houbb.sensitive.word.api.IWordCheck;
|
||||
import com.github.houbb.sensitive.word.api.context.InnerSensitiveWordContext;
|
||||
import com.github.houbb.sensitive.word.constant.WordConst;
|
||||
import com.github.houbb.sensitive.word.constant.enums.WordTypeEnum;
|
||||
|
||||
/**
|
||||
* email 正则表达式检测实现。
|
||||
@@ -39,6 +40,11 @@ public class WordCheckEmail extends AbstractConditionWordCheck {
|
||||
return WordCheckEmail.class;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected String getType() {
|
||||
return WordTypeEnum.EMAIL.getCode();
|
||||
}
|
||||
|
||||
@Override
|
||||
protected boolean isCharCondition(char mappingChar, int index, InnerSensitiveWordContext checkContext) {
|
||||
return CharUtil.isEmilChar(mappingChar);
|
||||
|
||||
@@ -3,6 +3,7 @@ package com.github.houbb.sensitive.word.support.check;
|
||||
import com.github.houbb.heaven.annotation.ThreadSafe;
|
||||
import com.github.houbb.sensitive.word.api.IWordCheck;
|
||||
import com.github.houbb.sensitive.word.api.context.InnerSensitiveWordContext;
|
||||
import com.github.houbb.sensitive.word.constant.enums.WordTypeEnum;
|
||||
|
||||
/**
|
||||
* 未匹配
|
||||
@@ -25,7 +26,10 @@ public class WordCheckNone implements IWordCheck {
|
||||
/**
|
||||
* 只有一个未匹配
|
||||
*/
|
||||
private static final WordCheckResult NONE_RESULT = WordCheckResult.of(0, WordCheckNone.class);
|
||||
private static final WordCheckResult NONE_RESULT = WordCheckResult.newInstance()
|
||||
.type(WordTypeEnum.DEFAULTS.getCode())
|
||||
.index(0)
|
||||
.checkClass(WordCheckNone.class);
|
||||
|
||||
public static WordCheckResult getNoneResult() {
|
||||
return NONE_RESULT;
|
||||
|
||||
@@ -3,6 +3,7 @@ package com.github.houbb.sensitive.word.support.check;
|
||||
import com.github.houbb.heaven.annotation.ThreadSafe;
|
||||
import com.github.houbb.sensitive.word.api.IWordCheck;
|
||||
import com.github.houbb.sensitive.word.api.context.InnerSensitiveWordContext;
|
||||
import com.github.houbb.sensitive.word.constant.enums.WordTypeEnum;
|
||||
|
||||
/**
|
||||
* 数字检测实现
|
||||
@@ -28,6 +29,11 @@ public class WordCheckNum extends AbstractConditionWordCheck {
|
||||
return WordCheckNum.class;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected String getType() {
|
||||
return WordTypeEnum.NUM.getCode();
|
||||
}
|
||||
|
||||
@Override
|
||||
protected boolean isCharCondition(char mappingChar, int index, InnerSensitiveWordContext checkContext) {
|
||||
return Character.isDigit(mappingChar);
|
||||
|
||||
@@ -24,17 +24,15 @@ public class WordCheckResult {
|
||||
private Class<? extends IWordCheck> checkClass;
|
||||
|
||||
/**
|
||||
* 实例化
|
||||
* @param index 返回索引
|
||||
* @param checkClass 验证类
|
||||
* @return 结果
|
||||
* @since 0.0.12
|
||||
* 单词类别
|
||||
* @since 0.14.0
|
||||
*/
|
||||
public static WordCheckResult of(final int index,
|
||||
final Class<? extends IWordCheck> checkClass) {
|
||||
WordCheckResult result = new WordCheckResult();
|
||||
result.index(index).checkClass(checkClass);
|
||||
return result;
|
||||
private String type;
|
||||
|
||||
private WordCheckResult(){}
|
||||
|
||||
public static WordCheckResult newInstance() {
|
||||
return new WordCheckResult();
|
||||
}
|
||||
|
||||
public int index() {
|
||||
@@ -55,11 +53,21 @@ public class WordCheckResult {
|
||||
return this;
|
||||
}
|
||||
|
||||
public String type() {
|
||||
return type;
|
||||
}
|
||||
|
||||
public WordCheckResult type(String type) {
|
||||
this.type = type;
|
||||
return this;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "SensitiveCheckResult{" +
|
||||
return "WordCheckResult{" +
|
||||
"index=" + index +
|
||||
", checkClass=" + checkClass +
|
||||
", type='" + type + '\'' +
|
||||
'}';
|
||||
}
|
||||
|
||||
|
||||
@@ -6,6 +6,7 @@ import com.github.houbb.heaven.util.util.regex.RegexUtil;
|
||||
import com.github.houbb.sensitive.word.api.IWordCheck;
|
||||
import com.github.houbb.sensitive.word.api.context.InnerSensitiveWordContext;
|
||||
import com.github.houbb.sensitive.word.constant.WordConst;
|
||||
import com.github.houbb.sensitive.word.constant.enums.WordTypeEnum;
|
||||
|
||||
/**
|
||||
* URL 正则表达式检测实现。
|
||||
@@ -36,6 +37,11 @@ public class WordCheckUrl extends AbstractConditionWordCheck {
|
||||
return WordCheckUrl.class;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected String getType() {
|
||||
return WordTypeEnum.URL.getCode();
|
||||
}
|
||||
|
||||
@Override
|
||||
protected boolean isCharCondition(char mappingChar, int index, InnerSensitiveWordContext checkContext) {
|
||||
return CharUtil.isWebSiteChar(mappingChar);
|
||||
|
||||
@@ -6,6 +6,7 @@ import com.github.houbb.sensitive.word.api.IWordCheck;
|
||||
import com.github.houbb.sensitive.word.api.IWordContext;
|
||||
import com.github.houbb.sensitive.word.api.IWordData;
|
||||
import com.github.houbb.sensitive.word.api.context.InnerSensitiveWordContext;
|
||||
import com.github.houbb.sensitive.word.constant.enums.WordTypeEnum;
|
||||
import com.github.houbb.sensitive.word.constant.enums.WordValidModeEnum;
|
||||
import com.github.houbb.sensitive.word.constant.enums.WordContainsTypeEnum;
|
||||
|
||||
@@ -83,4 +84,9 @@ public class WordCheckWord extends AbstractWordCheck {
|
||||
return actualLength;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected String getType() {
|
||||
return WordTypeEnum.WORD.getCode();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -12,6 +12,14 @@ public class WordResult implements IWordResult {
|
||||
|
||||
private int endIndex;
|
||||
|
||||
/**
|
||||
* 词类别
|
||||
* @since 0.14.0
|
||||
*/
|
||||
private String type;
|
||||
|
||||
/**
 * Not publicly instantiable; use {@link #newInstance()}.
 */
private WordResult() {
}

/**
 * Creates a new, empty result whose fields are filled in through the
 * chained setters.
 *
 * @return a new instance
 */
public static WordResult newInstance() {
    return new WordResult();
}
|
||||
@@ -36,11 +44,22 @@ public class WordResult implements IWordResult {
|
||||
return this;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String type() {
|
||||
return type;
|
||||
}
|
||||
|
||||
public WordResult type(String type) {
|
||||
this.type = type;
|
||||
return this;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "WordResult{" +
|
||||
"startIndex=" + startIndex +
|
||||
", endIndex=" + endIndex +
|
||||
", type='" + type + '\'' +
|
||||
'}';
|
||||
}
|
||||
|
||||
|
||||
@@ -62,7 +62,7 @@ public class SensitiveWordHelperTest {
|
||||
final String text = "五星红旗迎风飘扬,毛主席的画像屹立在天安门前。";
|
||||
|
||||
List<IWordResult> wordList = SensitiveWordHelper.findAll(text, WordResultHandlers.raw());
|
||||
Assert.assertEquals("[WordResult{startIndex=0, endIndex=4}, WordResult{startIndex=9, endIndex=12}, WordResult{startIndex=18, endIndex=21}]", wordList.toString());
|
||||
Assert.assertEquals("[WordResult{startIndex=0, endIndex=4, type='WORD'}, WordResult{startIndex=9, endIndex=12, type='WORD'}, WordResult{startIndex=18, endIndex=21, type='WORD'}]", wordList.toString());
|
||||
}
|
||||
|
||||
|
||||
@@ -99,7 +99,7 @@ public class SensitiveWordHelperTest {
|
||||
final String text = "五星红旗迎风飘扬,毛主席的画像屹立在天安门前。";
|
||||
|
||||
IWordResult word = SensitiveWordHelper.findFirst(text, WordResultHandlers.raw());
|
||||
Assert.assertEquals("WordResult{startIndex=0, endIndex=4}", word.toString());
|
||||
Assert.assertEquals("WordResult{startIndex=0, endIndex=4, type='WORD'}", word.toString());
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -27,7 +27,14 @@ public class WordResultHandlerTest {
|
||||
Assert.assertEquals("[五星红旗, 毛主席, 天安门]", wordList2.toString());
|
||||
|
||||
List<IWordResult> wordList3 = SensitiveWordHelper.findAll(text, WordResultHandlers.raw());
|
||||
Assert.assertEquals("[WordResult{startIndex=0, endIndex=4}, WordResult{startIndex=9, endIndex=12}, WordResult{startIndex=18, endIndex=21}]", wordList3.toString());
|
||||
Assert.assertEquals("[WordResult{startIndex=0, endIndex=4, type='WORD'}, WordResult{startIndex=9, endIndex=12, type='WORD'}, WordResult{startIndex=18, endIndex=21, type='WORD'}]", wordList3.toString());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void findAllWordTest2() {
|
||||
final String text = "骂人:你他妈; 邮箱:123@qq.com; mobile: 13088889999; 网址:https://www.baidu.com";
|
||||
List<IWordResult> wordList3 = SensitiveWordHelper.findAll(text, WordResultHandlers.raw());
|
||||
Assert.assertEquals("[WordResult{startIndex=3, endIndex=6, type='WORD'}, WordResult{startIndex=11, endIndex=21, type='EMAIL'}, WordResult{startIndex=31, endIndex=42, type='NUM'}, WordResult{startIndex=55, endIndex=68, type='URL'}]", wordList3.toString());
|
||||
}
|
||||
|
||||
@Test
|
||||
|
||||
Reference in New Issue
Block a user