release branch 0.14.0

This commit is contained in:
binbin.hou
2024-04-11 16:14:07 +08:00
parent 49dc76e330
commit a9ea2dc62e
17 changed files with 150 additions and 36 deletions

View File

@@ -285,3 +285,10 @@
| 5 | O | 移除 涿州、中国联通、中国网通、李老师、写字楼、人民银行、卡拉、牛魔王、 张杰、哪里找、爱因斯坦、天地之间、不玩了、里根、监听器、监听 | 2024-4-9 21:04:18 | |
| 6 | O | 移除 选举、登陆、中国移动、运营者、客户服务、精益求精、下载速度、好朋友、拦截器、账号、无界、深入浅出、腾讯、解码器、管理员、白皮书 | 2024-4-9 21:04:18 | |
| 7 | O | 移除 监听、运营商、一起玩、转化、超市 | 2024-4-9 21:04:18 | |
# release_0.14.0
| 序号 | 变更类型 | 说明 | 时间 | 备注 |
|:---|:-----|------------|:-------------------|:-------------------------------------------------|
| 1 | A | 结果添加敏感词的类别 | 2024-4-11 15:02:25 | |

View File

@@ -52,6 +52,8 @@
[CHANGE_LOG.md](https://github.com/houbb/sensitive-word/blob/master/CHANGE_LOG.md)
V0.14.0: raw 添加敏感词类别。
## 更多资料
### 敏感词控台
@@ -82,7 +84,7 @@
<dependency>
<groupId>com.github.houbb</groupId>
<artifactId>sensitive-word</artifactId>
<version>0.13.4</version>
<version>0.14.0</version>
</dependency>
```
@@ -127,15 +129,6 @@ SensitiveWordHelper.findFirst(text) 等价于:
String word = SensitiveWordHelper.findFirst(text, WordResultHandlers.word());
```
WordResultHandlers.raw() 可以保留对应的下标信息:
```java
final String text = "五星红旗迎风飘扬,毛主席的画像屹立在天安门前。";
IWordResult word = SensitiveWordHelper.findFirst(text, WordResultHandlers.raw());
Assert.assertEquals("WordResult{startIndex=0, endIndex=4}", word.toString());
```
### 返回所有敏感词
```java
@@ -153,13 +146,12 @@ SensitiveWordHelper.findAll(text) 等价于:
List<String> wordList = SensitiveWordHelper.findAll(text, WordResultHandlers.word());
```
WordResultHandlers.raw() 可以保留对应的下标信息:
WordResultHandlers.raw() 可以保留对应的下标信息、类别信息:
```java
final String text = "五星红旗迎风飘扬,毛主席的画像屹立在天安门前。";
List<IWordResult> wordList = SensitiveWordHelper.findAll(text, WordResultHandlers.raw());
Assert.assertEquals("[WordResult{startIndex=0, endIndex=4}, WordResult{startIndex=9, endIndex=12}, WordResult{startIndex=18, endIndex=21}]", wordList.toString());
final String text = "骂人:你他妈; 邮箱123@qq.com; mobile: 13088889999; 网址https://www.baidu.com";
List<IWordResult> wordList3 = SensitiveWordHelper.findAll(text, WordResultHandlers.raw());
Assert.assertEquals("[WordResult{startIndex=3, endIndex=6, type='WORD'}, WordResult{startIndex=11, endIndex=21, type='EMAIL'}, WordResult{startIndex=31, endIndex=42, type='NUM'}, WordResult{startIndex=55, endIndex=68, type='URL'}]", wordList3.toString());
```
### 默认的替换策略

View File

@@ -6,7 +6,7 @@
<groupId>com.github.houbb</groupId>
<artifactId>sensitive-word</artifactId>
<version>0.13.4</version>
<version>0.14.0</version>
<properties>
<!--============================== All Plugins START ==============================-->

View File

@@ -10,9 +10,9 @@ ECHO "============================= RELEASE START..."
:: 版本号信息(需要手动指定)
:::: 旧版本名称
SET version=0.13.4
SET version=0.14.0
:::: 新版本名称
SET newVersion=0.14.0
SET newVersion=0.15.0
:::: 组织名称
SET groupName=com.github.houbb
:::: 项目名称

View File

@@ -21,4 +21,11 @@ public interface IWordResult {
*/
int endIndex();
/**
* 类别
* @return 类别
* @since 0.14.0
*/
String type();
}

View File

@@ -0,0 +1,31 @@
package com.github.houbb.sensitive.word.constant.enums;
/**
 * Categories that a detected word can be tagged with.
 *
 * <p>Every constant carries a string code and a short human-readable description.</p>
 *
 * @since 0.14.0
 */
public enum WordTypeEnum {

    /** A plain sensitive word. */
    WORD("WORD", "敏感词"),

    /** An e-mail address. */
    EMAIL("EMAIL", "邮箱"),

    /** A URL / link. */
    URL("URL", "链接"),

    /** A number. */
    NUM("NUM", "数字"),

    /** The default category. */
    DEFAULTS("DEFAULTS", "默认");

    /** Code of the category. */
    private final String code;

    /** Description of the category. */
    private final String desc;

    /**
     * Creates a category.
     *
     * @param typeCode code of the category
     * @param typeDesc description of the category
     */
    WordTypeEnum(final String typeCode, final String typeDesc) {
        this.code = typeCode;
        this.desc = typeDesc;
    }

    /**
     * @return the code of this category
     */
    public String getCode() {
        return this.code;
    }

    /**
     * @return the description of this category
     */
    public String getDesc() {
        return this.desc;
    }
}

View File

@@ -3,6 +3,7 @@ package com.github.houbb.sensitive.word.core;
import com.github.houbb.heaven.util.guava.Guavas;
import com.github.houbb.sensitive.word.api.*;
import com.github.houbb.sensitive.word.api.context.InnerSensitiveWordContext;
import com.github.houbb.sensitive.word.constant.enums.WordTypeEnum;
import com.github.houbb.sensitive.word.constant.enums.WordValidModeEnum;
import com.github.houbb.sensitive.word.support.check.WordCheckResult;
import com.github.houbb.sensitive.word.support.result.WordResult;
@@ -67,7 +68,8 @@ public class SensitiveWord extends AbstractSensitiveWord {
// 保存敏感词
WordResult wordResult = WordResult.newInstance()
.startIndex(i)
.endIndex(i+wordLength);
.endIndex(i+wordLength)
.type(checkResult.type());
//v0.13.0 添加判断
if(wordResultCondition.match(wordResult, text, modeEnum, context)) {
resultList.add(wordResult);

View File

@@ -30,18 +30,31 @@ public abstract class AbstractWordCheck implements IWordCheck {
*/
protected abstract int getActualLength(int beginIndex, final InnerSensitiveWordContext checkContext);
/**
* 获取类别
* @return 类别
* @since 0.14.0
*/
protected abstract String getType();
@Override
public WordCheckResult sensitiveCheck(int beginIndex,
final InnerSensitiveWordContext checkContext) {
Class<? extends IWordCheck> clazz = getSensitiveCheckClass();
final String txt = checkContext.originalText();
if(StringUtil.isEmpty(txt)) {
return WordCheckResult.of(0, clazz);
return WordCheckResult.newInstance()
.index(0)
.type(getType())
.checkClass(clazz);
}
int actualLength = getActualLength(beginIndex, checkContext);
return WordCheckResult.of(actualLength, clazz);
return WordCheckResult.newInstance()
.index(actualLength)
.type(getType())
.checkClass(clazz);
}
}

View File

@@ -6,6 +6,7 @@ import com.github.houbb.heaven.util.util.regex.RegexUtil;
import com.github.houbb.sensitive.word.api.IWordCheck;
import com.github.houbb.sensitive.word.api.context.InnerSensitiveWordContext;
import com.github.houbb.sensitive.word.constant.WordConst;
import com.github.houbb.sensitive.word.constant.enums.WordTypeEnum;
/**
* email 正则表达式检测实现。
@@ -39,6 +40,11 @@ public class WordCheckEmail extends AbstractConditionWordCheck {
return WordCheckEmail.class;
}
/**
 * Supplies the type code of this check.
 *
 * @return the code of {@link WordTypeEnum#EMAIL}
 * @since 0.14.0
 */
@Override
protected String getType() {
    final WordTypeEnum category = WordTypeEnum.EMAIL;
    return category.getCode();
}
@Override
protected boolean isCharCondition(char mappingChar, int index, InnerSensitiveWordContext checkContext) {
return CharUtil.isEmilChar(mappingChar);

View File

@@ -3,6 +3,7 @@ package com.github.houbb.sensitive.word.support.check;
import com.github.houbb.heaven.annotation.ThreadSafe;
import com.github.houbb.sensitive.word.api.IWordCheck;
import com.github.houbb.sensitive.word.api.context.InnerSensitiveWordContext;
import com.github.houbb.sensitive.word.constant.enums.WordTypeEnum;
/**
* 未匹配
@@ -25,7 +26,10 @@ public class WordCheckNone implements IWordCheck {
/**
* 只有一个未匹配
*/
private static final WordCheckResult NONE_RESULT = WordCheckResult.of(0, WordCheckNone.class);
private static final WordCheckResult NONE_RESULT = WordCheckResult.newInstance()
.type(WordTypeEnum.DEFAULTS.getCode())
.index(0)
.checkClass(WordCheckNone.class);
public static WordCheckResult getNoneResult() {
return NONE_RESULT;

View File

@@ -3,6 +3,7 @@ package com.github.houbb.sensitive.word.support.check;
import com.github.houbb.heaven.annotation.ThreadSafe;
import com.github.houbb.sensitive.word.api.IWordCheck;
import com.github.houbb.sensitive.word.api.context.InnerSensitiveWordContext;
import com.github.houbb.sensitive.word.constant.enums.WordTypeEnum;
/**
* 数字检测实现
@@ -28,6 +29,11 @@ public class WordCheckNum extends AbstractConditionWordCheck {
return WordCheckNum.class;
}
/**
 * Supplies the type code of this check.
 *
 * @return the code of {@link WordTypeEnum#NUM}
 * @since 0.14.0
 */
@Override
protected String getType() {
    final WordTypeEnum category = WordTypeEnum.NUM;
    return category.getCode();
}
@Override
protected boolean isCharCondition(char mappingChar, int index, InnerSensitiveWordContext checkContext) {
return Character.isDigit(mappingChar);

View File

@@ -24,17 +24,15 @@ public class WordCheckResult {
private Class<? extends IWordCheck> checkClass;
/**
* 实例化
* @param index 返回索引
* @param checkClass 验证类
* @return 结果
* @since 0.0.12
* 单词类别
* @since 0.14.0
*/
public static WordCheckResult of(final int index,
final Class<? extends IWordCheck> checkClass) {
WordCheckResult result = new WordCheckResult();
result.index(index).checkClass(checkClass);
return result;
private String type;
private WordCheckResult(){}
public static WordCheckResult newInstance() {
return new WordCheckResult();
}
public int index() {
@@ -55,11 +53,21 @@ public class WordCheckResult {
return this;
}
/**
 * Returns the word type stored on this check result.
 *
 * @return the word type
 * @since 0.14.0
 */
public String type() {
return type;
}
/**
 * Stores the word type on this check result.
 *
 * @param type the word type
 * @return this instance, so calls can be chained
 * @since 0.14.0
 */
public WordCheckResult type(String type) {
this.type = type;
return this;
}
@Override
public String toString() {
return "SensitiveCheckResult{" +
return "WordCheckResult{" +
"index=" + index +
", checkClass=" + checkClass +
", type='" + type + '\'' +
'}';
}

View File

@@ -6,6 +6,7 @@ import com.github.houbb.heaven.util.util.regex.RegexUtil;
import com.github.houbb.sensitive.word.api.IWordCheck;
import com.github.houbb.sensitive.word.api.context.InnerSensitiveWordContext;
import com.github.houbb.sensitive.word.constant.WordConst;
import com.github.houbb.sensitive.word.constant.enums.WordTypeEnum;
/**
* URL 正则表达式检测实现。
@@ -36,6 +37,11 @@ public class WordCheckUrl extends AbstractConditionWordCheck {
return WordCheckUrl.class;
}
/**
 * Supplies the type code of this check.
 *
 * @return the code of {@link WordTypeEnum#URL}
 * @since 0.14.0
 */
@Override
protected String getType() {
    final WordTypeEnum category = WordTypeEnum.URL;
    return category.getCode();
}
@Override
protected boolean isCharCondition(char mappingChar, int index, InnerSensitiveWordContext checkContext) {
return CharUtil.isWebSiteChar(mappingChar);

View File

@@ -6,6 +6,7 @@ import com.github.houbb.sensitive.word.api.IWordCheck;
import com.github.houbb.sensitive.word.api.IWordContext;
import com.github.houbb.sensitive.word.api.IWordData;
import com.github.houbb.sensitive.word.api.context.InnerSensitiveWordContext;
import com.github.houbb.sensitive.word.constant.enums.WordTypeEnum;
import com.github.houbb.sensitive.word.constant.enums.WordValidModeEnum;
import com.github.houbb.sensitive.word.constant.enums.WordContainsTypeEnum;
@@ -83,4 +84,9 @@ public class WordCheckWord extends AbstractWordCheck {
return actualLength;
}
/**
 * Supplies the type code of this check.
 *
 * @return the code of {@link WordTypeEnum#WORD}
 * @since 0.14.0
 */
@Override
protected String getType() {
    final WordTypeEnum category = WordTypeEnum.WORD;
    return category.getCode();
}
}

View File

@@ -12,6 +12,14 @@ public class WordResult implements IWordResult {
private int endIndex;
/**
* 词类别
* @since 0.14.0
*/
private String type;
private WordResult(){}
public static WordResult newInstance() {
return new WordResult();
}
@@ -36,11 +44,22 @@ public class WordResult implements IWordResult {
return this;
}
/**
 * Returns the word type stored on this result.
 *
 * @return the word type
 * @since 0.14.0
 */
@Override
public String type() {
return type;
}
/**
 * Stores the word type on this result.
 *
 * @param type the word type
 * @return this instance, so calls can be chained
 * @since 0.14.0
 */
public WordResult type(String type) {
this.type = type;
return this;
}
/**
 * Renders this result as {@code WordResult{startIndex=..., endIndex=..., type='...'}}.
 *
 * @return the textual form of this result
 */
@Override
public String toString() {
    final StringBuilder text = new StringBuilder("WordResult{");
    text.append("startIndex=").append(startIndex);
    text.append(", endIndex=").append(endIndex);
    text.append(", type='").append(type).append('\'');
    text.append('}');
    return text.toString();
}

View File

@@ -62,7 +62,7 @@ public class SensitiveWordHelperTest {
final String text = "五星红旗迎风飘扬,毛主席的画像屹立在天安门前。";
List<IWordResult> wordList = SensitiveWordHelper.findAll(text, WordResultHandlers.raw());
Assert.assertEquals("[WordResult{startIndex=0, endIndex=4}, WordResult{startIndex=9, endIndex=12}, WordResult{startIndex=18, endIndex=21}]", wordList.toString());
Assert.assertEquals("[WordResult{startIndex=0, endIndex=4, type='WORD'}, WordResult{startIndex=9, endIndex=12, type='WORD'}, WordResult{startIndex=18, endIndex=21, type='WORD'}]", wordList.toString());
}
@@ -99,7 +99,7 @@ public class SensitiveWordHelperTest {
final String text = "五星红旗迎风飘扬,毛主席的画像屹立在天安门前。";
IWordResult word = SensitiveWordHelper.findFirst(text, WordResultHandlers.raw());
Assert.assertEquals("WordResult{startIndex=0, endIndex=4}", word.toString());
Assert.assertEquals("WordResult{startIndex=0, endIndex=4, type='WORD'}", word.toString());
}
/**

View File

@@ -27,7 +27,14 @@ public class WordResultHandlerTest {
Assert.assertEquals("[五星红旗, 毛主席, 天安门]", wordList2.toString());
List<IWordResult> wordList3 = SensitiveWordHelper.findAll(text, WordResultHandlers.raw());
Assert.assertEquals("[WordResult{startIndex=0, endIndex=4}, WordResult{startIndex=9, endIndex=12}, WordResult{startIndex=18, endIndex=21}]", wordList3.toString());
Assert.assertEquals("[WordResult{startIndex=0, endIndex=4, type='WORD'}, WordResult{startIndex=9, endIndex=12, type='WORD'}, WordResult{startIndex=18, endIndex=21, type='WORD'}]", wordList3.toString());
}
/**
 * Checks the raw results found in a text that contains a plain word,
 * an e-mail address, a number and a URL, including the type of each result.
 */
@Test
public void findAllWordTest2() {
    final String text = "骂人:你他妈; 邮箱123@qq.com; mobile: 13088889999; 网址https://www.baidu.com";
    final String expected = "[WordResult{startIndex=3, endIndex=6, type='WORD'}, WordResult{startIndex=11, endIndex=21, type='EMAIL'}, WordResult{startIndex=31, endIndex=42, type='NUM'}, WordResult{startIndex=55, endIndex=68, type='URL'}]";

    List<IWordResult> rawResults = SensitiveWordHelper.findAll(text, WordResultHandlers.raw());
    final String actual = rawResults.toString();

    Assert.assertEquals(expected, actual);
}
@Test