release branch 0.17.0

This commit is contained in:
houbb
2024-06-01 16:42:53 +08:00
parent 1b227d2a6a
commit 894f690b6a
15 changed files with 250 additions and 37 deletions

View File

@@ -318,3 +318,9 @@
| 1 | O | 敏感词优化移除: 译码器/鞋帽/破解/看下/快乐水/冷却/招聘/搬迁/机票/谣言/第4代/赚钱/1989年/贫穷/护士/工作人员/网通/超速/明心/人民/服务管理/刺激/心水/ | 2024-6-01 15:02:25 | |
| 2 | O | 敏感词优化移除: 影院/公关/韦小宝/挣钱/资本主义/人也/战争/水浒/女装/装修/妹妹/他爸爸/他妈妈 | 2024-6-01 15:02:25 | |
| 3 | O | 默认关闭数字+网址+邮箱的校验 | 2024-6-01 15:02:25 | 降低误判率 |
# release_0.17.0
| 序号 | 变更类型 | 说明 | 时间 | 备注 |
|:---|:-----|---------|:------------------|:------|
| 1 | A | IPV4 校验 | 2024-6-01 15:02:25 | https://github.com/houbb/sensitive-word/issues/43 |

View File

@@ -353,6 +353,8 @@ Assert.assertEquals("[ⒻⒻⒻfⓤuⓤ⒰cⓒ⒦]", wordList.toString());
### 邮箱检测
邮箱等个人信息,默认未启用。
```java
final String text = "楼主好人,邮箱 sensitiveword@xx.com";
List<String> wordList = SensitiveWordBs.newInstance().enableEmailCheck(true).init().findAll(text);
@@ -361,7 +363,7 @@ Assert.assertEquals("[sensitiveword@xx.com]", wordList.toString());
### 连续数字检测
一般用于过滤手机号/QQ等广告信息。
一般用于过滤手机号/QQ等广告信息,默认未启用。
V0.2.1 之后,支持通过 `numCheckLen(长度)` 自定义检测的长度。
@@ -385,7 +387,7 @@ Assert.assertEquals("[]", wordList2.toString());
### 网址检测
用于过滤常见的网址信息。
用于过滤常见的网址信息,默认未启用。
```java
final String text = "点击链接 www.baidu.com查看答案";
@@ -398,6 +400,19 @@ Assert.assertEquals("[www.baidu.com]", wordList.toString());
Assert.assertEquals("点击链接 *************查看答案", sensitiveWordBs.replace(text));
```
### IPV4 检测
v0.17.0 支持
避免用户通过 ip 绕过网址检测等,默认未启用。
```java
final String text = "个人网站,如果网址打不开可以访问 127.0.0.1。";
final SensitiveWordBs sensitiveWordBs = SensitiveWordBs.newInstance().enableIpv4Check(true).init();
List<String> wordList = sensitiveWordBs.findAll(text);
Assert.assertEquals("[127.0.0.1]", wordList.toString());
```
# 引导类特性配置
## 说明
@@ -423,6 +438,7 @@ SensitiveWordBs wordBs = SensitiveWordBs.newInstance()
.enableNumCheck(false)
.enableEmailCheck(false)
.enableUrlCheck(false)
.enableIpv4Check(false)
.enableWordCheck(true)
.numCheckLen(8)
.wordTag(WordTags.none())
@@ -448,11 +464,12 @@ Assert.assertTrue(wordBs.contains(text));
| 7 | enableNumCheck | 是否启用数字检测。 | false |
| 8 | enableEmailCheck | 是否启用邮箱检测 | false |
| 9 | enableUrlCheck | 是否启用链接检测 | false |
| 10 | enableWordCheck | 是否启用敏感单词检测 | true |
| 11 | numCheckLen | 数字检测,自定义指定长度。 | 8 |
| 12 | wordTag | 词对应的标签 | none |
| 13 | charIgnore | 忽略的字符 | none |
| 14 | wordResultCondition | 针对匹配的敏感词额外加工,比如可以限制英文单词必须全匹配 | 恒为真 |
| 10 | enableIpv4Check | 是否启用IPv4检测 | false |
| 11 | enableWordCheck | 是否启用敏感单词检测 | true |
| 12 | numCheckLen | 数字检测,自定义指定长度。 | 8 |
| 13 | wordTag | 词对应的标签 | none |
| 14 | charIgnore | 忽略的字符 | none |
| 15 | wordResultCondition | 针对匹配的敏感词额外加工,比如可以限制英文单词必须全匹配 | 恒为真 |
## 内存的释放

View File

@@ -6,7 +6,7 @@
<groupId>com.github.houbb</groupId>
<artifactId>sensitive-word</artifactId>
<version>0.16.2</version>
<version>0.17.0</version>
<properties>
<!--============================== All Plugins START ==============================-->

View File

@@ -10,9 +10,9 @@ ECHO "============================= RELEASE START..."
:: 版本号信息(需要手动指定)
:::: 旧版本名称
SET version=0.16.2
SET version=0.17.0
:::: 新版本名称
SET newVersion=0.17.0
SET newVersion=0.18.0
:::: 组织名称
SET groupName=com.github.houbb
:::: 项目名称

View File

@@ -126,6 +126,21 @@ public interface IWordContext {
*/
IWordContext enableUrlCheck(final boolean enableUrlCheck);
/**
* Reports whether the IPv4 check is enabled.
* @return {@code true} if the IPv4 check is enabled
* @since 0.17.0
*/
boolean enableIpv4Check();
/**
* Sets whether the IPv4 check is enabled.
* @param enableIpv4Check {@code true} to enable the IPv4 check
* @return this
* @since 0.17.0
*/
IWordContext enableIpv4Check(final boolean enableIpv4Check);
/**
* 忽略英文的写法
* @return 数字检测

View File

@@ -90,6 +90,12 @@ public class SensitiveWordBs implements ISensitiveWordDestroy {
*/
private boolean enableWordCheck = true;
/**
* Whether the IPv4 check is enabled; initialized to {@code false}.
* @since 0.17.0
*/
private boolean enableIpv4Check = false;
// 额外配置
/**
* 检测数字时的长度
@@ -233,6 +239,7 @@ public class SensitiveWordBs implements ISensitiveWordDestroy {
context.enableEmailCheck(enableEmailCheck);
context.enableUrlCheck(enableUrlCheck);
context.enableWordCheck(enableWordCheck);
context.enableIpv4Check(enableIpv4Check);
// 额外配置
context.sensitiveCheckNumLen(numCheckLen);
@@ -343,6 +350,16 @@ public class SensitiveWordBs implements ISensitiveWordDestroy {
}
//-------------------------------------------------------- 基础属性设置
/**
 * Switches the IPv4 check on or off for this instance.
 *
 * @param enableIpv4Check {@code true} to enable the IPv4 check, {@code false} to disable it
 * @return this instance, so that calls can be chained
 * @since 0.17.0
 */
public SensitiveWordBs enableIpv4Check(final boolean enableIpv4Check) {
    this.enableIpv4Check = enableIpv4Check;
    return this;
}
/**
* 设置是否启动数字检测

View File

@@ -69,6 +69,12 @@ public class SensitiveWordContext implements IWordContext {
*/
private boolean enableUrlCheck;
/**
* Whether the IPv4 check is enabled.
* @since 0.17.0
*/
private boolean enableIpv4Check;
/**
* 敏感数字检测对应的长度限制
* @since 0.2.1
@@ -218,6 +224,17 @@ public class SensitiveWordContext implements IWordContext {
return this;
}
/**
 * Returns the current value of the IPv4 check switch.
 *
 * @return {@code true} if the IPv4 check is enabled
 */
@Override
public boolean enableIpv4Check() {
    return this.enableIpv4Check;
}
/**
 * Stores the IPv4 check switch on this context.
 *
 * @param enableIpv4Check {@code true} to enable the IPv4 check
 * @return this context, so that calls can be chained
 */
@Override
public SensitiveWordContext enableIpv4Check(final boolean enableIpv4Check) {
    this.enableIpv4Check = enableIpv4Check;
    return this;
}
@Override
public boolean ignoreChineseStyle() {
return ignoreChineseStyle;

View File

@@ -9,6 +9,7 @@ public enum WordTypeEnum {
EMAIL("EMAIL", "邮箱"),
URL("URL", "链接"),
NUM("NUM", "数字"),
IPV4("IPV4", "IPv4"),
DEFAULTS("DEFAULTS", "默认"),
;

View File

@@ -0,0 +1,74 @@
package com.github.houbb.sensitive.word.support.check;
import com.github.houbb.heaven.annotation.ThreadSafe;
import com.github.houbb.heaven.util.lang.CharUtil;
import com.github.houbb.heaven.util.lang.StringUtil;
import com.github.houbb.heaven.util.util.regex.RegexUtil;
import com.github.houbb.sensitive.word.api.IWordCheck;
import com.github.houbb.sensitive.word.api.context.InnerSensitiveWordContext;
import com.github.houbb.sensitive.word.constant.WordConst;
import com.github.houbb.sensitive.word.constant.enums.WordTypeEnum;
import com.github.houbb.sensitive.word.utils.InnerCharUtils;
import java.util.List;
/**
 * Word check that recognises dotted-decimal IPv4 addresses such as {@code 127.0.0.1}.
 *
 * <p>Characters are collected while they are digits or dots; the collected text is
 * then accepted only if it consists of exactly four dot-separated octets, each made
 * of one to three characters with a value in the range 0..255.</p>
 *
 * <p>This class declares no instance fields of its own and is exposed through a
 * shared singleton, see {@link #getInstance()}.</p>
 *
 * @author binbin.hou
 * @since 0.17.0
 */
@ThreadSafe
public class WordCheckIPV4 extends AbstractConditionWordCheck {

    /** Length of the shortest dotted-decimal address, {@code 0.0.0.0}. */
    private static final int MIN_IPV4_LEN = 7;

    /** Length of the longest dotted-decimal address, {@code 255.255.255.255}. */
    private static final int MAX_IPV4_LEN = 15;

    /** Number of dot-separated octets in an IPv4 address. */
    private static final int OCTET_COUNT = 4;

    /** Maximum number of characters in a single octet. */
    private static final int MAX_OCTET_LEN = 3;

    /** Largest value a single octet may hold. */
    private static final int MAX_OCTET_VALUE = 255;

    /** Shared singleton instance. */
    private static final IWordCheck INSTANCE = new WordCheckIPV4();

    /**
     * Returns the shared instance of this check.
     *
     * @return the singleton instance
     */
    public static IWordCheck getInstance() {
        return INSTANCE;
    }

    /**
     * Returns the class of this check.
     *
     * @return {@code WordCheckIPV4.class}
     */
    @Override
    protected Class<? extends IWordCheck> getSensitiveCheckClass() {
        return WordCheckIPV4.class;
    }

    /**
     * Returns the type code reported for matches of this check.
     *
     * @return the code of {@link WordTypeEnum#IPV4}
     */
    @Override
    protected String getType() {
        return WordTypeEnum.IPV4.getCode();
    }

    /**
     * Decides whether a single character may be part of an IPv4 candidate.
     *
     * @param mappingChar  the character to test
     * @param index        not used by this check
     * @param checkContext not used by this check
     * @return {@code true} if the character is reported as a number by
     *         {@link CharUtil#isNumber} or is a literal dot
     */
    @Override
    protected boolean isCharCondition(char mappingChar, int index, InnerSensitiveWordContext checkContext) {
        return CharUtil.isNumber(mappingChar) || '.' == mappingChar;
    }

    /**
     * Decides whether the collected text is a valid dotted-decimal IPv4 address.
     *
     * @param index         not used by this check
     * @param stringBuilder the collected candidate text
     * @param checkContext  not used by this check
     * @return {@code true} only if the text is 7 to 15 characters long and splits into
     *         exactly four octets, each 1 to 3 characters long with a value in 0..255
     */
    @Override
    protected boolean isStringCondition(int index, StringBuilder stringBuilder, InnerSensitiveWordContext checkContext) {
        int bufferLen = stringBuilder.length();
        // Cheap length gate first, so that no objects are created for obvious misses.
        if (bufferLen < MIN_IPV4_LEN || bufferLen > MAX_IPV4_LEN) {
            return false;
        }

        String string = stringBuilder.toString();
        List<String> stringList = StringUtil.splitToList(string, '.');
        if (stringList.size() != OCTET_COUNT) {
            return false;
        }

        for (String numStr : stringList) {
            // An octet has 1 to 3 characters. Rejecting empty segments (e.g. "1...234")
            // and over-long ones also keeps the lenient parseInt below from wrapping.
            if (numStr == null || numStr.isEmpty() || numStr.length() > MAX_OCTET_LEN) {
                return false;
            }
            int octet = InnerCharUtils.parseInt(numStr);
            // Valid octets are 0..255; 256 is not a valid octet value.
            if (octet < 0 || octet > MAX_OCTET_VALUE) {
                return false;
            }
        }

        return true;
    }

}

View File

@@ -18,32 +18,6 @@ public final class WordChecks {
private WordChecks(){}
/**
 * Builds the check strategy from the switches enabled on the context.
 *
 * <p>The word, number, email and URL checks are added, in that order, for each
 * switch that is enabled; the resulting list is handed to {@code WordChecks.chains}.</p>
 *
 * @param context the context whose switches are read
 * @return the check built from the enabled switches
 * @since 0.3.0
 */
public static IWordCheck initSensitiveCheck(final IWordContext context) {
    final List<IWordCheck> enabledChecks = new ArrayList<>();

    if (context.enableWordCheck()) {
        enabledChecks.add(WordChecks.word());
    }

    if (context.enableNumCheck()) {
        enabledChecks.add(WordChecks.num());
    }

    if (context.enableEmailCheck()) {
        enabledChecks.add(WordChecks.email());
    }

    if (context.enableUrlCheck()) {
        enabledChecks.add(WordChecks.url());
    }

    return WordChecks.chains(enabledChecks);
}
public static IWordCheck chains(final IWordCheck... sensitiveChecks) {
if (ArrayUtil.isEmpty(sensitiveChecks)){
return none();
@@ -94,4 +68,13 @@ public final class WordChecks {
return WordCheckNone.getInstance();
}
/**
* Returns the IPv4 check, obtained from {@code WordCheckIPV4.getInstance()}.
* @since 0.17.0
* @return the IPv4 check implementation
*/
public static IWordCheck ipv4() {
return WordCheckIPV4.getInstance();
}
}

View File

@@ -29,6 +29,9 @@ public class WordCheckCombine extends AbstractWordCheckCombine {
if(context.enableUrlCheck()) {
wordCheckList.add(WordChecks.url());
}
if(context.enableIpv4Check()) {
wordCheckList.add(WordChecks.ipv4());
}
return wordCheckList;
}

View File

@@ -0,0 +1,41 @@
package com.github.houbb.sensitive.word.utils;
/**
 * Internal helpers for turning digit characters into numeric values.
 *
 * @since 0.17.0
 */
public class InnerCharUtils {

    /**
     * Converts a string of decimal digit characters into an int.
     *
     * <p>No validation is performed: every character contributes
     * {@code c - '0'} at its decimal position, an empty string yields 0,
     * and values beyond the int range wrap around silently.</p>
     *
     * @param text the text to convert; must not be {@code null}
     * @return the accumulated integer value
     * @since 0.17.0
     */
    public static int parseInt(String text) {
        int result = 0;
        // Left-to-right accumulation; equivalent to summing digit * 10^position.
        for (char digit : text.toCharArray()) {
            result = result * 10 + getCharInt(digit);
        }
        return result;
    }

    /**
     * Returns the numeric value of a digit character.
     *
     * @param c the character
     * @return {@code c - '0'}; for '0'..'9' this is 0..9
     * @since 0.17.0
     */
    public static int getCharInt(final char c) {
        return c - '0';
    }

}

View File

@@ -1 +0,0 @@
package ai.com.github.houbb.sensitive.word.utils;

View File

@@ -37,6 +37,8 @@ public class SensitiveWordBsConfigTest {
.enableNumCheck(false)
.enableEmailCheck(false)
.enableUrlCheck(false)
.enableIpv4Check(false)
.enableWordCheck(true)
.numCheckLen(8)
.wordTag(WordTags.none())
.charIgnore(SensitiveWordCharIgnores.defaults())

View File

@@ -0,0 +1,38 @@
package com.github.houbb.sensitive.word.bs;
import org.junit.Assert;
import org.junit.Test;
import java.util.List;
/**
 * Tests for the IPv4 check switch of {@link SensitiveWordBs}.
 *
 * @since 0.17.0
 */
public class SensitiveWordBsIpv4Test {

    /** Sample text containing a single IPv4 address. */
    private static final String TEXT = "个人网站,如果网址打不开可以访问 127.0.0.1。";

    /**
     * With the default configuration the IPv4 address is not reported.
     * @since 0.17.0
     */
    @Test
    public void defaultTest() {
        final SensitiveWordBs defaultBs = SensitiveWordBs.newInstance().init();

        final List<String> found = defaultBs.findAll(TEXT);

        Assert.assertEquals("[]", found.toString());
    }

    /**
     * With the IPv4 check enabled the address is reported.
     * @since 0.17.0
     */
    @Test
    public void ipv4CheckTest() {
        final SensitiveWordBs ipv4Bs = SensitiveWordBs.newInstance()
                .enableIpv4Check(true)
                .init();

        final List<String> found = ipv4Bs.findAll(TEXT);

        Assert.assertEquals("[127.0.0.1]", found.toString());
    }

}