mirror of
https://github.com/houbb/sensitive-word.git
synced 2026-03-22 08:27:36 +08:00
release branch 0.17.0
This commit is contained in:
@@ -318,3 +318,9 @@
|
||||
| 1 | O | 敏感词优化移除: 译码器/鞋帽/破解/看下/快乐水/冷却/招聘/搬迁/机票/谣言/第4代/赚钱/1989年/贫穷/护士/工作人员/网通/超速/明心/人民/服务管理/刺激/心水/ | 2024-6-01 15:02:25 | |
|
||||
| 2 | O | 敏感词优化移除: 影院/公关/韦小宝/挣钱/资本主义/人也/战争/水浒/女装/装修/妹妹/他爸爸/他妈妈 | 2024-6-01 15:02:25 | |
|
||||
| 3 | O | 默认关闭数字+网址+邮箱的校验 | 2024-6-01 15:02:25 | 降低误判率 |
|
||||
|
||||
# release_0.17.0
|
||||
|
||||
| 序号 | 变更类型 | 说明 | 时间 | 备注 |
|
||||
|:---|:-----|---------|:------------------|:------|
|
||||
| 1 | A | IPV4 校验 | 2024-6-01 15:02:25 | https://github.com/houbb/sensitive-word/issues/43 |
|
||||
|
||||
31
README.md
31
README.md
@@ -353,6 +353,8 @@ Assert.assertEquals("[ⒻⒻⒻfⓤuⓤ⒰cⓒ⒦]", wordList.toString());
|
||||
|
||||
### 邮箱检测
|
||||
|
||||
邮箱等个人信息,默认未启用。
|
||||
|
||||
```java
|
||||
final String text = "楼主好人,邮箱 sensitiveword@xx.com";
|
||||
List<String> wordList = SensitiveWordBs.newInstance().enableEmailCheck(true).init().findAll(text);
|
||||
@@ -361,7 +363,7 @@ Assert.assertEquals("[sensitiveword@xx.com]", wordList.toString());
|
||||
|
||||
### 连续数字检测
|
||||
|
||||
一般用于过滤手机号/QQ等广告信息。
|
||||
一般用于过滤手机号/QQ等广告信息,默认未启用。
|
||||
|
||||
V0.2.1 之后,支持通过 `numCheckLen(长度)` 自定义检测的长度。
|
||||
|
||||
@@ -385,7 +387,7 @@ Assert.assertEquals("[]", wordList2.toString());
|
||||
|
||||
### 网址检测
|
||||
|
||||
用于过滤常见的网址信息。
|
||||
用于过滤常见的网址信息,默认未启用。
|
||||
|
||||
```java
|
||||
final String text = "点击链接 www.baidu.com查看答案";
|
||||
@@ -398,6 +400,19 @@ Assert.assertEquals("[www.baidu.com]", wordList.toString());
|
||||
Assert.assertEquals("点击链接 *************查看答案", sensitiveWordBs.replace(text));
|
||||
```
|
||||
|
||||
### IPV4 检测
|
||||
|
||||
v0.17.0 支持
|
||||
|
||||
避免用户通过 ip 绕过网址检测等,默认未启用。
|
||||
|
||||
```java
|
||||
final String text = "个人网站,如果网址打不开可以访问 127.0.0.1。";
|
||||
final SensitiveWordBs sensitiveWordBs = SensitiveWordBs.newInstance().enableIpv4Check(true).init();
|
||||
List<String> wordList = sensitiveWordBs.findAll(text);
|
||||
Assert.assertEquals("[127.0.0.1]", wordList.toString());
|
||||
```
|
||||
|
||||
# 引导类特性配置
|
||||
|
||||
## 说明
|
||||
@@ -423,6 +438,7 @@ SensitiveWordBs wordBs = SensitiveWordBs.newInstance()
|
||||
.enableNumCheck(false)
|
||||
.enableEmailCheck(false)
|
||||
.enableUrlCheck(false)
|
||||
.enableIpv4Check(false)
|
||||
.enableWordCheck(true)
|
||||
.numCheckLen(8)
|
||||
.wordTag(WordTags.none())
|
||||
@@ -448,11 +464,12 @@ Assert.assertTrue(wordBs.contains(text));
|
||||
| 7 | enableNumCheck | 是否启用数字检测。 | false |
|
||||
| 8 | enableEmailCheck | 是否启用邮箱检测 | false |
|
||||
| 9 | enableUrlCheck | 是否启用链接检测 | false |
|
||||
| 10 | enableWordCheck | 是否启用敏感单词检测 | true |
|
||||
| 11 | numCheckLen | 数字检测,自定义指定长度。 | 8 |
|
||||
| 12 | wordTag | 词对应的标签 | none |
|
||||
| 13 | charIgnore | 忽略的字符 | none |
|
||||
| 14 | wordResultCondition | 针对匹配的敏感词额外加工,比如可以限制英文单词必须全匹配 | 恒为真 |
|
||||
| 10 | enableIpv4Check | 是否启用IPv4检测 | false |
|
||||
| 11 | enableWordCheck | 是否启用敏感单词检测 | true |
|
||||
| 12 | numCheckLen | 数字检测,自定义指定长度。 | 8 |
|
||||
| 13 | wordTag | 词对应的标签 | none |
|
||||
| 14 | charIgnore | 忽略的字符 | none |
|
||||
| 15 | wordResultCondition | 针对匹配的敏感词额外加工,比如可以限制英文单词必须全匹配 | 恒为真 |
|
||||
|
||||
## 内存的释放
|
||||
|
||||
|
||||
2
pom.xml
2
pom.xml
@@ -6,7 +6,7 @@
|
||||
|
||||
<groupId>com.github.houbb</groupId>
|
||||
<artifactId>sensitive-word</artifactId>
|
||||
<version>0.16.2</version>
|
||||
<version>0.17.0</version>
|
||||
|
||||
<properties>
|
||||
<!--============================== All Plugins START ==============================-->
|
||||
|
||||
@@ -10,9 +10,9 @@ ECHO "============================= RELEASE START..."
|
||||
|
||||
:: 版本号信息(需要手动指定)
|
||||
:::: 旧版本名称
|
||||
SET version=0.16.2
|
||||
SET version=0.17.0
|
||||
:::: 新版本名称
|
||||
SET newVersion=0.17.0
|
||||
SET newVersion=0.18.0
|
||||
:::: 组织名称
|
||||
SET groupName=com.github.houbb
|
||||
:::: 项目名称
|
||||
|
||||
@@ -126,6 +126,21 @@ public interface IWordContext {
|
||||
*/
|
||||
IWordContext enableUrlCheck(final boolean enableUrlCheck);
|
||||
|
||||
/**
 * Reports whether the IPv4 check is enabled.
 *
 * @return the current value of the IPv4 check switch
 * @since 0.17.0
 */
|
||||
boolean enableIpv4Check();
|
||||
|
||||
/**
 * Sets whether the IPv4 check is enabled.
 *
 * @param enableIpv4Check whether the IPv4 check should be enabled
 * @return this
 * @since 0.17.0
 */
|
||||
IWordContext enableIpv4Check(final boolean enableIpv4Check);
|
||||
|
||||
/**
|
||||
* 忽略英文的写法
|
||||
* @return 数字检测
|
||||
|
||||
@@ -90,6 +90,12 @@ public class SensitiveWordBs implements ISensitiveWordDestroy {
|
||||
*/
|
||||
private boolean enableWordCheck = true;
|
||||
|
||||
/**
|
||||
* 是否启用 ipv4 校验
|
||||
* @since 0.17.0
|
||||
*/
|
||||
private boolean enableIpv4Check = false;
|
||||
|
||||
// 额外配置
|
||||
/**
|
||||
* 检测数字时的长度
|
||||
@@ -233,6 +239,7 @@ public class SensitiveWordBs implements ISensitiveWordDestroy {
|
||||
context.enableEmailCheck(enableEmailCheck);
|
||||
context.enableUrlCheck(enableUrlCheck);
|
||||
context.enableWordCheck(enableWordCheck);
|
||||
context.enableIpv4Check(enableIpv4Check);
|
||||
|
||||
// 额外配置
|
||||
context.sensitiveCheckNumLen(numCheckLen);
|
||||
@@ -343,6 +350,16 @@ public class SensitiveWordBs implements ISensitiveWordDestroy {
|
||||
}
|
||||
|
||||
//-------------------------------------------------------- 基础属性设置
|
||||
/**
|
||||
* 是否启用 ipv4 校验
|
||||
* @param enableIpv4Check 是否启用
|
||||
* @return this
|
||||
* @since 0.17.0
|
||||
*/
|
||||
public SensitiveWordBs enableIpv4Check(boolean enableIpv4Check) {
|
||||
this.enableIpv4Check = enableIpv4Check;
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* 设置是否启动数字检测
|
||||
|
||||
@@ -69,6 +69,12 @@ public class SensitiveWordContext implements IWordContext {
|
||||
*/
|
||||
private boolean enableUrlCheck;
|
||||
|
||||
/**
|
||||
* 是否启用 ipv4 校验
|
||||
* @since 0.17.0
|
||||
*/
|
||||
private boolean enableIpv4Check;
|
||||
|
||||
/**
|
||||
* 敏感数字检测对应的长度限制
|
||||
* @since 0.2.1
|
||||
@@ -218,6 +224,17 @@ public class SensitiveWordContext implements IWordContext {
|
||||
return this;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean enableIpv4Check() {
|
||||
return enableIpv4Check;
|
||||
}
|
||||
|
||||
@Override
|
||||
public SensitiveWordContext enableIpv4Check(boolean enableIpv4Check) {
|
||||
this.enableIpv4Check = enableIpv4Check;
|
||||
return this;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean ignoreChineseStyle() {
|
||||
return ignoreChineseStyle;
|
||||
|
||||
@@ -9,6 +9,7 @@ public enum WordTypeEnum {
|
||||
EMAIL("EMAIL", "邮箱"),
|
||||
URL("URL", "链接"),
|
||||
NUM("NUM", "数字"),
|
||||
IPV4("IPV4", "IPv4"),
|
||||
|
||||
DEFAULTS("DEFAULTS", "默认"),
|
||||
;
|
||||
|
||||
@@ -0,0 +1,74 @@
|
||||
package com.github.houbb.sensitive.word.support.check;
|
||||
|
||||
import com.github.houbb.heaven.annotation.ThreadSafe;
|
||||
import com.github.houbb.heaven.util.lang.CharUtil;
|
||||
import com.github.houbb.heaven.util.lang.StringUtil;
|
||||
import com.github.houbb.heaven.util.util.regex.RegexUtil;
|
||||
import com.github.houbb.sensitive.word.api.IWordCheck;
|
||||
import com.github.houbb.sensitive.word.api.context.InnerSensitiveWordContext;
|
||||
import com.github.houbb.sensitive.word.constant.WordConst;
|
||||
import com.github.houbb.sensitive.word.constant.enums.WordTypeEnum;
|
||||
import com.github.houbb.sensitive.word.utils.InnerCharUtils;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* IPV4 检测
|
||||
*
|
||||
* @author binbin.hou
|
||||
* @since 0.17.0
|
||||
*/
|
||||
@ThreadSafe
|
||||
public class WordCheckIPV4 extends AbstractConditionWordCheck {
|
||||
|
||||
private static final IWordCheck INSTANCE = new WordCheckIPV4();
|
||||
|
||||
public static IWordCheck getInstance() {
|
||||
return INSTANCE;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Class<? extends IWordCheck> getSensitiveCheckClass() {
|
||||
return WordCheckIPV4.class;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected String getType() {
|
||||
return WordTypeEnum.IPV4.getCode();
|
||||
}
|
||||
|
||||
@Override
|
||||
protected boolean isCharCondition(char mappingChar, int index, InnerSensitiveWordContext checkContext) {
|
||||
return CharUtil.isNumber(mappingChar) || '.' == mappingChar;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected boolean isStringCondition(int index, StringBuilder stringBuilder, InnerSensitiveWordContext checkContext) {
|
||||
int bufferLen = stringBuilder.length();
|
||||
//0.0.0.0
|
||||
//255.255.255.255
|
||||
if(bufferLen < 7
|
||||
|| bufferLen > 15) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// 尽可能减少对象的创建
|
||||
String string = stringBuilder.toString();
|
||||
List<String> stringList = StringUtil.splitToList(string, '.');
|
||||
if(stringList.size() != 4) {
|
||||
return false;
|
||||
}
|
||||
|
||||
for(String numStr : stringList) {
|
||||
int integer = InnerCharUtils.parseInt(numStr);
|
||||
|
||||
if(integer < 0 || integer > 256) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// 额外处理
|
||||
return true;
|
||||
}
|
||||
|
||||
}
|
||||
@@ -18,32 +18,6 @@ public final class WordChecks {
|
||||
|
||||
private WordChecks(){}
|
||||
|
||||
/**
|
||||
* 初始化敏感检测策略
|
||||
* @param context 上下文
|
||||
*
|
||||
* @return 实现
|
||||
* @since 0.3.0
|
||||
*/
|
||||
public static IWordCheck initSensitiveCheck(final IWordContext context) {
|
||||
List<IWordCheck> sensitiveCheckList = new ArrayList<>();
|
||||
|
||||
if(context.enableWordCheck()) {
|
||||
sensitiveCheckList.add(WordChecks.word());
|
||||
}
|
||||
if(context.enableNumCheck()) {
|
||||
sensitiveCheckList.add(WordChecks.num());
|
||||
}
|
||||
if(context.enableEmailCheck()) {
|
||||
sensitiveCheckList.add(WordChecks.email());
|
||||
}
|
||||
if(context.enableUrlCheck()) {
|
||||
sensitiveCheckList.add(WordChecks.url());
|
||||
}
|
||||
|
||||
return WordChecks.chains(sensitiveCheckList);
|
||||
}
|
||||
|
||||
public static IWordCheck chains(final IWordCheck... sensitiveChecks) {
|
||||
if (ArrayUtil.isEmpty(sensitiveChecks)){
|
||||
return none();
|
||||
@@ -94,4 +68,13 @@ public final class WordChecks {
|
||||
return WordCheckNone.getInstance();
|
||||
}
|
||||
|
||||
/**
|
||||
* ipv4 校验
|
||||
* @since 0.17.0
|
||||
* @return 实现
|
||||
*/
|
||||
public static IWordCheck ipv4() {
|
||||
return WordCheckIPV4.getInstance();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -29,6 +29,9 @@ public class WordCheckCombine extends AbstractWordCheckCombine {
|
||||
if(context.enableUrlCheck()) {
|
||||
wordCheckList.add(WordChecks.url());
|
||||
}
|
||||
if(context.enableIpv4Check()) {
|
||||
wordCheckList.add(WordChecks.ipv4());
|
||||
}
|
||||
|
||||
return wordCheckList;
|
||||
}
|
||||
|
||||
@@ -0,0 +1,41 @@
|
||||
package com.github.houbb.sensitive.word.utils;
|
||||
|
||||
/**
 * Internal character helpers.
 *
 * @since 0.17.0
 */
public class InnerCharUtils {

    /**
     * Converts a string of decimal digit characters to an int.
     *
     * <p>No validation is performed: each character is mapped through
     * {@link #getCharInt(char)} and accumulated as a base-10 digit. An empty
     * string yields 0, and values too large for an int wrap around.</p>
     *
     * @param text the text to convert
     * @return the accumulated value
     * @since 0.17.0
     */
    public static int parseInt(String text) {
        int result = 0;
        // Left-to-right accumulation: shift what we have by one decimal place, then add the next digit.
        for (char digit : text.toCharArray()) {
            result = result * 10 + getCharInt(digit);
        }
        return result;
    }

    /**
     * Returns the offset of a character from {@code '0'}.
     *
     * <p>For {@code '0'}..{@code '9'} this is the digit's numeric value.</p>
     *
     * @param c the character
     * @return {@code c - '0'}
     * @since 0.17.0
     */
    public static int getCharInt(final char c) {
        final int zero = '0';
        return c - zero;
    }

}
|
||||
@@ -1 +0,0 @@
|
||||
package ai.com.github.houbb.sensitive.word.utils;
|
||||
@@ -37,6 +37,8 @@ public class SensitiveWordBsConfigTest {
|
||||
.enableNumCheck(false)
|
||||
.enableEmailCheck(false)
|
||||
.enableUrlCheck(false)
|
||||
.enableIpv4Check(false)
|
||||
.enableWordCheck(true)
|
||||
.numCheckLen(8)
|
||||
.wordTag(WordTags.none())
|
||||
.charIgnore(SensitiveWordCharIgnores.defaults())
|
||||
|
||||
@@ -0,0 +1,38 @@
|
||||
package com.github.houbb.sensitive.word.bs;
|
||||
|
||||
import org.junit.Assert;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
|
||||
*/
|
||||
public class SensitiveWordBsIpv4Test {
|
||||
|
||||
/**
|
||||
* ipv4 地址
|
||||
* @since 0.17.0
|
||||
*/
|
||||
@Test
|
||||
public void defaultTest() {
|
||||
final String text = "个人网站,如果网址打不开可以访问 127.0.0.1。";
|
||||
final SensitiveWordBs sensitiveWordBs = SensitiveWordBs.newInstance().init();
|
||||
|
||||
List<String> wordList = sensitiveWordBs.findAll(text);
|
||||
Assert.assertEquals("[]", wordList.toString());
|
||||
}
|
||||
|
||||
/**
|
||||
* ipv4 地址
|
||||
* @since 0.17.0
|
||||
*/
|
||||
@Test
|
||||
public void ipv4CheckTest() {
|
||||
final String text = "个人网站,如果网址打不开可以访问 127.0.0.1。";
|
||||
final SensitiveWordBs sensitiveWordBs = SensitiveWordBs.newInstance().enableIpv4Check(true).init();
|
||||
List<String> wordList = sensitiveWordBs.findAll(text);
|
||||
Assert.assertEquals("[127.0.0.1]", wordList.toString());
|
||||
}
|
||||
|
||||
}
|
||||
Reference in New Issue
Block a user