mirror of
https://github.com/houbb/sensitive-word.git
synced 2026-03-22 08:27:36 +08:00
Merge branch 'houbb:master' into master
This commit is contained in:
@@ -393,3 +393,30 @@
|
|||||||
|:---|:-----|----------------|:--------------------|:--------------|
|
|:---|:-----|----------------|:--------------------|:--------------|
|
||||||
| 1 | A | 内置支持多个单词标签实现策略 | 2024-12-22 14:08:20 | 强化单词标签能力,方便复用 |
|
| 1 | A | 内置支持多个单词标签实现策略 | 2024-12-22 14:08:20 | 强化单词标签能力,方便复用 |
|
||||||
| 2 | O | 升级 heaven 依赖 | 2024-12-22 14:08:20 | |
|
| 2 | O | 升级 heaven 依赖 | 2024-12-22 14:08:20 | |
|
||||||
|
|
||||||
|
# release_0.24.1
|
||||||
|
|
||||||
|
| 序号 | 变更类型 | 说明 | 时间 | 备注 |
|
||||||
|
|:---|:-----|------------|:------------------|:---------------|
|
||||||
|
| 1 | F | 删除时添加同步锁优化 | 2025-2-2 15:30:26 | 涉及到接口调整 PR-100 |
|
||||||
|
|
||||||
|
# release_0.24.2
|
||||||
|
|
||||||
|
| 序号 | 变更类型 | 说明 | 时间 | 备注 |
|
||||||
|
|:---|:-----|---------------------|:------------------|:-------------------|
|
||||||
|
| 1 | O | findFirst 真实实现,性能优化 | 2025-2-2 15:30:26 | PR-99 |
|
||||||
|
| 2 | O | 黑白名单遍历统一优化,性能优化 | 2025-2-2 15:30:26 | PR-99 涉及到原始结果返回值调整 |
|
||||||
|
|
||||||
|
# release_0.25.0
|
||||||
|
|
||||||
|
| 序号 | 变更类型 | 说明 | 时间 | 备注 |
|
||||||
|
|:---|:-----|----------------------|:-------------------|:-----|
|
||||||
|
| 1 | A | wordCheck 策略支持用户自定义 | 2025-2-17 12:06:45 | https://github.com/houbb/sensitive-word/issues/101 |
|
||||||
|
| 2 | A | wordCheckUrlNoPrefix | 2025-2-17 12:06:45 | https://github.com/houbb/sensitive-word/issues/101 |
|
||||||
|
|
||||||
|
# release_0.25.0
|
||||||
|
|
||||||
|
| 序号 | 变更类型 | 说明 | 时间 | 备注 |
|
||||||
|
|:---|:-----|----------------------|:-------------------|:-----|
|
||||||
|
| 1 | A | wordCheck 策略支持用户自定义 | 2025-2-17 12:06:45 | https://github.com/houbb/sensitive-word/issues/101 |
|
||||||
|
| 2 | A | wordCheckUrlNoPrefix | 2025-2-17 12:06:45 | https://github.com/houbb/sensitive-word/issues/101 |
|
||||||
|
|||||||
54
README.md
54
README.md
@@ -58,14 +58,6 @@ v0.24.0 开始内置支持对敏感词的分类细化,不过工作量比较大
|
|||||||
|
|
||||||
[CHANGE_LOG.md](https://github.com/houbb/sensitive-word/blob/master/CHANGE_LOG.md)
|
[CHANGE_LOG.md](https://github.com/houbb/sensitive-word/blob/master/CHANGE_LOG.md)
|
||||||
|
|
||||||
### V0.23.0
|
|
||||||
|
|
||||||
- 结果条件拓展支持 wordTags 和 chains
|
|
||||||
|
|
||||||
### V0.24.0
|
|
||||||
|
|
||||||
- 初步内置实现单词标签,丰富单词标签内置策略
|
|
||||||
|
|
||||||
## 更多资料
|
## 更多资料
|
||||||
|
|
||||||
### 敏感词控台
|
### 敏感词控台
|
||||||
@@ -104,7 +96,7 @@ v0.24.0 开始内置支持对敏感词的分类细化,不过工作量比较大
|
|||||||
<dependency>
|
<dependency>
|
||||||
<groupId>com.github.houbb</groupId>
|
<groupId>com.github.houbb</groupId>
|
||||||
<artifactId>sensitive-word</artifactId>
|
<artifactId>sensitive-word</artifactId>
|
||||||
<version>0.24.0</version>
|
<version>0.25.0</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
```
|
```
|
||||||
|
|
||||||
@@ -370,6 +362,22 @@ Assert.assertEquals("[ⒻⒻⒻfⓤuⓤ⒰cⓒ⒦]", wordList.toString());
|
|||||||
|
|
||||||
## 更多检测策略
|
## 更多检测策略
|
||||||
|
|
||||||
|
### 说明
|
||||||
|
|
||||||
|
从 v0.25.0 开始,下面几个检测策略也支持用户通过引导类自定义。所有的策略都是接口,支持用户自定义实现。
|
||||||
|
|
||||||
|
| 序号 | 方法 | 说明 | 默认值 |
|
||||||
|
|:---|:---------------------|:-------------------------------------------|:------|
|
||||||
|
| 16 | wordCheckNum | 数字检测策略(v0.25.0开始支持) | `WordChecks.num()` |
|
||||||
|
| 17 | wordCheckEmail | 邮箱检测策略(v0.25.0开始支持) | `WordChecks.email()` |
|
||||||
|
| 18 | wordCheckUrl | URL检测策略(v0.25.0开始支持),内置还实现了 `urlNoPrefix()` | `WordChecks.url()` |
|
||||||
|
| 19 | wordCheckIpv4 | ipv4检测策略(v0.25.0开始支持) | `WordChecks.ipv4()` |
|
||||||
|
| 20 | wordCheckWord | 敏感词检测策略(v0.25.0开始支持) | `WordChecks.word()` |
|
||||||
|
|
||||||
|
内置实现:
|
||||||
|
|
||||||
|
a) `WordChecks.urlNoPrefix()` 作为 url 的额外实现,可以不需要 `https://` 和 `http://` 前缀。
|
||||||
|
|
||||||
### 邮箱检测
|
### 邮箱检测
|
||||||
|
|
||||||
邮箱等个人信息,默认未启用。
|
邮箱等个人信息,默认未启用。
|
||||||
@@ -418,6 +426,21 @@ Assert.assertEquals("[https://www.baidu.com]", wordList.toString());
|
|||||||
Assert.assertEquals("点击链接 ********************* 查看答案", sensitiveWordBs.replace(text));
|
Assert.assertEquals("点击链接 ********************* 查看答案", sensitiveWordBs.replace(text));
|
||||||
```
|
```
|
||||||
|
|
||||||
|
v0.25.0 内置支持不需要 http 协议的前缀检测:
|
||||||
|
|
||||||
|
```java
|
||||||
|
final String text = "点击链接 https://www.baidu.com 查看答案,当然也可以是 baidu.com、www.baidu.com";
|
||||||
|
|
||||||
|
final SensitiveWordBs sensitiveWordBs = SensitiveWordBs.newInstance()
|
||||||
|
.enableUrlCheck(true) // 启用URL检测
|
||||||
|
.wordCheckUrl(WordChecks.urlNoPrefix()) //指定检测的方式
|
||||||
|
.init();
|
||||||
|
List<String> wordList = sensitiveWordBs.findAll(text);
|
||||||
|
Assert.assertEquals("[www.baidu.com, baidu.com, www.baidu.com]", wordList.toString());
|
||||||
|
|
||||||
|
Assert.assertEquals("点击链接 https://************* 查看答案,当然也可以是 *********、*************", sensitiveWordBs.replace(text));
|
||||||
|
```
|
||||||
|
|
||||||
### IPV4 检测
|
### IPV4 检测
|
||||||
|
|
||||||
v0.17.0 支持
|
v0.17.0 支持
|
||||||
@@ -460,6 +483,11 @@ SensitiveWordBs wordBs = SensitiveWordBs.newInstance()
|
|||||||
.enableUrlCheck(false)
|
.enableUrlCheck(false)
|
||||||
.enableIpv4Check(false)
|
.enableIpv4Check(false)
|
||||||
.enableWordCheck(true)
|
.enableWordCheck(true)
|
||||||
|
.wordCheckNum(WordChecks.num())
|
||||||
|
.wordCheckEmail(WordChecks.email())
|
||||||
|
.wordCheckUrl(WordChecks.url())
|
||||||
|
.wordCheckIpv4(WordChecks.ipv4())
|
||||||
|
.wordCheckWord(WordChecks.word())
|
||||||
.numCheckLen(8)
|
.numCheckLen(8)
|
||||||
.wordTag(WordTags.none())
|
.wordTag(WordTags.none())
|
||||||
.charIgnore(SensitiveWordCharIgnores.defaults())
|
.charIgnore(SensitiveWordCharIgnores.defaults())
|
||||||
@@ -474,7 +502,7 @@ Assert.assertTrue(wordBs.contains(text));
|
|||||||
其中各项配置的说明如下:
|
其中各项配置的说明如下:
|
||||||
|
|
||||||
| 序号 | 方法 | 说明 | 默认值 |
|
| 序号 | 方法 | 说明 | 默认值 |
|
||||||
|:---|:---------------------|:-----------------------------|:------|
|
|:---|:--------------------|:-----------------------------|:------|
|
||||||
| 1 | ignoreCase | 忽略大小写 | true |
|
| 1 | ignoreCase | 忽略大小写 | true |
|
||||||
| 2 | ignoreWidth | 忽略半角全角 | true |
|
| 2 | ignoreWidth | 忽略半角全角 | true |
|
||||||
| 3 | ignoreNumStyle | 忽略数字的写法 | true |
|
| 3 | ignoreNumStyle | 忽略数字的写法 | true |
|
||||||
@@ -490,6 +518,12 @@ Assert.assertTrue(wordBs.contains(text));
|
|||||||
| 13 | wordTag | 词对应的标签 | none |
|
| 13 | wordTag | 词对应的标签 | none |
|
||||||
| 14 | charIgnore | 忽略的字符 | none |
|
| 14 | charIgnore | 忽略的字符 | none |
|
||||||
| 15 | wordResultCondition | 针对匹配的敏感词额外加工,比如可以限制英文单词必须全匹配 | 恒为真 |
|
| 15 | wordResultCondition | 针对匹配的敏感词额外加工,比如可以限制英文单词必须全匹配 | 恒为真 |
|
||||||
|
| 16 | wordCheckNum | 数字检测策略(v0.25.0开始支持) | `WordChecks.num()` |
|
||||||
|
| 17 | wordCheckEmail | 邮箱检测策略(v0.25.0开始支持) | `WordChecks.email()` |
|
||||||
|
| 18 | wordCheckUrl | URL检测策略(v0.25.0开始支持) | `WordChecks.url()` |
|
||||||
|
| 19 | wordCheckIpv4 | ipv4检测策略(v0.25.0开始支持) | `WordChecks.ipv4()` |
|
||||||
|
| 20 | wordCheckWord | 敏感词检测策略(v0.25.0开始支持) | `WordChecks.word()` |
|
||||||
|
| 21 | wordReplace | 替换策略 | `WordReplaces.defaults()` |
|
||||||
|
|
||||||
## 内存资源的释放
|
## 内存资源的释放
|
||||||
|
|
||||||
|
|||||||
2
pom.xml
2
pom.xml
@@ -6,7 +6,7 @@
|
|||||||
|
|
||||||
<groupId>com.github.houbb</groupId>
|
<groupId>com.github.houbb</groupId>
|
||||||
<artifactId>sensitive-word</artifactId>
|
<artifactId>sensitive-word</artifactId>
|
||||||
<version>0.24.0</version>
|
<version>0.25.0</version>
|
||||||
|
|
||||||
<properties>
|
<properties>
|
||||||
<!--============================== All Plugins START ==============================-->
|
<!--============================== All Plugins START ==============================-->
|
||||||
|
|||||||
@@ -10,9 +10,9 @@ ECHO "============================= RELEASE START..."
|
|||||||
|
|
||||||
:: 版本号信息(需要手动指定)
|
:: 版本号信息(需要手动指定)
|
||||||
:::: 旧版本名称
|
:::: 旧版本名称
|
||||||
SET version=0.24.0
|
SET version=0.25.0
|
||||||
:::: 新版本名称
|
:::: 新版本名称
|
||||||
SET newVersion=0.25.0
|
SET newVersion=0.26.0
|
||||||
:::: 组织名称
|
:::: 组织名称
|
||||||
SET groupName=com.github.houbb
|
SET groupName=com.github.houbb
|
||||||
:::: 项目名称
|
:::: 项目名称
|
||||||
|
|||||||
@@ -31,6 +31,7 @@ public interface IWordCheck {
|
|||||||
* @param context 执行上下文
|
* @param context 执行上下文
|
||||||
* @return 敏感信息对应的长度
|
* @return 敏感信息对应的长度
|
||||||
* @since 0.0.5
|
* @since 0.0.5
|
||||||
|
* @since 0.24.2 为了黑白名单统一,调整了对应的返回值
|
||||||
*/
|
*/
|
||||||
WordCheckResult sensitiveCheck(final int beginIndex,
|
WordCheckResult sensitiveCheck(final int beginIndex,
|
||||||
final InnerSensitiveWordContext context);
|
final InnerSensitiveWordContext context);
|
||||||
|
|||||||
@@ -275,4 +275,23 @@ public interface IWordContext {
|
|||||||
|
|
||||||
SensitiveWordContext wordResultCondition(IWordResultCondition wordResultCondition);
|
SensitiveWordContext wordResultCondition(IWordResultCondition wordResultCondition);
|
||||||
|
|
||||||
|
IWordCheck wordCheckWord();
|
||||||
|
|
||||||
|
SensitiveWordContext wordCheckWord(IWordCheck wordCheckWord);
|
||||||
|
|
||||||
|
IWordCheck wordCheckNum();
|
||||||
|
|
||||||
|
SensitiveWordContext wordCheckNum(IWordCheck wordCheckNum);
|
||||||
|
|
||||||
|
IWordCheck wordCheckEmail();
|
||||||
|
|
||||||
|
SensitiveWordContext wordCheckEmail(IWordCheck wordCheckEmail);
|
||||||
|
|
||||||
|
IWordCheck wordCheckUrl();
|
||||||
|
|
||||||
|
SensitiveWordContext wordCheckUrl(IWordCheck wordCheckUrl);
|
||||||
|
|
||||||
|
IWordCheck wordCheckIpv4();
|
||||||
|
|
||||||
|
SensitiveWordContext wordCheckIpv4(IWordCheck wordCheckIpv4);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -22,10 +22,10 @@ public interface IWordData extends ISensitiveWordDestroy {
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* 删除敏感词
|
* 删除敏感词
|
||||||
* @param word 单词
|
* @param collection 单词
|
||||||
* @since 0.19.0
|
* @since 0.19.0
|
||||||
*/
|
*/
|
||||||
void removeWord(String word);
|
void removeWord(Collection<String> collection);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 新增敏感词
|
* 新增敏感词
|
||||||
|
|||||||
@@ -10,6 +10,7 @@ import com.github.houbb.sensitive.word.api.combine.IWordCheckCombine;
|
|||||||
import com.github.houbb.sensitive.word.api.combine.IWordFormatCombine;
|
import com.github.houbb.sensitive.word.api.combine.IWordFormatCombine;
|
||||||
import com.github.houbb.sensitive.word.core.SensitiveWords;
|
import com.github.houbb.sensitive.word.core.SensitiveWords;
|
||||||
import com.github.houbb.sensitive.word.support.allow.WordAllows;
|
import com.github.houbb.sensitive.word.support.allow.WordAllows;
|
||||||
|
import com.github.houbb.sensitive.word.support.check.WordChecks;
|
||||||
import com.github.houbb.sensitive.word.support.combine.allowdeny.WordAllowDenyCombines;
|
import com.github.houbb.sensitive.word.support.combine.allowdeny.WordAllowDenyCombines;
|
||||||
import com.github.houbb.sensitive.word.support.combine.check.WordCheckCombines;
|
import com.github.houbb.sensitive.word.support.combine.check.WordCheckCombines;
|
||||||
import com.github.houbb.sensitive.word.support.combine.format.WordFormatCombines;
|
import com.github.houbb.sensitive.word.support.combine.format.WordFormatCombines;
|
||||||
@@ -182,6 +183,36 @@ public class SensitiveWordBs implements ISensitiveWordDestroy {
|
|||||||
*/
|
*/
|
||||||
private IWordResultCondition wordResultCondition = WordResultConditions.alwaysTrue();
|
private IWordResultCondition wordResultCondition = WordResultConditions.alwaysTrue();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 单词检测策略
|
||||||
|
* @since 0.25.0
|
||||||
|
*/
|
||||||
|
private IWordCheck wordCheckWord = WordChecks.word();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 数字检测策略
|
||||||
|
* @since 0.25.0
|
||||||
|
*/
|
||||||
|
private IWordCheck wordCheckNum = WordChecks.num();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* email 检测策略
|
||||||
|
* @since 0.25.0
|
||||||
|
*/
|
||||||
|
private IWordCheck wordCheckEmail = WordChecks.email();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* URL 检测策略
|
||||||
|
* @since 0.25.0
|
||||||
|
*/
|
||||||
|
private IWordCheck wordCheckUrl = WordChecks.url();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* ipv4 检测策略
|
||||||
|
* @since 0.25.0
|
||||||
|
*/
|
||||||
|
private IWordCheck wordCheckIpv4 = WordChecks.ipv4();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 新建验证实例
|
* 新建验证实例
|
||||||
* <p>
|
* <p>
|
||||||
@@ -255,6 +286,13 @@ public class SensitiveWordBs implements ISensitiveWordDestroy {
|
|||||||
context.enableWordCheck(enableWordCheck);
|
context.enableWordCheck(enableWordCheck);
|
||||||
context.enableIpv4Check(enableIpv4Check);
|
context.enableIpv4Check(enableIpv4Check);
|
||||||
|
|
||||||
|
// 校验策略实现配置
|
||||||
|
context.wordCheckWord(wordCheckWord);
|
||||||
|
context.wordCheckEmail(wordCheckEmail);
|
||||||
|
context.wordCheckNum(wordCheckNum);
|
||||||
|
context.wordCheckUrl(wordCheckUrl);
|
||||||
|
context.wordCheckIpv4(wordCheckIpv4);
|
||||||
|
|
||||||
// 额外配置
|
// 额外配置
|
||||||
context.sensitiveCheckNumLen(numCheckLen);
|
context.sensitiveCheckNumLen(numCheckLen);
|
||||||
context.wordReplace(wordReplace);
|
context.wordReplace(wordReplace);
|
||||||
@@ -370,6 +408,41 @@ public class SensitiveWordBs implements ISensitiveWordDestroy {
|
|||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public SensitiveWordBs wordCheckWord(IWordCheck wordCheckWord) {
|
||||||
|
ArgUtil.notNull(wordCheckWord, "wordCheckWord");
|
||||||
|
|
||||||
|
this.wordCheckWord = wordCheckWord;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
public SensitiveWordBs wordCheckNum(IWordCheck wordCheckNum) {
|
||||||
|
ArgUtil.notNull(wordCheckNum, "wordCheckNum");
|
||||||
|
|
||||||
|
this.wordCheckNum = wordCheckNum;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
public SensitiveWordBs wordCheckEmail(IWordCheck wordCheckEmail) {
|
||||||
|
ArgUtil.notNull(wordCheckEmail, "wordCheckEmail");
|
||||||
|
|
||||||
|
this.wordCheckEmail = wordCheckEmail;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
public SensitiveWordBs wordCheckUrl(IWordCheck wordCheckUrl) {
|
||||||
|
ArgUtil.notNull(wordCheckUrl, "wordCheckUrl");
|
||||||
|
|
||||||
|
this.wordCheckUrl = wordCheckUrl;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
public SensitiveWordBs wordCheckIpv4(IWordCheck wordCheckIpv4) {
|
||||||
|
ArgUtil.notNull(wordCheckIpv4, "wordCheckIpv4");
|
||||||
|
|
||||||
|
this.wordCheckIpv4 = wordCheckIpv4;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
//-------------------------------------------------------- 基础属性设置
|
//-------------------------------------------------------- 基础属性设置
|
||||||
/**
|
/**
|
||||||
* 是否启用 ipv4 校验
|
* 是否启用 ipv4 校验
|
||||||
@@ -642,9 +715,7 @@ public class SensitiveWordBs implements ISensitiveWordDestroy {
|
|||||||
// 主要原因是二者没有保持一致,初始化的数据和插入的数据没有做相同的格式化
|
// 主要原因是二者没有保持一致,初始化的数据和插入的数据没有做相同的格式化
|
||||||
List<String> formatList = InnerWordFormatUtils.formatWordList(collection, context);
|
List<String> formatList = InnerWordFormatUtils.formatWordList(collection, context);
|
||||||
|
|
||||||
for(String word : formatList) {
|
this.wordData.removeWord(formatList);
|
||||||
this.wordData.removeWord(word);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -701,9 +772,8 @@ public class SensitiveWordBs implements ISensitiveWordDestroy {
|
|||||||
// 主要原因是二者没有保持一致,初始化的数据和插入的数据没有做相同的格式化
|
// 主要原因是二者没有保持一致,初始化的数据和插入的数据没有做相同的格式化
|
||||||
List<String> formatList = InnerWordFormatUtils.formatWordList(collection, context);
|
List<String> formatList = InnerWordFormatUtils.formatWordList(collection, context);
|
||||||
|
|
||||||
for(String word : formatList) {
|
this.wordDataAllow.removeWord(formatList);
|
||||||
this.wordDataAllow.removeWord(word);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
/**
|
/**
|
||||||
* 新增敏感词白名单
|
* 新增敏感词白名单
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
package com.github.houbb.sensitive.word.bs;
|
package com.github.houbb.sensitive.word.bs;
|
||||||
|
|
||||||
import com.github.houbb.sensitive.word.api.*;
|
import com.github.houbb.sensitive.word.api.*;
|
||||||
|
import com.github.houbb.sensitive.word.support.check.WordChecks;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 上下文
|
* 上下文
|
||||||
@@ -133,6 +134,36 @@ public class SensitiveWordContext implements IWordContext {
|
|||||||
*/
|
*/
|
||||||
private IWordResultCondition wordResultCondition;
|
private IWordResultCondition wordResultCondition;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 单词检测策略
|
||||||
|
* @since 0.25.0
|
||||||
|
*/
|
||||||
|
private IWordCheck wordCheckWord;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 数字检测策略
|
||||||
|
* @since 0.25.0
|
||||||
|
*/
|
||||||
|
private IWordCheck wordCheckNum;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* email 检测策略
|
||||||
|
* @since 0.25.0
|
||||||
|
*/
|
||||||
|
private IWordCheck wordCheckEmail;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* URL 检测策略
|
||||||
|
* @since 0.25.0
|
||||||
|
*/
|
||||||
|
private IWordCheck wordCheckUrl;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* ipv4 检测策略
|
||||||
|
* @since 0.25.0
|
||||||
|
*/
|
||||||
|
private IWordCheck wordCheckIpv4;
|
||||||
|
|
||||||
public IWordData wordData() {
|
public IWordData wordData() {
|
||||||
return wordData;
|
return wordData;
|
||||||
}
|
}
|
||||||
@@ -355,4 +386,49 @@ public class SensitiveWordContext implements IWordContext {
|
|||||||
this.wordResultCondition = wordResultCondition;
|
this.wordResultCondition = wordResultCondition;
|
||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public IWordCheck wordCheckWord() {
|
||||||
|
return wordCheckWord;
|
||||||
|
}
|
||||||
|
|
||||||
|
public SensitiveWordContext wordCheckWord(IWordCheck wordCheckWord) {
|
||||||
|
this.wordCheckWord = wordCheckWord;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
public IWordCheck wordCheckNum() {
|
||||||
|
return wordCheckNum;
|
||||||
|
}
|
||||||
|
|
||||||
|
public SensitiveWordContext wordCheckNum(IWordCheck wordCheckNum) {
|
||||||
|
this.wordCheckNum = wordCheckNum;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
public IWordCheck wordCheckEmail() {
|
||||||
|
return wordCheckEmail;
|
||||||
|
}
|
||||||
|
|
||||||
|
public SensitiveWordContext wordCheckEmail(IWordCheck wordCheckEmail) {
|
||||||
|
this.wordCheckEmail = wordCheckEmail;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
public IWordCheck wordCheckUrl() {
|
||||||
|
return wordCheckUrl;
|
||||||
|
}
|
||||||
|
|
||||||
|
public SensitiveWordContext wordCheckUrl(IWordCheck wordCheckUrl) {
|
||||||
|
this.wordCheckUrl = wordCheckUrl;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
public IWordCheck wordCheckIpv4() {
|
||||||
|
return wordCheckIpv4;
|
||||||
|
}
|
||||||
|
|
||||||
|
public SensitiveWordContext wordCheckIpv4(IWordCheck wordCheckIpv4) {
|
||||||
|
this.wordCheckIpv4 = wordCheckIpv4;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -60,7 +60,17 @@ public class WordCheckUrl extends AbstractConditionWordCheck {
|
|||||||
|
|
||||||
// 改为 http:// 或者 https:// 开头
|
// 改为 http:// 或者 https:// 开头
|
||||||
String string = stringBuilder.toString();
|
String string = stringBuilder.toString();
|
||||||
return RegexUtil.isUrl(string);
|
return isUrl(string);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 是否为 URL
|
||||||
|
* @param text 原始文本
|
||||||
|
* @return 结果
|
||||||
|
* @since 0.25.0
|
||||||
|
*/
|
||||||
|
protected boolean isUrl(final String text) {
|
||||||
|
return RegexUtil.isUrl(text);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -0,0 +1,32 @@
|
|||||||
|
package com.github.houbb.sensitive.word.support.check;
|
||||||
|
|
||||||
|
import com.github.houbb.heaven.util.util.regex.RegexUtil;
|
||||||
|
import com.github.houbb.sensitive.word.api.IWordCheck;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* (1)暂时先粗略的处理 web-site
|
||||||
|
* (2)如果网址的最后为图片类型,则跳过。
|
||||||
|
* (3)长度超过 70,直接结束。
|
||||||
|
*
|
||||||
|
* 不包含前缀的实现策略
|
||||||
|
*
|
||||||
|
* @author binbin.hou
|
||||||
|
* @since 0.25.0
|
||||||
|
*/
|
||||||
|
public class WordCheckUrlNoPrefix extends WordCheckUrl {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @since 0.25.0
|
||||||
|
*/
|
||||||
|
private static final IWordCheck INSTANCE = new WordCheckUrlNoPrefix();
|
||||||
|
|
||||||
|
public static IWordCheck getInstance() {
|
||||||
|
return INSTANCE;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected boolean isUrl(String text) {
|
||||||
|
return RegexUtil.isWebSite(text);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
@@ -0,0 +1,96 @@
|
|||||||
|
package com.github.houbb.sensitive.word.support.check;
|
||||||
|
|
||||||
|
import com.github.houbb.heaven.annotation.ThreadSafe;
|
||||||
|
import com.github.houbb.sensitive.word.api.ISensitiveWordCharIgnore;
|
||||||
|
import com.github.houbb.sensitive.word.api.IWordCheck;
|
||||||
|
import com.github.houbb.sensitive.word.api.IWordContext;
|
||||||
|
import com.github.houbb.sensitive.word.api.IWordData;
|
||||||
|
import com.github.houbb.sensitive.word.api.context.InnerSensitiveWordContext;
|
||||||
|
import com.github.houbb.sensitive.word.constant.enums.WordContainsTypeEnum;
|
||||||
|
import com.github.houbb.sensitive.word.constant.enums.WordTypeEnum;
|
||||||
|
import com.github.houbb.sensitive.word.support.result.WordLengthResult;
|
||||||
|
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 敏感词监测实现
|
||||||
|
* @author binbin.hou
|
||||||
|
* @since 0.26.0
|
||||||
|
*/
|
||||||
|
@Deprecated
|
||||||
|
public class WordCheckWordMaxLen extends AbstractWordCheck {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected Class<? extends IWordCheck> getSensitiveCheckClass() {
|
||||||
|
return WordCheckWordMaxLen.class;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected WordLengthResult getActualLength(int beginIndex, InnerSensitiveWordContext innerContext) {
|
||||||
|
final String txt = innerContext.originalText();
|
||||||
|
final Map<Character, Character> formatCharMapping = innerContext.formatCharMapping();
|
||||||
|
final IWordContext context = innerContext.wordContext();
|
||||||
|
final IWordData wordData = context.wordData();
|
||||||
|
final IWordData wordDataAllow = context.wordDataAllow();
|
||||||
|
final ISensitiveWordCharIgnore wordCharIgnore = context.charIgnore();
|
||||||
|
|
||||||
|
// 前一个条件
|
||||||
|
StringBuilder stringBuilder = new StringBuilder();
|
||||||
|
char[] rawChars = txt.toCharArray();
|
||||||
|
|
||||||
|
int tempLen = 0;
|
||||||
|
int maxWhite = 0;
|
||||||
|
int maxBlack = 0;
|
||||||
|
boolean firstCheck = true;
|
||||||
|
|
||||||
|
WordContainsTypeEnum wordContainsTypeEnumAllow = wordDataAllow.contains(stringBuilder, innerContext);
|
||||||
|
WordContainsTypeEnum wordContainsTypeEnumDeny = wordData.contains(stringBuilder, innerContext);
|
||||||
|
|
||||||
|
for (int i = beginIndex; i < rawChars.length; i++) {
|
||||||
|
if (wordCharIgnore.ignore(i, rawChars, innerContext) && tempLen != 0) {
|
||||||
|
tempLen++;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
char mappingChar = formatCharMapping.get(rawChars[i]);
|
||||||
|
stringBuilder.append(mappingChar);
|
||||||
|
tempLen++;
|
||||||
|
|
||||||
|
if (firstCheck || !WordContainsTypeEnum.NOT_FOUND.equals(wordContainsTypeEnumAllow)) {
|
||||||
|
wordContainsTypeEnumAllow = wordDataAllow.contains(stringBuilder, innerContext);
|
||||||
|
if (WordContainsTypeEnum.CONTAINS_END.equals(wordContainsTypeEnumAllow)) {
|
||||||
|
maxWhite += tempLen;
|
||||||
|
wordContainsTypeEnumAllow = WordContainsTypeEnum.NOT_FOUND;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// 黑名单命中
|
||||||
|
if (firstCheck || !WordContainsTypeEnum.NOT_FOUND.equals(wordContainsTypeEnumDeny)) {
|
||||||
|
wordContainsTypeEnumDeny = wordData.contains(stringBuilder, innerContext);
|
||||||
|
if (WordContainsTypeEnum.CONTAINS_END.equals(wordContainsTypeEnumDeny)) {
|
||||||
|
maxBlack += tempLen;
|
||||||
|
wordContainsTypeEnumDeny = WordContainsTypeEnum.NOT_FOUND;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// 不再是第一次检测
|
||||||
|
firstCheck = false;
|
||||||
|
|
||||||
|
// 黑白名单都未匹配
|
||||||
|
if (WordContainsTypeEnum.NOT_FOUND.equals(wordContainsTypeEnumAllow) &&
|
||||||
|
WordContainsTypeEnum.NOT_FOUND.equals(wordContainsTypeEnumDeny)) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return WordLengthResult.newInstance()
|
||||||
|
.wordAllowLen(maxWhite)
|
||||||
|
.wordDenyLen(maxBlack);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected String getType() {
|
||||||
|
return WordTypeEnum.WORD.getCode();
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
@@ -77,4 +77,15 @@ public final class WordChecks {
|
|||||||
return WordCheckIPV4.getInstance();
|
return WordCheckIPV4.getInstance();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 不需要协议前缀(http:// 或 https://)的 URL 检测策略
|
||||||
|
* 注意:这种检测方法可能会和代码中的包名称冲突
|
||||||
|
*
|
||||||
|
* @return 实现
|
||||||
|
* @since 0.25.0
|
||||||
|
*/
|
||||||
|
public static IWordCheck urlNoPrefix() {
|
||||||
|
return WordCheckUrlNoPrefix.getInstance();
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -18,19 +18,19 @@ public class WordCheckCombine extends AbstractWordCheckCombine {
|
|||||||
List<IWordCheck> wordCheckList = new ArrayList<>();
|
List<IWordCheck> wordCheckList = new ArrayList<>();
|
||||||
|
|
||||||
if(context.enableWordCheck()) {
|
if(context.enableWordCheck()) {
|
||||||
wordCheckList.add(WordChecks.word());
|
wordCheckList.add(context.wordCheckWord());
|
||||||
}
|
}
|
||||||
if(context.enableNumCheck()) {
|
if(context.enableNumCheck()) {
|
||||||
wordCheckList.add(WordChecks.num());
|
wordCheckList.add(context.wordCheckNum());
|
||||||
}
|
}
|
||||||
if(context.enableEmailCheck()) {
|
if(context.enableEmailCheck()) {
|
||||||
wordCheckList.add(WordChecks.email());
|
wordCheckList.add(context.wordCheckEmail());
|
||||||
}
|
}
|
||||||
if(context.enableUrlCheck()) {
|
if(context.enableUrlCheck()) {
|
||||||
wordCheckList.add(WordChecks.url());
|
wordCheckList.add(context.wordCheckUrl());
|
||||||
}
|
}
|
||||||
if(context.enableIpv4Check()) {
|
if(context.enableIpv4Check()) {
|
||||||
wordCheckList.add(WordChecks.ipv4());
|
wordCheckList.add(context.wordCheckIpv4());
|
||||||
}
|
}
|
||||||
|
|
||||||
return wordCheckList;
|
return wordCheckList;
|
||||||
|
|||||||
@@ -1,6 +1,5 @@
|
|||||||
package com.github.houbb.sensitive.word.support.data;
|
package com.github.houbb.sensitive.word.support.data;
|
||||||
|
|
||||||
import com.github.houbb.heaven.util.lang.StringUtil;
|
|
||||||
import com.github.houbb.heaven.util.util.CollectionUtil;
|
import com.github.houbb.heaven.util.util.CollectionUtil;
|
||||||
import com.github.houbb.sensitive.word.api.IWordData;
|
import com.github.houbb.sensitive.word.api.IWordData;
|
||||||
import com.github.houbb.sensitive.word.api.context.InnerSensitiveWordContext;
|
import com.github.houbb.sensitive.word.api.context.InnerSensitiveWordContext;
|
||||||
@@ -31,9 +30,9 @@ public abstract class AbstractWordData implements IWordData {
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* 删除敏感词
|
* 删除敏感词
|
||||||
* @param word 敏感词
|
* @param collection 集合
|
||||||
*/
|
*/
|
||||||
protected abstract void doRemoveWord(String word);
|
protected abstract void doRemoveWord(Collection<String> collection);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 新增敏感词
|
* 新增敏感词
|
||||||
@@ -49,12 +48,12 @@ public abstract class AbstractWordData implements IWordData {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void removeWord(String word) {
|
public void removeWord(Collection<String> collection) {
|
||||||
if(StringUtil.isEmpty(word)) {
|
if(CollectionUtil.isEmpty(collection)) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
doRemoveWord(word);
|
doRemoveWord(collection);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|||||||
@@ -89,7 +89,7 @@ public class WordDataHashMap extends AbstractWordData {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected void doRemoveWord(String word) {
|
protected void doRemoveWord(Collection<String> collection) {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -4,7 +4,6 @@ import com.github.houbb.heaven.annotation.ThreadSafe;
|
|||||||
import com.github.houbb.heaven.util.lang.ObjectUtil;
|
import com.github.houbb.heaven.util.lang.ObjectUtil;
|
||||||
import com.github.houbb.heaven.util.lang.StringUtil;
|
import com.github.houbb.heaven.util.lang.StringUtil;
|
||||||
import com.github.houbb.sensitive.word.api.IWordContext;
|
import com.github.houbb.sensitive.word.api.IWordContext;
|
||||||
import com.github.houbb.sensitive.word.api.IWordData;
|
|
||||||
import com.github.houbb.sensitive.word.api.context.InnerSensitiveWordContext;
|
import com.github.houbb.sensitive.word.api.context.InnerSensitiveWordContext;
|
||||||
import com.github.houbb.sensitive.word.constant.enums.WordContainsTypeEnum;
|
import com.github.houbb.sensitive.word.constant.enums.WordContainsTypeEnum;
|
||||||
|
|
||||||
@@ -84,49 +83,7 @@ public class WordDataTree extends AbstractWordData {
|
|||||||
this.root = newRoot;
|
this.root = newRoot;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
|
||||||
protected void doRemoveWord(String word) {
|
|
||||||
WordDataTreeNode tempNode = root;
|
|
||||||
//需要删除的
|
|
||||||
Map<Character, WordDataTreeNode> map = new HashMap<>();
|
|
||||||
char[] chars = word.toCharArray();
|
|
||||||
int length = chars.length;
|
|
||||||
for (int i = 0; i < length; i++) {
|
|
||||||
//不存在第一个词
|
|
||||||
WordDataTreeNode subNode = tempNode.getSubNode(chars[i]);
|
|
||||||
if (subNode == null) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
if (i == (length - 1)) {
|
|
||||||
//尾字符判断是否结束
|
|
||||||
if (!subNode.end()) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
if (subNode.getNodeSize() > 0) {
|
|
||||||
//尾字符下还存在字符,即标识即可
|
|
||||||
subNode.end(false);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (subNode.end()) {
|
|
||||||
map.clear();
|
|
||||||
}
|
|
||||||
map.put(chars[i], tempNode);
|
|
||||||
|
|
||||||
tempNode = subNode;
|
|
||||||
}
|
|
||||||
|
|
||||||
for (Map.Entry<Character, WordDataTreeNode> entry : map.entrySet()) {
|
|
||||||
WordDataTreeNode value = entry.getValue();
|
|
||||||
//节点只有一个就置空
|
|
||||||
if (value.getNodeSize() == 1) {
|
|
||||||
value.clearNode();
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
//多个就删除
|
|
||||||
value.removeNode(entry.getKey());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 新增敏感词
|
* 新增敏感词
|
||||||
@@ -143,6 +100,21 @@ public class WordDataTree extends AbstractWordData {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected synchronized void doRemoveWord(Collection<String> collection) {
|
||||||
|
for (String word : collection) {
|
||||||
|
if (StringUtil.isEmpty(word)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
removeWord(this.root, word);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 获取当前的 Map
|
* 获取当前的 Map
|
||||||
* @param nowNode 当前节点
|
* @param nowNode 当前节点
|
||||||
@@ -211,4 +183,48 @@ public class WordDataTree extends AbstractWordData {
|
|||||||
tempNode.end(true);
|
tempNode.end(true);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
private void removeWord(WordDataTreeNode root, String word){
|
||||||
|
WordDataTreeNode tempNode = root;
|
||||||
|
//需要删除的
|
||||||
|
Map<Character, WordDataTreeNode> map = new HashMap<>();
|
||||||
|
char[] chars = word.toCharArray();
|
||||||
|
int length = chars.length;
|
||||||
|
for (int i = 0; i < length; i++) {
|
||||||
|
//不存在第一个词
|
||||||
|
WordDataTreeNode subNode = tempNode.getSubNode(chars[i]);
|
||||||
|
if (subNode == null) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (i == (length - 1)) {
|
||||||
|
//尾字符判断是否结束
|
||||||
|
if (!subNode.end()) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (subNode.getNodeSize() > 0) {
|
||||||
|
//尾字符下还存在字符,即标识即可
|
||||||
|
subNode.end(false);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (subNode.end()) {
|
||||||
|
map.clear();
|
||||||
|
}
|
||||||
|
map.put(chars[i], tempNode);
|
||||||
|
|
||||||
|
tempNode = subNode;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (Map.Entry<Character, WordDataTreeNode> entry : map.entrySet()) {
|
||||||
|
WordDataTreeNode value = entry.getValue();
|
||||||
|
//节点只有一个就置空
|
||||||
|
if (value.getNodeSize() == 1) {
|
||||||
|
value.clearNode();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
//多个就删除
|
||||||
|
value.removeNode(entry.getKey());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,20 +1,28 @@
|
|||||||
package com.github.houbb.sensitive.word.support.result;
|
package com.github.houbb.sensitive.word.support.result;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 说明:统一让黑白名单一次遍历,性能优化
|
||||||
|
*
|
||||||
|
* @since 0.24.2
|
||||||
|
*/
|
||||||
public class WordLengthResult {
|
public class WordLengthResult {
|
||||||
|
/**
|
||||||
|
* 白名单长度
|
||||||
|
*/
|
||||||
private int wordAllowLen;
|
private int wordAllowLen;
|
||||||
|
/**
|
||||||
|
* 黑名单长度
|
||||||
|
*/
|
||||||
private int wordDenyLen;
|
private int wordDenyLen;
|
||||||
|
|
||||||
|
|
||||||
private WordLengthResult(){}
|
|
||||||
|
|
||||||
public static WordLengthResult newInstance() {
|
public static WordLengthResult newInstance() {
|
||||||
return new WordLengthResult();
|
return new WordLengthResult();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public int wordAllowLen() {
|
public int wordAllowLen() {
|
||||||
return this.wordAllowLen;
|
return this.wordAllowLen;
|
||||||
}
|
}
|
||||||
|
|
||||||
public WordLengthResult wordAllowLen(int wordAllowLen) {
|
public WordLengthResult wordAllowLen(int wordAllowLen) {
|
||||||
this.wordAllowLen = wordAllowLen;
|
this.wordAllowLen = wordAllowLen;
|
||||||
return this;
|
return this;
|
||||||
@@ -23,10 +31,18 @@ public class WordLengthResult {
|
|||||||
public int wordDenyLen() {
|
public int wordDenyLen() {
|
||||||
return this.wordDenyLen;
|
return this.wordDenyLen;
|
||||||
}
|
}
|
||||||
|
|
||||||
public WordLengthResult wordDenyLen(int wordDenyLen) {
|
public WordLengthResult wordDenyLen(int wordDenyLen) {
|
||||||
this.wordDenyLen = wordDenyLen;
|
this.wordDenyLen = wordDenyLen;
|
||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return "WordLengthResult{" +
|
||||||
|
"wordAllowLen=" + wordAllowLen +
|
||||||
|
", wordDenyLen=" + wordDenyLen +
|
||||||
|
'}';
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -71,6 +71,8 @@ public class WordTags {
|
|||||||
/**
|
/**
|
||||||
* 根据标准的约定行处理
|
* 根据标准的约定行处理
|
||||||
* @param lines 行信息
|
* @param lines 行信息
|
||||||
|
* @param wordSplit 单词分割
|
||||||
|
* @param tagSplit 标签分割
|
||||||
* @return 结果
|
* @return 结果
|
||||||
*/
|
*/
|
||||||
public static IWordTag lines(final Collection<String> lines, final String wordSplit, final String tagSplit) {
|
public static IWordTag lines(final Collection<String> lines, final String wordSplit, final String tagSplit) {
|
||||||
|
|||||||
@@ -72,9 +72,9 @@ public class BenchmarkBasicTest {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* * 黑白名单一次遍历 优化前:300*他们在地铁口交易,查10000次,26183
|
* 黑白名单一次遍历 优化前:300*他们在地铁口交易,查10000次,26183
|
||||||
* * 黑白名单一次遍历 优化后:300*他们在地铁口交易,查10000次,15705
|
* 黑白名单一次遍历 优化后:300*他们在地铁口交易,查10000次,15705
|
||||||
*
|
* @since 0.24.2
|
||||||
*/
|
*/
|
||||||
@Test
|
@Test
|
||||||
public void costTimeOneTraceTest() {
|
public void costTimeOneTraceTest() {
|
||||||
@@ -86,24 +86,17 @@ public class BenchmarkBasicTest {
|
|||||||
|
|
||||||
// 1W 次
|
// 1W 次
|
||||||
long start = System.currentTimeMillis();
|
long start = System.currentTimeMillis();
|
||||||
SensitiveWordBs sensitiveWordBs = SensitiveWordBs.newInstance()
|
SensitiveWordBs sensitiveWordBs = SensitiveWordBs.newInstance().wordDeny(new IWordDeny() {
|
||||||
.wordDeny(new IWordDeny() {
|
|
||||||
@Override
|
@Override
|
||||||
public List<String> deny() {
|
public List<String> deny() {
|
||||||
return Collections.singletonList("口交");
|
return Collections.singletonList("口交");
|
||||||
}
|
}
|
||||||
})
|
}).wordAllow(new IWordAllow() {
|
||||||
.wordAllow(new IWordAllow() {
|
|
||||||
@Override
|
@Override
|
||||||
public List<String> allow() {
|
public List<String> allow() {
|
||||||
return Collections.singletonList("地铁口交易");
|
return Collections.singletonList("地铁口交易");
|
||||||
}
|
}
|
||||||
})
|
}).enableWordCheck(true).enableNumCheck(false).enableUrlCheck(false).enableEmailCheck(false).init();
|
||||||
.enableWordCheck(true)
|
|
||||||
.enableNumCheck(false)
|
|
||||||
.enableUrlCheck(false)
|
|
||||||
.enableEmailCheck(false)
|
|
||||||
.init();
|
|
||||||
|
|
||||||
for (int i = 0; i < 10000; i++) {
|
for (int i = 0; i < 10000; i++) {
|
||||||
sensitiveWordBs.findAll(text);
|
sensitiveWordBs.findAll(text);
|
||||||
|
|||||||
@@ -4,8 +4,10 @@ import com.github.houbb.heaven.util.io.FileUtil;
|
|||||||
import com.github.houbb.heaven.util.util.CollectionUtil;
|
import com.github.houbb.heaven.util.util.CollectionUtil;
|
||||||
import com.github.houbb.sensitive.word.data.WordCountDto;
|
import com.github.houbb.sensitive.word.data.WordCountDto;
|
||||||
import com.github.houbb.sensitive.word.support.allow.WordAllows;
|
import com.github.houbb.sensitive.word.support.allow.WordAllows;
|
||||||
|
import com.github.houbb.sensitive.word.support.check.WordChecks;
|
||||||
import com.github.houbb.sensitive.word.support.deny.WordDenys;
|
import com.github.houbb.sensitive.word.support.deny.WordDenys;
|
||||||
import com.github.houbb.sensitive.word.support.ignore.SensitiveWordCharIgnores;
|
import com.github.houbb.sensitive.word.support.ignore.SensitiveWordCharIgnores;
|
||||||
|
import com.github.houbb.sensitive.word.support.replace.WordReplaces;
|
||||||
import com.github.houbb.sensitive.word.support.resultcondition.WordResultConditions;
|
import com.github.houbb.sensitive.word.support.resultcondition.WordResultConditions;
|
||||||
import com.github.houbb.sensitive.word.support.tag.WordTags;
|
import com.github.houbb.sensitive.word.support.tag.WordTags;
|
||||||
import org.junit.Assert;
|
import org.junit.Assert;
|
||||||
@@ -39,46 +41,19 @@ public class SensitiveWordBsConfigTest {
|
|||||||
.enableUrlCheck(false)
|
.enableUrlCheck(false)
|
||||||
.enableIpv4Check(false)
|
.enableIpv4Check(false)
|
||||||
.enableWordCheck(true)
|
.enableWordCheck(true)
|
||||||
|
.wordCheckNum(WordChecks.num())
|
||||||
|
.wordCheckEmail(WordChecks.email())
|
||||||
|
.wordCheckUrl(WordChecks.url())
|
||||||
|
.wordCheckIpv4(WordChecks.ipv4())
|
||||||
|
.wordCheckWord(WordChecks.word())
|
||||||
.numCheckLen(8)
|
.numCheckLen(8)
|
||||||
.wordTag(WordTags.none())
|
.wordTag(WordTags.none())
|
||||||
.charIgnore(SensitiveWordCharIgnores.defaults())
|
.charIgnore(SensitiveWordCharIgnores.defaults())
|
||||||
.wordResultCondition(WordResultConditions.alwaysTrue())
|
.wordResultCondition(WordResultConditions.alwaysTrue())
|
||||||
.wordAllow(WordAllows.defaults())
|
.wordAllow(WordAllows.defaults())
|
||||||
.wordDeny(WordDenys.defaults())
|
.wordDeny(WordDenys.defaults())
|
||||||
|
.wordReplace(WordReplaces.defaults())
|
||||||
.init();
|
.init();
|
||||||
|
|
||||||
// String dir = "D:\\code\\github\\houbb.github.io\\_posts";
|
|
||||||
// File[] files = new File(dir).listFiles();
|
|
||||||
//
|
|
||||||
// Set<String> wordSet = new HashSet<>();
|
|
||||||
//
|
|
||||||
// Map<String, Integer> wordCountMap = new HashMap<>();
|
|
||||||
// for(File file : files) {
|
|
||||||
// String content = FileUtil.getFileContent(file);
|
|
||||||
// List<String> allWords = wordBs.findAll(content);
|
|
||||||
//
|
|
||||||
// for(String word : allWords) {
|
|
||||||
// Integer integer = wordCountMap.get(word);
|
|
||||||
// if(integer == null) {
|
|
||||||
// integer = 0;
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// integer++;
|
|
||||||
// wordCountMap.put(word, integer);
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// System.out.println(file.getName());
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
//// List<WordCountDto> wordCountDtoList = new ArrayList<>();
|
|
||||||
// for(Map.Entry<String, Integer> entry : wordCountMap.entrySet()) {
|
|
||||||
// if(entry.getValue() >= 3) {
|
|
||||||
// System.out.println(entry.getKey() + " : " + entry.getValue());
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
|
|
||||||
// Collections.sort(wordCountDtoList);
|
|
||||||
// System.out.println(wordCountDtoList);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
|||||||
@@ -0,0 +1,37 @@
|
|||||||
|
package com.github.houbb.sensitive.word.bs;
|
||||||
|
|
||||||
|
import com.github.houbb.sensitive.word.support.check.WordChecks;
|
||||||
|
import org.junit.Assert;
|
||||||
|
import org.junit.Test;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* <p> project: sensitive-word-SensitiveWordBsTest </p>
|
||||||
|
* <p> create on 2020/1/7 23:43 </p>
|
||||||
|
*
|
||||||
|
* @author Administrator
|
||||||
|
* @since 0.25.0
|
||||||
|
*/
|
||||||
|
public class SensitiveWordBsUrlNoPrefixTest {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* URL 检测
|
||||||
|
*
|
||||||
|
* @since 0.25.0
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void urlNoPrefixTest() {
|
||||||
|
final String text = "点击链接 https://www.baidu.com 查看答案,当然也可以是 baidu.com、www.baidu.com";
|
||||||
|
|
||||||
|
final SensitiveWordBs sensitiveWordBs = SensitiveWordBs.newInstance()
|
||||||
|
.enableUrlCheck(true) // 启用URL检测
|
||||||
|
.wordCheckUrl(WordChecks.urlNoPrefix()) //指定检测的方式
|
||||||
|
.init();
|
||||||
|
List<String> wordList = sensitiveWordBs.findAll(text);
|
||||||
|
Assert.assertEquals("[www.baidu.com, baidu.com, www.baidu.com]", wordList.toString());
|
||||||
|
|
||||||
|
Assert.assertEquals("点击链接 https://************* 查看答案,当然也可以是 *********、*************", sensitiveWordBs.replace(text));
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
@@ -0,0 +1,28 @@
|
|||||||
|
package com.github.houbb.sensitive.word.bs;
|
||||||
|
|
||||||
|
import com.github.houbb.sensitive.word.api.IWordDeny;
|
||||||
|
import org.junit.Assert;
|
||||||
|
import org.junit.Test;
|
||||||
|
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
public class SensitiveWordMaxFirstTest {
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void maxFirstTest() {
|
||||||
|
SensitiveWordBs bs = SensitiveWordBs.newInstance()
|
||||||
|
.wordDeny(new IWordDeny() {
|
||||||
|
@Override
|
||||||
|
public List<String> deny() {
|
||||||
|
return Arrays.asList("我的世界", "我的");
|
||||||
|
}
|
||||||
|
}).init();
|
||||||
|
|
||||||
|
String text = "我的世界我的好玩";
|
||||||
|
|
||||||
|
List<String> textList = bs.findAll(text);
|
||||||
|
// Assert.assertEquals("", textList.toString());
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user