release branch 0.25.0

This commit is contained in:
houbb
2025-02-17 12:45:07 +08:00
parent b7757e6f3f
commit 49407d1489
13 changed files with 323 additions and 59 deletions

View File

@@ -405,4 +405,11 @@
| 序号 | 变更类型 | 说明 | 时间 | 备注 |
|:---|:-----|---------------------|:------------------|:-------------------|
| 1 | O | findFirst 真实实现,性能优化 | 2025-2-2 15:30:26 | PR-99 |
| 2 | O | 黑白名单遍历统一优化,性能优化 | 2025-2-2 15:30:26 | PR-99 涉及到原始结果返回值调整 |
| 2 | O | 黑白名单遍历统一优化,性能优化 | 2025-2-2 15:30:26 | PR-99 涉及到原始结果返回值调整 |
# release_0.25.0
| 序号 | 变更类型 | 说明 | 时间 | 备注 |
|:---|:-----|----------------------|:-------------------|:-----|
| 1 | A | wordCheck 策略支持用户自定义 | 2025-2-17 12:06:45 | https://github.com/houbb/sensitive-word/issues/101 |
| 2 | A | wordCheckUrlNoPrefix | 2025-2-17 12:06:45 | https://github.com/houbb/sensitive-word/issues/101 |

View File

@@ -58,21 +58,6 @@ v0.24.0 开始内置支持对敏感词的分类细化,不过工作量比较大
[CHANGE_LOG.md](https://github.com/houbb/sensitive-word/blob/master/CHANGE_LOG.md)
### V0.24.0
- 初步内置实现单词标签,丰富单词标签内置策略
### V0.24.1
- 删除时统一添加同步锁 sync
### V0.24.2
- 统一黑白名单为一次遍历,性能优化
- 实现真实的 findFirst性能优化
## 更多资料
### 敏感词控台
@@ -111,7 +96,7 @@ v0.24.0 开始内置支持对敏感词的分类细化,不过工作量比较大
<dependency>
<groupId>com.github.houbb</groupId>
<artifactId>sensitive-word</artifactId>
<version>0.24.2</version>
<version>0.25.0</version>
</dependency>
```
@@ -377,6 +362,22 @@ Assert.assertEquals("[ⒻⒻⒻfⓤuⓤ⒰cⓒ⒦]", wordList.toString());
## 更多检测策略
### 说明
v0.25.0 目前的几个策略,也支持用户引导类自定义。所有的策略都是接口,支持用户自定义实现。
| 序号 | 方法 | 说明 | 默认值 |
|:---|:---------------------|:-------------------------------------------|:------|
| 16 | wordCheckNum | 数字检测策略(v0.25.0开始支持) | `WordChecks.num()` |
| 17 | wordCheckEmail | 邮箱检测策略(v0.25.0开始支持) | `WordChecks.email()` |
| 18 | wordCheckUrl | URL检测策略(v0.25.0开始支持),内置还实现了 `urlNoPrefix()` | `WordChecks.url()` |
| 19 | wordCheckIpv4 | ipv4检测策略(v0.25.0开始支持) | `WordChecks.ipv4()` |
| 20 | wordCheckWord | 敏感词检测策略(v0.25.0开始支持) | `WordChecks.word()` |
内置实现:
a) `WordChecks.urlNoPrefix()` 作为 url 的额外实现,可以不需要 `https://` 或 `http://` 前缀。
### 邮箱检测
邮箱等个人信息,默认未启用。
@@ -425,6 +426,21 @@ Assert.assertEquals("[https://www.baidu.com]", wordList.toString());
Assert.assertEquals("点击链接 ********************* 查看答案", sensitiveWordBs.replace(text));
```
v0.25.0 内置支持不需要 http 协议的前缀检测:
```java
final String text = "点击链接 https://www.baidu.com 查看答案,当然也可以是 baidu.com、www.baidu.com";
final SensitiveWordBs sensitiveWordBs = SensitiveWordBs.newInstance()
.enableUrlCheck(true) // 启用URL检测
.wordCheckUrl(WordChecks.urlNoPrefix()) //指定检测的方式
.init();
List<String> wordList = sensitiveWordBs.findAll(text);
Assert.assertEquals("[www.baidu.com, baidu.com, www.baidu.com]", wordList.toString());
Assert.assertEquals("点击链接 https://************* 查看答案,当然也可以是 *********、*************", sensitiveWordBs.replace(text));
```
### IPV4 检测
v0.17.0 支持
@@ -467,6 +483,11 @@ SensitiveWordBs wordBs = SensitiveWordBs.newInstance()
.enableUrlCheck(false)
.enableIpv4Check(false)
.enableWordCheck(true)
.wordCheckNum(WordChecks.num())
.wordCheckEmail(WordChecks.email())
.wordCheckUrl(WordChecks.url())
.wordCheckIpv4(WordChecks.ipv4())
.wordCheckWord(WordChecks.word())
.numCheckLen(8)
.wordTag(WordTags.none())
.charIgnore(SensitiveWordCharIgnores.defaults())
@@ -497,6 +518,11 @@ Assert.assertTrue(wordBs.contains(text));
| 13 | wordTag | 词对应的标签 | none |
| 14 | charIgnore | 忽略的字符 | none |
| 15 | wordResultCondition | 针对匹配的敏感词额外加工,比如可以限制英文单词必须全匹配 | 恒为真 |
| 16 | wordCheckNum | 数字检测策略(v0.25.0开始支持) | `WordChecks.num()` |
| 17 | wordCheckEmail | 邮箱检测策略(v0.25.0开始支持) | `WordChecks.email()` |
| 18 | wordCheckUrl | URL检测策略(v0.25.0开始支持) | `WordChecks.url()` |
| 19 | wordCheckIpv4 | ipv4检测策略(v0.25.0开始支持) | `WordChecks.ipv4()` |
| 20 | wordCheckWord | 敏感词检测策略(v0.25.0开始支持) | `WordChecks.word()` |
## 内存资源的释放

View File

@@ -6,7 +6,7 @@
<groupId>com.github.houbb</groupId>
<artifactId>sensitive-word</artifactId>
<version>0.24.2</version>
<version>0.25.0</version>
<properties>
<!--============================== All Plugins START ==============================-->

View File

@@ -10,9 +10,9 @@ ECHO "============================= RELEASE START..."
:: 版本号信息(需要手动指定)
:::: 旧版本名称
SET version=0.24.2
SET version=0.25.0
:::: 新版本名称
SET newVersion=0.25.0
SET newVersion=0.26.0
:::: 组织名称
SET groupName=com.github.houbb
:::: 项目名称

View File

@@ -275,4 +275,23 @@ public interface IWordContext {
SensitiveWordContext wordResultCondition(IWordResultCondition wordResultCondition);
/**
* Returns the strategy used for sensitive-word detection.
* @return the configured strategy
* @since 0.25.0
*/
IWordCheck wordCheckWord();
/**
* Sets the strategy used for sensitive-word detection.
* @param wordCheckWord the strategy to use
* @return the context
* @since 0.25.0
*/
SensitiveWordContext wordCheckWord(IWordCheck wordCheckWord);
/**
* Returns the strategy used for number detection.
* @return the configured strategy
* @since 0.25.0
*/
IWordCheck wordCheckNum();
/**
* Sets the strategy used for number detection.
* @param wordCheckNum the strategy to use
* @return the context
* @since 0.25.0
*/
SensitiveWordContext wordCheckNum(IWordCheck wordCheckNum);
/**
* Returns the strategy used for email detection.
* @return the configured strategy
* @since 0.25.0
*/
IWordCheck wordCheckEmail();
/**
* Sets the strategy used for email detection.
* @param wordCheckEmail the strategy to use
* @return the context
* @since 0.25.0
*/
SensitiveWordContext wordCheckEmail(IWordCheck wordCheckEmail);
/**
* Returns the strategy used for URL detection.
* @return the configured strategy
* @since 0.25.0
*/
IWordCheck wordCheckUrl();
/**
* Sets the strategy used for URL detection.
* @param wordCheckUrl the strategy to use
* @return the context
* @since 0.25.0
*/
SensitiveWordContext wordCheckUrl(IWordCheck wordCheckUrl);
/**
* Returns the strategy used for IPv4 detection.
* @return the configured strategy
* @since 0.25.0
*/
IWordCheck wordCheckIpv4();
/**
* Sets the strategy used for IPv4 detection.
* @param wordCheckIpv4 the strategy to use
* @return the context
* @since 0.25.0
*/
SensitiveWordContext wordCheckIpv4(IWordCheck wordCheckIpv4);
}

View File

@@ -10,6 +10,7 @@ import com.github.houbb.sensitive.word.api.combine.IWordCheckCombine;
import com.github.houbb.sensitive.word.api.combine.IWordFormatCombine;
import com.github.houbb.sensitive.word.core.SensitiveWords;
import com.github.houbb.sensitive.word.support.allow.WordAllows;
import com.github.houbb.sensitive.word.support.check.WordChecks;
import com.github.houbb.sensitive.word.support.combine.allowdeny.WordAllowDenyCombines;
import com.github.houbb.sensitive.word.support.combine.check.WordCheckCombines;
import com.github.houbb.sensitive.word.support.combine.format.WordFormatCombines;
@@ -182,6 +183,36 @@ public class SensitiveWordBs implements ISensitiveWordDestroy {
*/
private IWordResultCondition wordResultCondition = WordResultConditions.alwaysTrue();
/**
* Sensitive-word detection strategy; defaults to {@code WordChecks.word()}.
* @since 0.25.0
*/
private IWordCheck wordCheckWord = WordChecks.word();
/**
* Number detection strategy; defaults to {@code WordChecks.num()}.
* @since 0.25.0
*/
private IWordCheck wordCheckNum = WordChecks.num();
/**
* Email detection strategy; defaults to {@code WordChecks.email()}.
* @since 0.25.0
*/
private IWordCheck wordCheckEmail = WordChecks.email();
/**
* URL detection strategy; defaults to {@code WordChecks.url()}.
* @since 0.25.0
*/
private IWordCheck wordCheckUrl = WordChecks.url();
/**
* IPv4 detection strategy; defaults to {@code WordChecks.ipv4()}.
* @since 0.25.0
*/
private IWordCheck wordCheckIpv4 = WordChecks.ipv4();
/**
* 新建验证实例
* <p>
@@ -255,6 +286,13 @@ public class SensitiveWordBs implements ISensitiveWordDestroy {
context.enableWordCheck(enableWordCheck);
context.enableIpv4Check(enableIpv4Check);
// 校验策略实现配置
context.wordCheckWord(wordCheckWord);
context.wordCheckEmail(wordCheckEmail);
context.wordCheckNum(wordCheckNum);
context.wordCheckUrl(wordCheckUrl);
context.wordCheckIpv4(wordCheckIpv4);
// 额外配置
context.sensitiveCheckNumLen(numCheckLen);
context.wordReplace(wordReplace);
@@ -370,6 +408,41 @@ public class SensitiveWordBs implements ISensitiveWordDestroy {
return this;
}
/**
* Sets the strategy used for sensitive-word detection.
* The argument is validated with {@code ArgUtil.notNull} before it is stored.
* @param wordCheckWord the strategy to use
* @return this instance, for call chaining
* @since 0.25.0
*/
public SensitiveWordBs wordCheckWord(IWordCheck wordCheckWord) {
ArgUtil.notNull(wordCheckWord, "wordCheckWord");
this.wordCheckWord = wordCheckWord;
return this;
}
/**
* Sets the strategy used for number detection.
* The argument is validated with {@code ArgUtil.notNull} before it is stored.
* @param wordCheckNum the strategy to use
* @return this instance, for call chaining
* @since 0.25.0
*/
public SensitiveWordBs wordCheckNum(IWordCheck wordCheckNum) {
ArgUtil.notNull(wordCheckNum, "wordCheckNum");
this.wordCheckNum = wordCheckNum;
return this;
}
/**
* Sets the strategy used for email detection.
* The argument is validated with {@code ArgUtil.notNull} before it is stored.
* @param wordCheckEmail the strategy to use
* @return this instance, for call chaining
* @since 0.25.0
*/
public SensitiveWordBs wordCheckEmail(IWordCheck wordCheckEmail) {
ArgUtil.notNull(wordCheckEmail, "wordCheckEmail");
this.wordCheckEmail = wordCheckEmail;
return this;
}
/**
* Sets the strategy used for URL detection.
* The argument is validated with {@code ArgUtil.notNull} before it is stored.
* @param wordCheckUrl the strategy to use
* @return this instance, for call chaining
* @since 0.25.0
*/
public SensitiveWordBs wordCheckUrl(IWordCheck wordCheckUrl) {
ArgUtil.notNull(wordCheckUrl, "wordCheckUrl");
this.wordCheckUrl = wordCheckUrl;
return this;
}
/**
* Sets the strategy used for IPv4 detection.
* The argument is validated with {@code ArgUtil.notNull} before it is stored.
* @param wordCheckIpv4 the strategy to use
* @return this instance, for call chaining
* @since 0.25.0
*/
public SensitiveWordBs wordCheckIpv4(IWordCheck wordCheckIpv4) {
ArgUtil.notNull(wordCheckIpv4, "wordCheckIpv4");
this.wordCheckIpv4 = wordCheckIpv4;
return this;
}
//-------------------------------------------------------- 基础属性设置
/**
* 是否启用 ipv4 校验

View File

@@ -1,6 +1,7 @@
package com.github.houbb.sensitive.word.bs;
import com.github.houbb.sensitive.word.api.*;
import com.github.houbb.sensitive.word.support.check.WordChecks;
/**
* 上下文
@@ -133,6 +134,36 @@ public class SensitiveWordContext implements IWordContext {
*/
private IWordResultCondition wordResultCondition;
/**
* Sensitive-word detection strategy.
* @since 0.25.0
*/
private IWordCheck wordCheckWord;
/**
* Number detection strategy.
* @since 0.25.0
*/
private IWordCheck wordCheckNum;
/**
* Email detection strategy.
* @since 0.25.0
*/
private IWordCheck wordCheckEmail;
/**
* URL detection strategy.
* @since 0.25.0
*/
private IWordCheck wordCheckUrl;
/**
* IPv4 detection strategy.
* @since 0.25.0
*/
private IWordCheck wordCheckIpv4;
public IWordData wordData() {
return wordData;
}
@@ -355,4 +386,49 @@ public class SensitiveWordContext implements IWordContext {
this.wordResultCondition = wordResultCondition;
return this;
}
/**
* Returns the sensitive-word detection strategy held by this context.
* @return the stored strategy
* @since 0.25.0
*/
public IWordCheck wordCheckWord() {
return wordCheckWord;
}
/**
* Stores the sensitive-word detection strategy on this context.
* @param wordCheckWord the strategy to store
* @return this context, for call chaining
* @since 0.25.0
*/
public SensitiveWordContext wordCheckWord(IWordCheck wordCheckWord) {
this.wordCheckWord = wordCheckWord;
return this;
}
/**
* Returns the number detection strategy held by this context.
* @return the stored strategy
* @since 0.25.0
*/
public IWordCheck wordCheckNum() {
return wordCheckNum;
}
/**
* Stores the number detection strategy on this context.
* @param wordCheckNum the strategy to store
* @return this context, for call chaining
* @since 0.25.0
*/
public SensitiveWordContext wordCheckNum(IWordCheck wordCheckNum) {
this.wordCheckNum = wordCheckNum;
return this;
}
/**
* Returns the email detection strategy held by this context.
* @return the stored strategy
* @since 0.25.0
*/
public IWordCheck wordCheckEmail() {
return wordCheckEmail;
}
/**
* Stores the email detection strategy on this context.
* @param wordCheckEmail the strategy to store
* @return this context, for call chaining
* @since 0.25.0
*/
public SensitiveWordContext wordCheckEmail(IWordCheck wordCheckEmail) {
this.wordCheckEmail = wordCheckEmail;
return this;
}
/**
* Returns the URL detection strategy held by this context.
* @return the stored strategy
* @since 0.25.0
*/
public IWordCheck wordCheckUrl() {
return wordCheckUrl;
}
/**
* Stores the URL detection strategy on this context.
* @param wordCheckUrl the strategy to store
* @return this context, for call chaining
* @since 0.25.0
*/
public SensitiveWordContext wordCheckUrl(IWordCheck wordCheckUrl) {
this.wordCheckUrl = wordCheckUrl;
return this;
}
/**
* Returns the IPv4 detection strategy held by this context.
* @return the stored strategy
* @since 0.25.0
*/
public IWordCheck wordCheckIpv4() {
return wordCheckIpv4;
}
/**
* Stores the IPv4 detection strategy on this context.
* @param wordCheckIpv4 the strategy to store
* @return this context, for call chaining
* @since 0.25.0
*/
public SensitiveWordContext wordCheckIpv4(IWordCheck wordCheckIpv4) {
this.wordCheckIpv4 = wordCheckIpv4;
return this;
}
}

View File

@@ -60,7 +60,17 @@ public class WordCheckUrl extends AbstractConditionWordCheck {
// 改为 http:// 或者 https:// 开头
String string = stringBuilder.toString();
return RegexUtil.isUrl(string);
return isUrl(string);
}
/**
* Decides whether the given text counts as a URL.
* This implementation delegates to {@code RegexUtil.isUrl}; the method is
* {@code protected}, so a subclass can supply a different rule.
* @param text the original text to test
* @return the result of the URL check
* @since 0.25.0
*/
protected boolean isUrl(final String text) {
return RegexUtil.isUrl(text);
}
}

View File

@@ -0,0 +1,32 @@
package com.github.houbb.sensitive.word.support.check;
import com.github.houbb.heaven.util.util.regex.RegexUtil;
import com.github.houbb.sensitive.word.api.IWordCheck;
/**
* URL check strategy that does not require a prefix.
*
* Notes kept from the original comment:
* (1) Web-site handling is only approximate for now.
* (2) If the address ends with an image type, it is skipped.
* (3) If the length exceeds 70, processing ends immediately.
*
* The only behavior defined in this class is {@link #isUrl(String)},
* which delegates to {@code RegexUtil.isWebSite}.
*
* @author binbin.hou
* @since 0.25.0
*/
public class WordCheckUrlNoPrefix extends WordCheckUrl {
/**
* Shared instance returned by {@link #getInstance()}.
* @since 0.25.0
*/
private static final IWordCheck INSTANCE = new WordCheckUrlNoPrefix();
/**
* Returns the shared instance of this strategy.
* @return the shared instance
* @since 0.25.0
*/
public static IWordCheck getInstance() {
return INSTANCE;
}
/**
* Overrides the inherited check so that the decision is made by
* {@code RegexUtil.isWebSite} instead.
* @param text the original text to test
* @return the result of the web-site check
* @since 0.25.0
*/
@Override
protected boolean isUrl(String text) {
return RegexUtil.isWebSite(text);
}
}

View File

@@ -77,4 +77,15 @@ public final class WordChecks {
return WordCheckIPV4.getInstance();
}
/**
* URL check strategy that does not require a prefix.
* Note: this kind of detection may clash with package names that appear in code.
*
* @return the implementation, obtained from {@code WordCheckUrlNoPrefix.getInstance()}
* @since 0.25.0
*/
public static IWordCheck urlNoPrefix() {
return WordCheckUrlNoPrefix.getInstance();
}
}

View File

@@ -18,19 +18,19 @@ public class WordCheckCombine extends AbstractWordCheckCombine {
List<IWordCheck> wordCheckList = new ArrayList<>();
if(context.enableWordCheck()) {
wordCheckList.add(WordChecks.word());
wordCheckList.add(context.wordCheckWord());
}
if(context.enableNumCheck()) {
wordCheckList.add(WordChecks.num());
wordCheckList.add(context.wordCheckNum());
}
if(context.enableEmailCheck()) {
wordCheckList.add(WordChecks.email());
wordCheckList.add(context.wordCheckEmail());
}
if(context.enableUrlCheck()) {
wordCheckList.add(WordChecks.url());
wordCheckList.add(context.wordCheckUrl());
}
if(context.enableIpv4Check()) {
wordCheckList.add(WordChecks.ipv4());
wordCheckList.add(context.wordCheckIpv4());
}
return wordCheckList;

View File

@@ -4,6 +4,7 @@ import com.github.houbb.heaven.util.io.FileUtil;
import com.github.houbb.heaven.util.util.CollectionUtil;
import com.github.houbb.sensitive.word.data.WordCountDto;
import com.github.houbb.sensitive.word.support.allow.WordAllows;
import com.github.houbb.sensitive.word.support.check.WordChecks;
import com.github.houbb.sensitive.word.support.deny.WordDenys;
import com.github.houbb.sensitive.word.support.ignore.SensitiveWordCharIgnores;
import com.github.houbb.sensitive.word.support.resultcondition.WordResultConditions;
@@ -39,6 +40,11 @@ public class SensitiveWordBsConfigTest {
.enableUrlCheck(false)
.enableIpv4Check(false)
.enableWordCheck(true)
.wordCheckNum(WordChecks.num())
.wordCheckEmail(WordChecks.email())
.wordCheckUrl(WordChecks.url())
.wordCheckIpv4(WordChecks.ipv4())
.wordCheckWord(WordChecks.word())
.numCheckLen(8)
.wordTag(WordTags.none())
.charIgnore(SensitiveWordCharIgnores.defaults())
@@ -46,39 +52,6 @@ public class SensitiveWordBsConfigTest {
.wordAllow(WordAllows.defaults())
.wordDeny(WordDenys.defaults())
.init();
// String dir = "D:\\code\\github\\houbb.github.io\\_posts";
// File[] files = new File(dir).listFiles();
//
// Set<String> wordSet = new HashSet<>();
//
// Map<String, Integer> wordCountMap = new HashMap<>();
// for(File file : files) {
// String content = FileUtil.getFileContent(file);
// List<String> allWords = wordBs.findAll(content);
//
// for(String word : allWords) {
// Integer integer = wordCountMap.get(word);
// if(integer == null) {
// integer = 0;
// }
//
// integer++;
// wordCountMap.put(word, integer);
// }
//
// System.out.println(file.getName());
// }
//
//// List<WordCountDto> wordCountDtoList = new ArrayList<>();
// for(Map.Entry<String, Integer> entry : wordCountMap.entrySet()) {
// if(entry.getValue() >= 3) {
// System.out.println(entry.getKey() + " : " + entry.getValue());
// }
// }
// Collections.sort(wordCountDtoList);
// System.out.println(wordCountDtoList);
}
@Test

View File

@@ -0,0 +1,37 @@
package com.github.houbb.sensitive.word.bs;
import com.github.houbb.sensitive.word.support.check.WordChecks;
import org.junit.Assert;
import org.junit.Test;
import java.util.List;
/**
* <p> project: sensitive-word-SensitiveWordBsUrlNoPrefixTest </p>
* <p> create on 2020/1/7 23:43 </p>
*
* @author Administrator
* @since 0.25.0
*/
public class SensitiveWordBsUrlNoPrefixTest {
/**
* URL detection using {@code WordChecks.urlNoPrefix()}.
*
* Expects addresses without a protocol prefix ({@code baidu.com},
* {@code www.baidu.com}) to be reported, and for the {@code https://}
* address only the part after the protocol to be reported and masked.
*
* @since 0.25.0
*/
@Test
public void urlNoPrefixTest() {
final String text = "点击链接 https://www.baidu.com 查看答案,当然也可以是 baidu.com、www.baidu.com";
final SensitiveWordBs sensitiveWordBs = SensitiveWordBs.newInstance()
.enableUrlCheck(true) // enable URL detection
.wordCheckUrl(WordChecks.urlNoPrefix()) // choose the detection strategy
.init();
List<String> wordList = sensitiveWordBs.findAll(text);
Assert.assertEquals("[www.baidu.com, baidu.com, www.baidu.com]", wordList.toString());
Assert.assertEquals("点击链接 https://************* 查看答案,当然也可以是 *********、*************", sensitiveWordBs.replace(text));
}
}