release branch 0.26.0

This commit is contained in:
houbb
2025-05-03 01:24:34 +08:00
parent 992063e46a
commit e6e4e05b1c
9 changed files with 154 additions and 51 deletions

View File

@@ -419,3 +419,9 @@
| 序号 | 变更类型 | 说明 | 时间 | 备注 | | 序号 | 变更类型 | 说明 | 时间 | 备注 |
|:---|:-----|------------------------------|:------------------|:---------------------------------------------------| |:---|:-----|------------------------------|:------------------|:---------------------------------------------------|
| 1 | A | 修正 tags 匹配问题,黑名单命中时返回对应的黑名单词 | 2025-5-2 20:25:04 | https://github.com/houbb/sensitive-word/issues/105 | | 1 | A | 修正 tags 匹配问题,黑名单命中时返回对应的黑名单词 | 2025-5-2 20:25:04 | https://github.com/houbb/sensitive-word/issues/105 |
# release_0.26.0
| 序号 | 变更类型 | 说明 | 时间 | 备注 |
|:---|:-----|----------|:------------------|:---------------------------------------------------|
| 1 | A | 支持最长匹配模式 | 2025-5-3 00:58:42 | https://github.com/houbb/sensitive-word/issues/110 |

View File

@@ -54,6 +54,8 @@ v0.24.0 开始内置支持对敏感词的分类细化,不过工作量比较大
- [支持黑白名单单个的新增/修改,无需全量初始化](https://github.com/houbb/sensitive-word?tab=readme-ov-file#%E9%92%88%E5%AF%B9%E5%8D%95%E4%B8%AA%E8%AF%8D%E7%9A%84%E6%96%B0%E5%A2%9E%E5%88%A0%E9%99%A4%E6%97%A0%E9%9C%80%E5%85%A8%E9%87%8F%E5%88%9D%E5%A7%8B%E5%8C%96) - [支持黑白名单单个的新增/修改,无需全量初始化](https://github.com/houbb/sensitive-word?tab=readme-ov-file#%E9%92%88%E5%AF%B9%E5%8D%95%E4%B8%AA%E8%AF%8D%E7%9A%84%E6%96%B0%E5%A2%9E%E5%88%A0%E9%99%A4%E6%97%A0%E9%9C%80%E5%85%A8%E9%87%8F%E5%88%9D%E5%A7%8B%E5%8C%96)
- [支持敏感词的两种匹配模式(快速返回/最长匹配)](https://github.com/houbb/sensitive-word?tab=readme-ov-file#wordfailfast-敏感词匹配快速失败模式)
## 变更日志 ## 变更日志
[CHANGE_LOG.md](https://github.com/houbb/sensitive-word/blob/master/CHANGE_LOG.md) [CHANGE_LOG.md](https://github.com/houbb/sensitive-word/blob/master/CHANGE_LOG.md)
@@ -483,6 +485,7 @@ SensitiveWordBs wordBs = SensitiveWordBs.newInstance()
.enableUrlCheck(false) .enableUrlCheck(false)
.enableIpv4Check(false) .enableIpv4Check(false)
.enableWordCheck(true) .enableWordCheck(true)
.wordFailFast(true)
.wordCheckNum(WordChecks.num()) .wordCheckNum(WordChecks.num())
.wordCheckEmail(WordChecks.email()) .wordCheckEmail(WordChecks.email())
.wordCheckUrl(WordChecks.url()) .wordCheckUrl(WordChecks.url())
@@ -502,7 +505,7 @@ Assert.assertTrue(wordBs.contains(text));
其中各项配置的说明如下: 其中各项配置的说明如下:
| 序号 | 方法 | 说明 | 默认值 | | 序号 | 方法 | 说明 | 默认值 |
|:---|:--------------------|:-----------------------------|:------| |:---|:--------------------|:-----------------------------|:--------------------------|
| 1 | ignoreCase | 忽略大小写 | true | | 1 | ignoreCase | 忽略大小写 | true |
| 2 | ignoreWidth | 忽略半角全角 | true | | 2 | ignoreWidth | 忽略半角全角 | true |
| 3 | ignoreNumStyle | 忽略数字的写法 | true | | 3 | ignoreNumStyle | 忽略数字的写法 | true |
@@ -524,6 +527,54 @@ Assert.assertTrue(wordBs.contains(text));
| 19 | wordCheckIpv4 | ipv4检测策略(v0.25.0开始支持) | `WordChecks.ipv4()` | | 19 | wordCheckIpv4 | ipv4检测策略(v0.25.0开始支持) | `WordChecks.ipv4()` |
| 20 | wordCheckWord | 敏感词检测策略(v0.25.0开始支持) | `WordChecks.word()` | | 20 | wordCheckWord | 敏感词检测策略(v0.25.0开始支持) | `WordChecks.word()` |
| 21 | wordReplace | 替换策略 | `WordReplaces.defaults()` | | 21 | wordReplace | 替换策略 | `WordReplaces.defaults()` |
| 22 | wordFailFast | 敏感词匹配模式是否快速返回 | true |
## wordFailFast 敏感词匹配快速失败模式
### 场景说明
v0.26.0 开始支持。
默认情况下 wordFailFast=true,匹配到第一个敏感词就快速返回,性能较好。
但是当敏感词之间存在公共前缀时,结果有时不太符合人的直觉。
默认模式的效果如下:
```java
// Sample input: "我的" appears once on its own and once as the prefix of "我的世界".
String text = "他的世界它的世界和她的世界都不是我的也不是我的世界";

// wordFailFast is not set here, so the default (true) applies.
SensitiveWordBs bs2 = SensitiveWordBs.newInstance()
        .wordDeny(new IWordDeny() {
            @Override
            public List<String> deny() {
                return Arrays.asList("我的世界", "我的");
            }
        }).init();

List<String> textList2 = bs2.findAll(text);
// The shorter word "我的" is reported both times; "我的世界" is never returned.
Assert.assertEquals(Arrays.asList("我的", "我的"), textList2);
```
此时会优先匹配短的【我的】,导致后面的【我的世界】被跳过。
### failOver 模式
设置 `wordFailFast(false)` 后,会尽可能找到最长的匹配词。
```java
// Input in which "我的" appears once on its own and once as the prefix of "我的世界".
final String text = "他的世界它的世界和她的世界都不是我的也不是我的世界";
// Deny list where the short word is a prefix of the long one.
final List<String> denyWords = Arrays.asList("我的世界", "我的");

// Fail-fast is switched off for this instance.
SensitiveWordBs bs = SensitiveWordBs.newInstance()
        .wordFailFast(false)
        .wordDeny(new IWordDeny() {
            @Override
            public List<String> deny() {
                return denyWords;
            }
        })
        .init();

// The second hit is expected to be the longer word "我的世界".
List<String> expected = Arrays.asList("我的", "我的世界");
List<String> found = bs.findAll(text);
Assert.assertEquals(expected, found);
```
## 内存资源的释放 ## 内存资源的释放

View File

@@ -10,9 +10,9 @@ ECHO "============================= RELEASE START..."
:: 版本号信息(需要手动指定) :: 版本号信息(需要手动指定)
:::: 旧版本名称 :::: 旧版本名称
SET version=0.25.1 SET version=0.26.0
:::: 新版本名称 :::: 新版本名称
SET newVersion=0.26.0 SET newVersion=0.27.0
:::: 组织名称 :::: 组织名称
SET groupName=com.github.houbb SET groupName=com.github.houbb
:::: 项目名称 :::: 项目名称

View File

@@ -12,11 +12,18 @@ public interface IWordContext {
/** /**
* 为true时遇到第一个敏感词就返回 * 为true时遇到第一个敏感词就返回
* 解决issue110 * 解决issue110
* @return * @return 是否
* @since 0.26.0
*/ */
boolean failFastWordPattern(); boolean wordFailFast();
IWordContext failFastWordPattern(boolean failFastWordPattern); /**
* word 快速失败
* @param wordFailFast 快速失败
* @return this
* @since 0.26.0
*/
IWordContext wordFailFast(boolean wordFailFast);
/** /**

View File

@@ -69,7 +69,11 @@ public class SensitiveWordBs implements ISensitiveWordDestroy {
*/ */
private boolean ignoreRepeat = false; private boolean ignoreRepeat = false;
private boolean failFastWordPattern = true; /**
* 单词快速匹配模式
* @since 0.26.0
*/
private boolean wordFailFast = true;
// 开启校验 // 开启校验
@@ -280,7 +284,7 @@ public class SensitiveWordBs implements ISensitiveWordDestroy {
context.ignoreChineseStyle(ignoreChineseStyle); context.ignoreChineseStyle(ignoreChineseStyle);
context.ignoreEnglishStyle(ignoreEnglishStyle); context.ignoreEnglishStyle(ignoreEnglishStyle);
context.ignoreRepeat(ignoreRepeat); context.ignoreRepeat(ignoreRepeat);
context.failFastWordPattern(failFastWordPattern); context.wordFailFast(wordFailFast);
// 开启校验 // 开启校验
context.enableNumCheck(enableNumCheck); context.enableNumCheck(enableNumCheck);
@@ -582,8 +586,15 @@ public class SensitiveWordBs implements ISensitiveWordDestroy {
this.ignoreRepeat = ignoreRepeat; this.ignoreRepeat = ignoreRepeat;
return this; return this;
} }
public SensitiveWordBs failFastWordPattern(boolean failFastWordPattern) {
this.failFastWordPattern = failFastWordPattern; /**
* 设置快速返回
* @param wordFailFast 快速匹配
* @return this
* @since 0.26.0
*/
public SensitiveWordBs wordFailFast(boolean wordFailFast) {
this.wordFailFast = wordFailFast;
return this; return this;
} }

View File

@@ -1,7 +1,6 @@
package com.github.houbb.sensitive.word.bs; package com.github.houbb.sensitive.word.bs;
import com.github.houbb.sensitive.word.api.*; import com.github.houbb.sensitive.word.api.*;
import com.github.houbb.sensitive.word.support.check.WordChecks;
/** /**
* 上下文 * 上下文
@@ -13,8 +12,9 @@ public class SensitiveWordContext implements IWordContext {
/** /**
* issue110 * issue110
* @since 0.26.0
*/ */
private boolean failFastWordPattern; private boolean wordFailFast;
/** /**
* 忽略大小写 * 忽略大小写
@@ -227,14 +227,14 @@ public class SensitiveWordContext implements IWordContext {
} }
@Override @Override
public boolean failFastWordPattern() { public boolean wordFailFast() {
return failFastWordPattern; return wordFailFast;
} }
@Override @Override
public IWordContext failFastWordPattern(boolean failFastWordPattern){ public IWordContext wordFailFast(boolean wordFailFast){
this.failFastWordPattern=failFastWordPattern; this.wordFailFast = wordFailFast;
return this; return this;
} }

View File

@@ -5,12 +5,10 @@ import com.github.houbb.sensitive.word.api.ISensitiveWordCharIgnore;
import com.github.houbb.sensitive.word.api.IWordCheck; import com.github.houbb.sensitive.word.api.IWordCheck;
import com.github.houbb.sensitive.word.api.IWordContext; import com.github.houbb.sensitive.word.api.IWordContext;
import com.github.houbb.sensitive.word.api.IWordData; import com.github.houbb.sensitive.word.api.IWordData;
import com.github.houbb.sensitive.word.api.IWordFormat;
import com.github.houbb.sensitive.word.api.context.InnerSensitiveWordContext; import com.github.houbb.sensitive.word.api.context.InnerSensitiveWordContext;
import com.github.houbb.sensitive.word.constant.enums.WordTypeEnum; import com.github.houbb.sensitive.word.constant.enums.WordTypeEnum;
import com.github.houbb.sensitive.word.constant.enums.WordContainsTypeEnum; import com.github.houbb.sensitive.word.constant.enums.WordContainsTypeEnum;
import com.github.houbb.sensitive.word.support.result.WordLengthResult; import com.github.houbb.sensitive.word.support.result.WordLengthResult;
import com.github.houbb.sensitive.word.utils.InnerWordFormatUtils;
import java.util.Map; import java.util.Map;
@@ -44,7 +42,7 @@ public class WordCheckWord extends AbstractWordCheck {
final IWordData wordData = context.wordData(); final IWordData wordData = context.wordData();
final IWordData wordDataAllow = context.wordDataAllow(); final IWordData wordDataAllow = context.wordDataAllow();
final ISensitiveWordCharIgnore wordCharIgnore = context.charIgnore(); final ISensitiveWordCharIgnore wordCharIgnore = context.charIgnore();
final boolean failFast = context.failFastWordPattern(); final boolean failFast = context.wordFailFast();
StringBuilder stringBuilder = new StringBuilder(); StringBuilder stringBuilder = new StringBuilder();
char[] rawChars = txt.toCharArray(); char[] rawChars = txt.toCharArray();

View File

@@ -24,6 +24,7 @@ public class BenchmarkBasicTest {
* 12942ms 第一次优化。 * 12942ms 第一次优化。
* 12983ms 添加对应的 contains 优化,性能无太大变化。 * 12983ms 添加对应的 contains 优化,性能无太大变化。
* *
* 【2025-5-3 01:11:31】1193ms 1281 1201 1256
*/ */
@Test @Test
public void costTimeTest() { public void costTimeTest() {

View File

@@ -9,12 +9,15 @@ import java.util.Arrays;
import java.util.Collections; import java.util.Collections;
import java.util.List; import java.util.List;
/**
* @since 0.26.0
*/
public class SensitiveWordFailFastTest { public class SensitiveWordFailFastTest {
@Test @Test
public void failFastTest() { public void failFastTest() {
SensitiveWordBs bs = SensitiveWordBs.newInstance() SensitiveWordBs bs = SensitiveWordBs.newInstance()
.failFastWordPattern(true) .wordFailFast(true)
.wordDeny(new IWordDeny() { .wordDeny(new IWordDeny() {
@Override @Override
public List<String> deny() { public List<String> deny() {
@@ -23,7 +26,7 @@ public class SensitiveWordFailFastTest {
}).init(); }).init();
SensitiveWordBs bs1 = SensitiveWordBs.newInstance() SensitiveWordBs bs1 = SensitiveWordBs.newInstance()
.failFastWordPattern(true) .wordFailFast(true)
.wordDeny(new IWordDeny() { .wordDeny(new IWordDeny() {
@Override @Override
public List<String> deny() { public List<String> deny() {
@@ -41,7 +44,7 @@ public class SensitiveWordFailFastTest {
//黑长白短,且初始下标一致 //黑长白短,且初始下标一致
SensitiveWordBs bs2 = SensitiveWordBs.newInstance() SensitiveWordBs bs2 = SensitiveWordBs.newInstance()
.failFastWordPattern(true) .wordFailFast(true)
.wordDeny(new IWordDeny() { .wordDeny(new IWordDeny() {
@Override @Override
public List<String> deny() { public List<String> deny() {
@@ -60,7 +63,7 @@ public class SensitiveWordFailFastTest {
//白长黑短,且白和黑初始下标不在一起 //白长黑短,且白和黑初始下标不在一起
SensitiveWordBs bs3 = SensitiveWordBs.newInstance() SensitiveWordBs bs3 = SensitiveWordBs.newInstance()
.failFastWordPattern(true) .wordFailFast(true)
.wordDeny(new IWordDeny() { .wordDeny(new IWordDeny() {
@Override @Override
public List<String> deny() { public List<String> deny() {
@@ -78,7 +81,7 @@ public class SensitiveWordFailFastTest {
//白长黑短,且白和黑初始下标在一起 //白长黑短,且白和黑初始下标在一起
SensitiveWordBs bs4 = SensitiveWordBs.newInstance() SensitiveWordBs bs4 = SensitiveWordBs.newInstance()
.failFastWordPattern(true) .wordFailFast(true)
.wordDeny(new IWordDeny() { .wordDeny(new IWordDeny() {
@Override @Override
public List<String> deny() { public List<String> deny() {
@@ -127,7 +130,7 @@ public class SensitiveWordFailFastTest {
@Test @Test
public void fallOverTest() { public void fallOverTest() {
SensitiveWordBs bs = SensitiveWordBs.newInstance() SensitiveWordBs bs = SensitiveWordBs.newInstance()
.failFastWordPattern(false) .wordFailFast(false)
.wordDeny(new IWordDeny() { .wordDeny(new IWordDeny() {
@Override @Override
public List<String> deny() { public List<String> deny() {
@@ -138,7 +141,7 @@ public class SensitiveWordFailFastTest {
//黑长白短,且初始下标不一致 //黑长白短,且初始下标不一致
SensitiveWordBs bs1 = SensitiveWordBs.newInstance() SensitiveWordBs bs1 = SensitiveWordBs.newInstance()
.failFastWordPattern(false) .wordFailFast(false)
.wordDeny(new IWordDeny() { .wordDeny(new IWordDeny() {
@Override @Override
public List<String> deny() { public List<String> deny() {
@@ -156,7 +159,7 @@ public class SensitiveWordFailFastTest {
//黑长白短,且初始下标一致 //黑长白短,且初始下标一致
SensitiveWordBs bs2 = SensitiveWordBs.newInstance() SensitiveWordBs bs2 = SensitiveWordBs.newInstance()
.failFastWordPattern(false) .wordFailFast(false)
.wordDeny(new IWordDeny() { .wordDeny(new IWordDeny() {
@Override @Override
public List<String> deny() { public List<String> deny() {
@@ -175,7 +178,7 @@ public class SensitiveWordFailFastTest {
//白长黑短,且白和黑初始下标不在一起 //白长黑短,且白和黑初始下标不在一起
SensitiveWordBs bs3 = SensitiveWordBs.newInstance() SensitiveWordBs bs3 = SensitiveWordBs.newInstance()
.failFastWordPattern(false) .wordFailFast(false)
.wordDeny(new IWordDeny() { .wordDeny(new IWordDeny() {
@Override @Override
public List<String> deny() { public List<String> deny() {
@@ -193,7 +196,7 @@ public class SensitiveWordFailFastTest {
//白长黑短,且白和黑初始下标在一起 //白长黑短,且白和黑初始下标在一起
SensitiveWordBs bs4 = SensitiveWordBs.newInstance() SensitiveWordBs bs4 = SensitiveWordBs.newInstance()
.failFastWordPattern(false) .wordFailFast(false)
.wordDeny(new IWordDeny() { .wordDeny(new IWordDeny() {
@Override @Override
public List<String> deny() { public List<String> deny() {
@@ -238,6 +241,32 @@ public class SensitiveWordFailFastTest {
} }
/**
 * Runs the same text and deny list through two instances: one built with
 * {@code wordFailFast(false)} and one that leaves the setting untouched,
 * and checks the words each of them reports.
 */
@Test
public void fallOverTest2() {
    // "我的" appears once on its own and once as the prefix of "我的世界".
    final String text = "他的世界它的世界和她的世界都不是我的也不是我的世界";
    final List<String> denyWords = Arrays.asList("我的世界", "我的");

    // Fail-fast disabled: the second hit is expected to be the longer word.
    SensitiveWordBs longestMatchBs = SensitiveWordBs.newInstance()
            .wordFailFast(false)
            .wordDeny(new IWordDeny() {
                @Override
                public List<String> deny() {
                    return denyWords;
                }
            })
            .init();
    List<String> longestExpected = Arrays.asList("我的", "我的世界");
    Assert.assertEquals(longestExpected, longestMatchBs.findAll(text));

    // No wordFailFast call: the short word is expected for both hits.
    SensitiveWordBs defaultBs = SensitiveWordBs.newInstance()
            .wordDeny(new IWordDeny() {
                @Override
                public List<String> deny() {
                    return denyWords;
                }
            })
            .init();
    List<String> defaultExpected = Arrays.asList("我的", "我的");
    Assert.assertEquals(defaultExpected, defaultBs.findAll(text));
}
} }