release branch 0.13.0

This commit is contained in:
houbb
2024-02-19 20:04:46 +08:00
parent f745bc49cf
commit 8772b1b810
15 changed files with 374 additions and 25 deletions

View File

@@ -224,4 +224,10 @@
| 序号 | 变更类型 | 说明 | 时间 | 备注 |
|:---|:-----|----------------------------------------------|:--------------------|:------|
| 1 | A | 添加 wordTags 标签结果处理类 | 2023-12-18 23:51:58 | |
| 2 | A | 添加 AbstractWordResultHandler 处理类,便于后续拓展和统一管理 | 2023-12-18 23:51:58 | |
| 2 | A | 添加 AbstractWordResultHandler 处理类,便于后续拓展和统一管理 | 2023-12-18 23:51:58 | |
# release_0.13.0
| 序号 | 变更类型 | 说明 | 时间 | 备注 |
|:---|:-----|-----------------------------------|:--------------------|:------|
| 1 | A | 对匹配后的单词,额外可以做一次校验,比如做一次英文全词匹配的验证。 | 2024-02-19 23:51:58 | |

View File

@@ -82,7 +82,7 @@
<dependency>
<groupId>com.github.houbb</groupId>
<artifactId>sensitive-word</artifactId>
<version>0.12.0</version>
<version>0.13.0</version>
</dependency>
```
@@ -429,6 +429,7 @@ SensitiveWordBs wordBs = SensitiveWordBs.newInstance()
.numCheckLen(8)
.wordTag(WordTags.none())
.charIgnore(SensitiveWordCharIgnores.defaults())
.wordResultCondition(WordResultConditions.alwaysTrue())
.init();
final String text = "五星红旗迎风飘扬,毛主席的画像屹立在天安门前。";
@@ -438,22 +439,74 @@ Assert.assertTrue(wordBs.contains(text));
其中各项配置的说明如下:
| 序号 | 方法 | 说明 | 默认值 |
|:---|:---------------------|:--------------|:------|
| 1 | ignoreCase | 忽略大小写 | true |
| 2 | ignoreWidth | 忽略半角圆角 | true |
| 3 | ignoreNumStyle | 忽略数字的写法 | true |
| 4 | ignoreChineseStyle | 忽略中文的书写格式 | true |
| 5 | ignoreEnglishStyle | 忽略英文的书写格式 | true |
| 6 | ignoreRepeat | 忽略重复词 | false |
| 7 | enableNumCheck | 是否启用数字检测。 | true |
| 8 | enableEmailCheck | 是否启用邮箱检测 | true |
| 9 | enableUrlCheck | 是否启用链接检测 | true |
| 10 | enableWordCheck | 是否启用敏感单词检测 | true |
| 11 | numCheckLen | 数字检测,自定义指定长度。 | 8 |
| 12 | wordTag | 词对应的标签 | none |
| 13 | charIgnore | 忽略的字符 | none |
| 序号 | 方法 | 说明 | 默认值 |
|:---|:---------------------|:-----------------------------|:------|
| 1 | ignoreCase | 忽略大小写 | true |
| 2 | ignoreWidth | 忽略半角圆角 | true |
| 3 | ignoreNumStyle | 忽略数字的写法 | true |
| 4 | ignoreChineseStyle | 忽略中文的书写格式 | true |
| 5 | ignoreEnglishStyle | 忽略英文的书写格式 | true |
| 6 | ignoreRepeat | 忽略重复词 | false |
| 7 | enableNumCheck | 是否启用数字检测。 | true |
| 8 | enableEmailCheck | 是否启用邮箱检测 | true |
| 9 | enableUrlCheck | 是否启用链接检测 | true |
| 10 | enableWordCheck | 是否启用敏感单词检测 | true |
| 11 | numCheckLen | 数字检测,自定义指定长度。 | 8 |
| 12 | wordTag | 词对应的标签 | none |
| 13 | charIgnore | 忽略的字符 | none |
| 14 | wordResultCondition | 针对匹配的敏感词额外加工,比如可以限制英文单词必须全匹配 | 恒为真 |
# wordResultCondition-针对匹配词进一步判断
## 说明
支持版本v0.13.0
有时候我们可能希望对匹配的敏感词进一步限制比如虽然我们定义了【av】作为敏感词但是不希望【have】被匹配。
就可以自定义实现 wordResultCondition 接口,实现自己的策略。
系统内置的策略在 WordResultConditions#alwaysTrue() 恒为真;WordResultConditions#englishWordMatch() 则要求英文必须全词匹配。
## 入门例子
原始的默认情况:
```java
final String text = "I have a nice day。";
List<String> wordList = SensitiveWordBs.newInstance()
.wordDeny(new IWordDeny() {
@Override
public List<String> deny() {
return Collections.singletonList("av");
}
})
.wordResultCondition(WordResultConditions.alwaysTrue())
.init()
.findAll(text);
Assert.assertEquals("[av]", wordList.toString());
```
我们可以指定为英文必须全词匹配。
```java
final String text = "I have a nice day。";
List<String> wordList = SensitiveWordBs.newInstance()
.wordDeny(new IWordDeny() {
@Override
public List<String> deny() {
return Collections.singletonList("av");
}
})
.wordResultCondition(WordResultConditions.englishWordMatch())
.init()
.findAll(text);
Assert.assertEquals("[]", wordList.toString());
```
当然可以根据需要实现更加复杂的策略。
# 忽略字符

View File

@@ -6,7 +6,7 @@
<groupId>com.github.houbb</groupId>
<artifactId>sensitive-word</artifactId>
<version>0.12.0</version>
<version>0.13.0</version>
<properties>
<!--============================== All Plugins START ==============================-->

View File

@@ -10,9 +10,9 @@ ECHO "============================= RELEASE START..."
:: 版本号信息(需要手动指定)
:::: 旧版本名称
SET version=0.12.0
SET version=0.13.0
:::: 新版本名称
SET newVersion=0.13.0
SET newVersion=0.14.0
:::: 组织名称
SET groupName=com.github.houbb
:::: 项目名称

View File

@@ -241,4 +241,8 @@ public interface IWordContext {
SensitiveWordContext charIgnore(ISensitiveWordCharIgnore charIgnore);
IWordResultCondition wordResultCondition();
SensitiveWordContext wordResultCondition(IWordResultCondition wordResultCondition);
}

View File

@@ -0,0 +1,26 @@
package com.github.houbb.sensitive.word.api;

import com.github.houbb.sensitive.word.constant.enums.WordValidModeEnum;

/**
 * Condition deciding whether a matched sensitive-word result should be kept.
 *
 * <p>Applied after the trie match succeeds; returning {@code false} discards
 * the candidate result (e.g. to require whole-word matches for English).</p>
 *
 * @author binbin.hou
 * @since 0.13.0
 */
public interface IWordResultCondition {

    /**
     * Whether the matched candidate is accepted as a final result.
     *
     * @param wordResult the raw match result (start/end indexes into {@code text})
     * @param text the original text being scanned
     * @param modeEnum the validation mode (fail-fast / fail-over)
     * @param context the word configuration context
     * @return {@code true} to keep the match, {@code false} to discard it
     * @since 0.13.0
     */
    boolean match(final IWordResult wordResult,
                  final String text,
                  final WordValidModeEnum modeEnum,
                  final IWordContext context);
}

View File

@@ -18,6 +18,7 @@ import com.github.houbb.sensitive.word.support.deny.WordDenys;
import com.github.houbb.sensitive.word.support.ignore.SensitiveWordCharIgnores;
import com.github.houbb.sensitive.word.support.replace.WordReplaces;
import com.github.houbb.sensitive.word.support.result.WordResultHandlers;
import com.github.houbb.sensitive.word.support.resultcondition.WordResultConditions;
import com.github.houbb.sensitive.word.support.tag.WordTags;
import java.util.Collection;
@@ -163,6 +164,12 @@ public class SensitiveWordBs {
*/
private ISensitiveWordCharIgnore charIgnore = SensitiveWordCharIgnores.defaults();
/**
* 敏感词结果匹配策略
* @since 0.13.0
*/
private IWordResultCondition wordResultCondition = WordResultConditions.alwaysTrue();
/**
* 新建验证实例
* <p>
@@ -233,10 +240,18 @@ public class SensitiveWordBs {
context.wordData(wordData);
context.wordTag(wordTag);
context.charIgnore(charIgnore);
context.wordResultCondition(wordResultCondition);
return context;
}
/**
 * Sets the strategy that post-validates matched sensitive words
 * (e.g. {@code WordResultConditions.englishWordMatch()} to require
 * whole-word matches for English).
 *
 * @param wordResultCondition the condition, must not be null
 * @return this, for fluent chaining
 * @since 0.13.0
 */
public SensitiveWordBs wordResultCondition(IWordResultCondition wordResultCondition) {
    ArgUtil.notNull(wordResultCondition, "wordResultCondition");
    this.wordResultCondition = wordResultCondition;
    return this;
}
public SensitiveWordBs charIgnore(ISensitiveWordCharIgnore charIgnore) {
ArgUtil.notNull(charIgnore, "charIgnore");

View File

@@ -113,6 +113,13 @@ public class SensitiveWordContext implements IWordContext {
*/
private ISensitiveWordCharIgnore charIgnore;
/**
* 敏感词结果匹配
*
* @since 0.13.0
*/
private IWordResultCondition wordResultCondition;
public IWordData wordData() {
return wordData;
}
@@ -304,4 +311,13 @@ public class SensitiveWordContext implements IWordContext {
this.charIgnore = charIgnore;
return this;
}
/**
 * Returns the configured word-result post-validation condition.
 *
 * @return the condition, may be null if never set
 * @since 0.13.0
 */
@Override
public IWordResultCondition wordResultCondition() {
    return wordResultCondition;
}
/**
 * Sets the word-result post-validation condition.
 *
 * @param wordResultCondition the condition to apply to matched words
 * @return this, for fluent chaining
 * @since 0.13.0
 */
@Override
public SensitiveWordContext wordResultCondition(IWordResultCondition wordResultCondition) {
    this.wordResultCondition = wordResultCondition;
    return this;
}
}

View File

@@ -1,10 +1,7 @@
package com.github.houbb.sensitive.word.core;
import com.github.houbb.heaven.util.guava.Guavas;
import com.github.houbb.sensitive.word.api.IWordCheck;
import com.github.houbb.sensitive.word.api.ISensitiveWord;
import com.github.houbb.sensitive.word.api.IWordContext;
import com.github.houbb.sensitive.word.api.IWordResult;
import com.github.houbb.sensitive.word.api.*;
import com.github.houbb.sensitive.word.api.context.InnerSensitiveWordContext;
import com.github.houbb.sensitive.word.constant.enums.WordValidModeEnum;
import com.github.houbb.sensitive.word.support.check.WordCheckResult;
@@ -59,6 +56,7 @@ public class SensitiveWord extends AbstractSensitiveWord {
.wordContext(context)
.modeEnum(WordValidModeEnum.FAIL_OVER)
.formatCharMapping(characterCharacterMap);
final IWordResultCondition wordResultCondition = context.wordResultCondition();
for (int i = 0; i < text.length(); i++) {
WordCheckResult checkResult = sensitiveCheck.sensitiveCheck(i, checkContext);
@@ -70,7 +68,10 @@ public class SensitiveWord extends AbstractSensitiveWord {
WordResult wordResult = WordResult.newInstance()
.startIndex(i)
.endIndex(i+wordLength);
resultList.add(wordResult);
//v0.13.0 添加判断
if(wordResultCondition.match(wordResult, text, modeEnum, context)) {
resultList.add(wordResult);
}
// 快速返回
if (WordValidModeEnum.FAIL_FAST.equals(modeEnum)) {

View File

@@ -0,0 +1,22 @@
package com.github.houbb.sensitive.word.support.resultcondition;

import com.github.houbb.sensitive.word.api.IWordContext;
import com.github.houbb.sensitive.word.api.IWordResult;
import com.github.houbb.sensitive.word.api.IWordResultCondition;
import com.github.houbb.sensitive.word.constant.enums.WordValidModeEnum;

/**
 * Skeletal implementation of {@link IWordResultCondition}.
 *
 * <p>Template method: {@link #match} delegates to {@link #doMatch}, giving a
 * single extension point for subclasses (and a place to add shared
 * pre/post-processing later without touching every implementation).</p>
 *
 * @since 0.13.0
 */
public abstract class AbstractWordResultCondition implements IWordResultCondition {

    /**
     * The actual condition logic, supplied by subclasses.
     *
     * @param wordResult the raw match result
     * @param text the original text being scanned
     * @param modeEnum the validation mode
     * @param context the word configuration context
     * @return {@code true} to keep the match
     */
    protected abstract boolean doMatch(IWordResult wordResult, String text, WordValidModeEnum modeEnum, IWordContext context);

    @Override
    public boolean match(IWordResult wordResult, String text, WordValidModeEnum modeEnum, IWordContext context) {
        return doMatch(wordResult, text, modeEnum, context);
    }
}

View File

@@ -0,0 +1,19 @@
package com.github.houbb.sensitive.word.support.resultcondition;

import com.github.houbb.sensitive.word.api.IWordContext;
import com.github.houbb.sensitive.word.api.IWordResult;
import com.github.houbb.sensitive.word.constant.enums.WordValidModeEnum;

/**
 * Condition that accepts every match unconditionally — the default,
 * preserving pre-0.13.0 behavior where no post-validation was applied.
 *
 * @since 0.13.0
 */
public class WordResultConditionAlwaysTrue extends AbstractWordResultCondition {

    @Override
    protected boolean doMatch(IWordResult wordResult, String text, WordValidModeEnum modeEnum, IWordContext context) {
        // Always keep the match.
        return true;
    }
}

View File

@@ -0,0 +1,50 @@
package com.github.houbb.sensitive.word.support.resultcondition;

import com.github.houbb.heaven.util.lang.CharUtil;
import com.github.houbb.heaven.util.util.CharsetUtil;
import com.github.houbb.sensitive.word.api.IWordContext;
import com.github.houbb.sensitive.word.api.IWordResult;
import com.github.houbb.sensitive.word.constant.enums.WordValidModeEnum;

/**
 * Requires English sensitive words to match as whole words: a match whose
 * characters are all English letters is rejected when the character
 * immediately before or after it is also an English letter (so "av" is not
 * flagged inside "have"). Matches containing any non-English character are
 * always kept.
 *
 * https://github.com/houbb/sensitive-word/issues/45
 *
 * @since 0.13.0
 */
public class WordResultConditionEnglishWordMatch extends AbstractWordResultCondition {

    @Override
    protected boolean doMatch(IWordResult wordResult, String text, WordValidModeEnum modeEnum, IWordContext context) {
        // Match range is [startIndex, endIndex): endIndex is exclusive
        // (the core sets endIndex = startIndex + wordLength).
        final int startIndex = wordResult.startIndex();
        final int endIndex = wordResult.endIndex();

        // Whole-word rule only applies to pure English matches; anything
        // containing a non-English character is kept as-is.
        for (int i = startIndex; i < endIndex; i++) {
            char c = text.charAt(i);
            if (!CharUtil.isEnglish(c)) {
                return true;
            }
        }

        // Reject when the character just before the match is English:
        // the match is only part of a longer word.
        if (startIndex > 0) {
            char preC = text.charAt(startIndex - 1);
            if (CharUtil.isEnglish(preC)) {
                return false;
            }
        }

        // Reject when the character just after the match is English.
        // Bug fix: since endIndex is exclusive, the following character is at
        // index endIndex, not endIndex + 1. The old check (endIndex <
        // text.length() - 1 with charAt(endIndex + 1)) skipped the adjacent
        // character, so e.g. "av" still matched inside "avx".
        if (endIndex < text.length()) {
            char afterC = text.charAt(endIndex);
            if (CharUtil.isEnglish(afterC)) {
                return false;
            }
        }

        return true;
    }
}

View File

@@ -0,0 +1,29 @@
package com.github.houbb.sensitive.word.support.resultcondition;

import com.github.houbb.sensitive.word.api.IWordResultCondition;

/**
 * Static factory for the built-in {@link IWordResultCondition} strategies.
 *
 * @since 0.13.0
 */
public final class WordResultConditions {

    /** Utility class: no instances. */
    private WordResultConditions() {
    }

    /**
     * Accepts every match unconditionally (the default behavior).
     *
     * @return an always-true condition
     */
    public static IWordResultCondition alwaysTrue() {
        return new WordResultConditionAlwaysTrue();
    }

    /**
     * Requires English sensitive words to match as whole words
     * (e.g. "av" is not flagged inside "have").
     *
     * @return a whole-word English match condition
     * @since 0.13.0
     */
    public static IWordResultCondition englishWordMatch() {
        return new WordResultConditionEnglishWordMatch();
    }
}

View File

@@ -3,6 +3,7 @@ package com.github.houbb.sensitive.word.bs;
import com.github.houbb.sensitive.word.support.allow.WordAllows;
import com.github.houbb.sensitive.word.support.deny.WordDenys;
import com.github.houbb.sensitive.word.support.ignore.SensitiveWordCharIgnores;
import com.github.houbb.sensitive.word.support.resultcondition.WordResultConditions;
import com.github.houbb.sensitive.word.support.tag.WordTags;
import org.junit.Assert;
import org.junit.Test;
@@ -31,6 +32,7 @@ public class SensitiveWordBsConfigTest {
.numCheckLen(8)
.wordTag(WordTags.none())
.charIgnore(SensitiveWordCharIgnores.defaults())
.wordResultCondition(WordResultConditions.alwaysTrue())
.init();
final String text = "五星红旗迎风飘扬,毛主席的画像屹立在天安门前。";

View File

@@ -0,0 +1,106 @@
package com.github.houbb.sensitive.word.bs;

import com.github.houbb.sensitive.word.api.IWordDeny;
import com.github.houbb.sensitive.word.support.resultcondition.WordResultConditions;
import org.junit.Assert;
import org.junit.Test;

import java.util.Arrays;
import java.util.Collections;
import java.util.List;

/**
 * Tests for the {@code wordResultCondition} strategies introduced in 0.13.0:
 * the always-true default versus whole-word English matching.
 *
 * <p> project: sensitive-word-SensitiveWordBsTest </p>
 * <p> create on 2020/1/7 23:43 </p>
 *
 * @author Administrator
 * @since 0.13.0
 */
public class SensitiveWordBsResultConditionTest {

    // Default strategy: "av" is found even inside "have".
    @Test
    public void alwaysTrueTest() {
        final String text = "I have a nice day。";
        List<String> wordList = SensitiveWordBs.newInstance()
                .wordDeny(new IWordDeny() {
                    @Override
                    public List<String> deny() {
                        return Collections.singletonList("av");
                    }
                })
                .wordResultCondition(WordResultConditions.alwaysTrue())
                .init()
                .findAll(text);
        Assert.assertEquals("[av]", wordList.toString());
    }

    // Whole-word strategy: "av" inside "have" is no longer reported.
    @Test
    public void englishWordMatchTest() {
        final String text = "I have a nice day。";
        List<String> wordList = SensitiveWordBs.newInstance()
                .wordDeny(new IWordDeny() {
                    @Override
                    public List<String> deny() {
                        return Collections.singletonList("av");
                    }
                })
                .wordResultCondition(WordResultConditions.englishWordMatch())
                .init()
                .findAll(text);
        Assert.assertEquals("[]", wordList.toString());
    }

    // "av" inside "hav" (preceded by an English letter) is not a whole word.
    @Test
    public void englishWordMatchTest2() {
        final String text = "I hav";
        List<String> wordList = SensitiveWordBs.newInstance()
                .wordDeny(new IWordDeny() {
                    @Override
                    public List<String> deny() {
                        return Arrays.asList("av");
                    }
                })
                .wordResultCondition(WordResultConditions.englishWordMatch())
                .init()
                .findAll(text);
        Assert.assertEquals("[]", wordList.toString());
    }

    // "av" standing alone is a whole word and is still reported.
    @Test
    public void englishWordMatchTest3() {
        final String text = "av";
        List<String> wordList = SensitiveWordBs.newInstance()
                .wordDeny(new IWordDeny() {
                    @Override
                    public List<String> deny() {
                        return Arrays.asList("av");
                    }
                })
                .wordResultCondition(WordResultConditions.englishWordMatch())
                .init()
                .findAll(text);
        Assert.assertEquals("[av]", wordList.toString());
    }

    // Mixed case: "av" (inside "have") is filtered, "day" (whole word) kept.
    @Test
    public void englishWordMatchTest4() {
        final String text = "I have a nice day";
        List<String> wordList = SensitiveWordBs.newInstance()
                .wordDeny(new IWordDeny() {
                    @Override
                    public List<String> deny() {
                        return Arrays.asList("av", "day");
                    }
                })
                .wordResultCondition(WordResultConditions.englishWordMatch())
                .init()
                .findAll(text);
        Assert.assertEquals("[day]", wordList.toString());
    }
}