mirror of
https://github.com/houbb/sensitive-word.git
synced 2026-03-22 08:27:36 +08:00
release branch 0.13.0
This commit is contained in:
@@ -225,3 +225,9 @@
|
||||
|:---|:-----|----------------------------------------------|:--------------------|:------|
|
||||
| 1 | A | 添加 wordTags 标签结果处理类 | 2023-12-18 23:51:58 | |
|
||||
| 2 | A | 添加 AbstractWordResultHandler 处理类,便于后续拓展和统一管理 | 2023-12-18 23:51:58 | |
|
||||
|
||||
# release_0.13.0
|
||||
|
||||
| 序号 | 变更类型 | 说明 | 时间 | 备注 |
|
||||
|:---|:-----|-----------------------------------|:--------------------|:------|
|
||||
| 1 | A | 对匹配后的单词,额外可以做一次校验,比如做一次英文全词匹配的验证。 | 2024-02-19 23:51:58 | |
|
||||
|
||||
57
README.md
57
README.md
@@ -82,7 +82,7 @@
|
||||
<dependency>
|
||||
<groupId>com.github.houbb</groupId>
|
||||
<artifactId>sensitive-word</artifactId>
|
||||
<version>0.12.0</version>
|
||||
<version>0.13.0</version>
|
||||
</dependency>
|
||||
```
|
||||
|
||||
@@ -429,6 +429,7 @@ SensitiveWordBs wordBs = SensitiveWordBs.newInstance()
|
||||
.numCheckLen(8)
|
||||
.wordTag(WordTags.none())
|
||||
.charIgnore(SensitiveWordCharIgnores.defaults())
|
||||
.wordResultCondition(WordResultConditions.alwaysTrue())
|
||||
.init();
|
||||
|
||||
final String text = "五星红旗迎风飘扬,毛主席的画像屹立在天安门前。";
|
||||
@@ -439,7 +440,7 @@ Assert.assertTrue(wordBs.contains(text));
|
||||
其中各项配置的说明如下:
|
||||
|
||||
| 序号 | 方法 | 说明 | 默认值 |
|
||||
|:---|:---------------------|:--------------|:------|
|
||||
|:---|:---------------------|:-----------------------------|:------|
|
||||
| 1 | ignoreCase | 忽略大小写 | true |
|
||||
| 2 | ignoreWidth | 忽略半角圆角 | true |
|
||||
| 3 | ignoreNumStyle | 忽略数字的写法 | true |
|
||||
@@ -453,7 +454,59 @@ Assert.assertTrue(wordBs.contains(text));
|
||||
| 11 | numCheckLen | 数字检测,自定义指定长度。 | 8 |
|
||||
| 12 | wordTag | 词对应的标签 | none |
|
||||
| 13 | charIgnore | 忽略的字符 | none |
|
||||
| 14 | wordResultCondition | 针对匹配的敏感词额外加工,比如可以限制英文单词必须全匹配 | 恒为真 |
|
||||
|
||||
# wordResultCondition-针对匹配词进一步判断
|
||||
|
||||
## 说明
|
||||
|
||||
支持版本:v0.13.0
|
||||
|
||||
有时候我们可能希望对匹配的敏感词进一步限制,比如虽然我们定义了【av】作为敏感词,但是不希望【have】被匹配。
|
||||
|
||||
就可以自定义实现 wordResultCondition 接口,实现自己的策略。
|
||||
|
||||
系统内置的策略在 WordResultConditions#alwaysTrue() 恒为真,WordResultConditions#englishWordMatch() 则要求英文必须全词匹配。
|
||||
|
||||
## 入门例子
|
||||
|
||||
原始的默认情况:
|
||||
|
||||
```java
|
||||
final String text = "I have a nice day。";
|
||||
|
||||
List<String> wordList = SensitiveWordBs.newInstance()
|
||||
.wordDeny(new IWordDeny() {
|
||||
@Override
|
||||
public List<String> deny() {
|
||||
return Collections.singletonList("av");
|
||||
}
|
||||
})
|
||||
.wordResultCondition(WordResultConditions.alwaysTrue())
|
||||
.init()
|
||||
.findAll(text);
|
||||
Assert.assertEquals("[av]", wordList.toString());
|
||||
```
|
||||
|
||||
我们可以指定为英文必须全词匹配。
|
||||
|
||||
```java
|
||||
final String text = "I have a nice day。";
|
||||
|
||||
List<String> wordList = SensitiveWordBs.newInstance()
|
||||
.wordDeny(new IWordDeny() {
|
||||
@Override
|
||||
public List<String> deny() {
|
||||
return Collections.singletonList("av");
|
||||
}
|
||||
})
|
||||
.wordResultCondition(WordResultConditions.englishWordMatch())
|
||||
.init()
|
||||
.findAll(text);
|
||||
Assert.assertEquals("[]", wordList.toString());
|
||||
```
|
||||
|
||||
当然可以根据需要实现更加复杂的策略。
|
||||
|
||||
# 忽略字符
|
||||
|
||||
|
||||
2
pom.xml
2
pom.xml
@@ -6,7 +6,7 @@
|
||||
|
||||
<groupId>com.github.houbb</groupId>
|
||||
<artifactId>sensitive-word</artifactId>
|
||||
<version>0.12.0</version>
|
||||
<version>0.13.0</version>
|
||||
|
||||
<properties>
|
||||
<!--============================== All Plugins START ==============================-->
|
||||
|
||||
@@ -10,9 +10,9 @@ ECHO "============================= RELEASE START..."
|
||||
|
||||
:: 版本号信息(需要手动指定)
|
||||
:::: 旧版本名称
|
||||
SET version=0.12.0
|
||||
SET version=0.13.0
|
||||
:::: 新版本名称
|
||||
SET newVersion=0.13.0
|
||||
SET newVersion=0.14.0
|
||||
:::: 组织名称
|
||||
SET groupName=com.github.houbb
|
||||
:::: 项目名称
|
||||
|
||||
@@ -241,4 +241,8 @@ public interface IWordContext {
|
||||
|
||||
SensitiveWordContext charIgnore(ISensitiveWordCharIgnore charIgnore);
|
||||
|
||||
IWordResultCondition wordResultCondition();
|
||||
|
||||
SensitiveWordContext wordResultCondition(IWordResultCondition wordResultCondition);
|
||||
|
||||
}
|
||||
|
||||
@@ -0,0 +1,26 @@
|
||||
package com.github.houbb.sensitive.word.api;
|
||||
|
||||
import com.github.houbb.sensitive.word.constant.enums.WordValidModeEnum;
|
||||
|
||||
/**
|
||||
* 敏感词的结果是否匹配
|
||||
* @author binbin.hou
|
||||
* @since 0.13.0
|
||||
*/
|
||||
public interface IWordResultCondition {
|
||||
|
||||
/**
|
||||
* 是否匹配
|
||||
* @param wordResult 根据词匹配的结果
|
||||
* @param text 原始文本
|
||||
* @param modeEnum 枚举类别
|
||||
* @param context 上下文
|
||||
* @return 是否匹配
|
||||
* @since 0.13.0
|
||||
*/
|
||||
boolean match(final IWordResult wordResult,
|
||||
final String text,
|
||||
final WordValidModeEnum modeEnum,
|
||||
final IWordContext context);
|
||||
|
||||
}
|
||||
@@ -18,6 +18,7 @@ import com.github.houbb.sensitive.word.support.deny.WordDenys;
|
||||
import com.github.houbb.sensitive.word.support.ignore.SensitiveWordCharIgnores;
|
||||
import com.github.houbb.sensitive.word.support.replace.WordReplaces;
|
||||
import com.github.houbb.sensitive.word.support.result.WordResultHandlers;
|
||||
import com.github.houbb.sensitive.word.support.resultcondition.WordResultConditions;
|
||||
import com.github.houbb.sensitive.word.support.tag.WordTags;
|
||||
|
||||
import java.util.Collection;
|
||||
@@ -163,6 +164,12 @@ public class SensitiveWordBs {
|
||||
*/
|
||||
private ISensitiveWordCharIgnore charIgnore = SensitiveWordCharIgnores.defaults();
|
||||
|
||||
/**
|
||||
* 敏感词结果匹配策略
|
||||
* @since 0.13.0
|
||||
*/
|
||||
private IWordResultCondition wordResultCondition = WordResultConditions.alwaysTrue();
|
||||
|
||||
/**
|
||||
* 新建验证实例
|
||||
* <p>
|
||||
@@ -233,10 +240,18 @@ public class SensitiveWordBs {
|
||||
context.wordData(wordData);
|
||||
context.wordTag(wordTag);
|
||||
context.charIgnore(charIgnore);
|
||||
context.wordResultCondition(wordResultCondition);
|
||||
|
||||
return context;
|
||||
}
|
||||
|
||||
public SensitiveWordBs wordResultCondition(IWordResultCondition wordResultCondition) {
|
||||
ArgUtil.notNull(wordResultCondition, "wordResultCondition");
|
||||
|
||||
this.wordResultCondition = wordResultCondition;
|
||||
return this;
|
||||
}
|
||||
|
||||
public SensitiveWordBs charIgnore(ISensitiveWordCharIgnore charIgnore) {
|
||||
ArgUtil.notNull(charIgnore, "charIgnore");
|
||||
|
||||
|
||||
@@ -113,6 +113,13 @@ public class SensitiveWordContext implements IWordContext {
|
||||
*/
|
||||
private ISensitiveWordCharIgnore charIgnore;
|
||||
|
||||
/**
|
||||
* 敏感词结果匹配
|
||||
*
|
||||
* @since 0.13.0
|
||||
*/
|
||||
private IWordResultCondition wordResultCondition;
|
||||
|
||||
public IWordData wordData() {
|
||||
return wordData;
|
||||
}
|
||||
@@ -304,4 +311,13 @@ public class SensitiveWordContext implements IWordContext {
|
||||
this.charIgnore = charIgnore;
|
||||
return this;
|
||||
}
|
||||
|
||||
public IWordResultCondition wordResultCondition() {
|
||||
return wordResultCondition;
|
||||
}
|
||||
|
||||
public SensitiveWordContext wordResultCondition(IWordResultCondition wordResultCondition) {
|
||||
this.wordResultCondition = wordResultCondition;
|
||||
return this;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,10 +1,7 @@
|
||||
package com.github.houbb.sensitive.word.core;
|
||||
|
||||
import com.github.houbb.heaven.util.guava.Guavas;
|
||||
import com.github.houbb.sensitive.word.api.IWordCheck;
|
||||
import com.github.houbb.sensitive.word.api.ISensitiveWord;
|
||||
import com.github.houbb.sensitive.word.api.IWordContext;
|
||||
import com.github.houbb.sensitive.word.api.IWordResult;
|
||||
import com.github.houbb.sensitive.word.api.*;
|
||||
import com.github.houbb.sensitive.word.api.context.InnerSensitiveWordContext;
|
||||
import com.github.houbb.sensitive.word.constant.enums.WordValidModeEnum;
|
||||
import com.github.houbb.sensitive.word.support.check.WordCheckResult;
|
||||
@@ -59,6 +56,7 @@ public class SensitiveWord extends AbstractSensitiveWord {
|
||||
.wordContext(context)
|
||||
.modeEnum(WordValidModeEnum.FAIL_OVER)
|
||||
.formatCharMapping(characterCharacterMap);
|
||||
final IWordResultCondition wordResultCondition = context.wordResultCondition();
|
||||
|
||||
for (int i = 0; i < text.length(); i++) {
|
||||
WordCheckResult checkResult = sensitiveCheck.sensitiveCheck(i, checkContext);
|
||||
@@ -70,7 +68,10 @@ public class SensitiveWord extends AbstractSensitiveWord {
|
||||
WordResult wordResult = WordResult.newInstance()
|
||||
.startIndex(i)
|
||||
.endIndex(i+wordLength);
|
||||
//v0.13.0 添加判断
|
||||
if(wordResultCondition.match(wordResult, text, modeEnum, context)) {
|
||||
resultList.add(wordResult);
|
||||
}
|
||||
|
||||
// 快速返回
|
||||
if (WordValidModeEnum.FAIL_FAST.equals(modeEnum)) {
|
||||
|
||||
@@ -0,0 +1,22 @@
|
||||
package com.github.houbb.sensitive.word.support.resultcondition;
|
||||
|
||||
import com.github.houbb.sensitive.word.api.IWordContext;
|
||||
import com.github.houbb.sensitive.word.api.IWordResult;
|
||||
import com.github.houbb.sensitive.word.api.IWordResultCondition;
|
||||
import com.github.houbb.sensitive.word.constant.enums.WordValidModeEnum;
|
||||
|
||||
/**
|
||||
* 抽象实现
|
||||
*
|
||||
* @since 0.13.0
|
||||
*/
|
||||
public abstract class AbstractWordResultCondition implements IWordResultCondition {
|
||||
|
||||
protected abstract boolean doMatch(IWordResult wordResult, String text, WordValidModeEnum modeEnum, IWordContext context);
|
||||
|
||||
@Override
|
||||
public boolean match(IWordResult wordResult, String text, WordValidModeEnum modeEnum, IWordContext context) {
|
||||
return doMatch(wordResult, text, modeEnum, context);
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,19 @@
|
||||
package com.github.houbb.sensitive.word.support.resultcondition;
|
||||
|
||||
import com.github.houbb.sensitive.word.api.IWordContext;
|
||||
import com.github.houbb.sensitive.word.api.IWordResult;
|
||||
import com.github.houbb.sensitive.word.constant.enums.WordValidModeEnum;
|
||||
|
||||
/**
|
||||
* 恒为真
|
||||
*
|
||||
* @since 0.13.0
|
||||
*/
|
||||
public class WordResultConditionAlwaysTrue extends AbstractWordResultCondition {
|
||||
|
||||
@Override
|
||||
protected boolean doMatch(IWordResult wordResult, String text, WordValidModeEnum modeEnum, IWordContext context) {
|
||||
return true;
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,50 @@
|
||||
package com.github.houbb.sensitive.word.support.resultcondition;
|
||||
|
||||
import com.github.houbb.heaven.util.lang.CharUtil;
|
||||
import com.github.houbb.heaven.util.util.CharsetUtil;
|
||||
import com.github.houbb.sensitive.word.api.IWordContext;
|
||||
import com.github.houbb.sensitive.word.api.IWordResult;
|
||||
import com.github.houbb.sensitive.word.constant.enums.WordValidModeEnum;
|
||||
|
||||
/**
|
||||
* 英文单词必须要全词匹配
|
||||
*
|
||||
* https://github.com/houbb/sensitive-word/issues/45
|
||||
*
|
||||
* @since 0.13.0
|
||||
*/
|
||||
public class WordResultConditionEnglishWordMatch extends AbstractWordResultCondition {
|
||||
|
||||
@Override
|
||||
protected boolean doMatch(IWordResult wordResult, String text, WordValidModeEnum modeEnum, IWordContext context) {
|
||||
final int startIndex = wordResult.startIndex();
|
||||
final int endIndex = wordResult.endIndex();
|
||||
// 判断当前是否为英文单词
|
||||
for(int i = startIndex; i < endIndex; i++) {
|
||||
char c = text.charAt(i);
|
||||
if(!CharUtil.isEnglish(c)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
// 判断处理,判断前一个字符是否为英文。如果是,则不满足
|
||||
if(startIndex > 0) {
|
||||
char preC = text.charAt(startIndex-1);
|
||||
if(CharUtil.isEnglish(preC)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// 判断后一个字符是否为英文
|
||||
if(endIndex < text.length() - 1) {
|
||||
char afterC = text.charAt(endIndex+1);
|
||||
if(CharUtil.isEnglish(afterC)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
@@ -0,0 +1,29 @@
|
||||
package com.github.houbb.sensitive.word.support.resultcondition;
|
||||
|
||||
import com.github.houbb.sensitive.word.api.IWordResultCondition;
|
||||
|
||||
/**
|
||||
* 匹配结果工具类
|
||||
*
|
||||
* @since 0.13.0
|
||||
*/
|
||||
public final class WordResultConditions {
|
||||
|
||||
/**
|
||||
* 恒为真
|
||||
* @return 结果
|
||||
*/
|
||||
public static IWordResultCondition alwaysTrue() {
|
||||
return new WordResultConditionAlwaysTrue();
|
||||
}
|
||||
|
||||
/**
|
||||
* 如果是英文,则必须全词匹匹配
|
||||
* @return 结果
|
||||
* @since 0.13.0
|
||||
*/
|
||||
public static IWordResultCondition englishWordMatch() {
|
||||
return new WordResultConditionEnglishWordMatch();
|
||||
}
|
||||
|
||||
}
|
||||
@@ -3,6 +3,7 @@ package com.github.houbb.sensitive.word.bs;
|
||||
import com.github.houbb.sensitive.word.support.allow.WordAllows;
|
||||
import com.github.houbb.sensitive.word.support.deny.WordDenys;
|
||||
import com.github.houbb.sensitive.word.support.ignore.SensitiveWordCharIgnores;
|
||||
import com.github.houbb.sensitive.word.support.resultcondition.WordResultConditions;
|
||||
import com.github.houbb.sensitive.word.support.tag.WordTags;
|
||||
import org.junit.Assert;
|
||||
import org.junit.Test;
|
||||
@@ -31,6 +32,7 @@ public class SensitiveWordBsConfigTest {
|
||||
.numCheckLen(8)
|
||||
.wordTag(WordTags.none())
|
||||
.charIgnore(SensitiveWordCharIgnores.defaults())
|
||||
.wordResultCondition(WordResultConditions.alwaysTrue())
|
||||
.init();
|
||||
|
||||
final String text = "五星红旗迎风飘扬,毛主席的画像屹立在天安门前。";
|
||||
|
||||
@@ -0,0 +1,106 @@
|
||||
package com.github.houbb.sensitive.word.bs;

import com.github.houbb.sensitive.word.api.IWordDeny;
import com.github.houbb.sensitive.word.support.resultcondition.WordResultConditions;
import org.junit.Assert;
import org.junit.Test;

import java.util.Arrays;
import java.util.Collections;
import java.util.List;

/**
 * Tests for the {@code wordResultCondition} configuration of the bootstrap.
 *
 * <p> project: sensitive-word-SensitiveWordBsTest </p>
 * <p> create on 2020/1/7 23:43 </p>
 *
 * @author Administrator
 * @since 0.13.0
 */
public class SensitiveWordBsResultConditionTest {

    @Test
    public void alwaysTrueTest() {
        final String target = "I have a nice day。";

        // Default condition: the "av" inside "have" is reported.
        List<String> actualList = SensitiveWordBs.newInstance()
                .wordDeny(new IWordDeny() {
                    @Override
                    public List<String> deny() {
                        return Collections.singletonList("av");
                    }
                })
                .wordResultCondition(WordResultConditions.alwaysTrue())
                .init()
                .findAll(target);
        Assert.assertEquals("[av]", actualList.toString());
    }

    @Test
    public void englishWordMatchTest() {
        final String target = "I have a nice day。";

        // Whole-word condition: "av" inside "have" is discarded.
        List<String> actualList = SensitiveWordBs.newInstance()
                .wordDeny(new IWordDeny() {
                    @Override
                    public List<String> deny() {
                        return Collections.singletonList("av");
                    }
                })
                .wordResultCondition(WordResultConditions.englishWordMatch())
                .init()
                .findAll(target);
        Assert.assertEquals("[]", actualList.toString());
    }

    @Test
    public void englishWordMatchTest2() {
        final String target = "I hav";

        // "av" is preceded by the English letter 'h' — discarded.
        List<String> actualList = SensitiveWordBs.newInstance()
                .wordDeny(new IWordDeny() {
                    @Override
                    public List<String> deny() {
                        return Collections.singletonList("av");
                    }
                })
                .wordResultCondition(WordResultConditions.englishWordMatch())
                .init()
                .findAll(target);
        Assert.assertEquals("[]", actualList.toString());
    }

    @Test
    public void englishWordMatchTest3() {
        final String target = "av";

        // The whole text is exactly the word — kept.
        List<String> actualList = SensitiveWordBs.newInstance()
                .wordDeny(new IWordDeny() {
                    @Override
                    public List<String> deny() {
                        return Collections.singletonList("av");
                    }
                })
                .wordResultCondition(WordResultConditions.englishWordMatch())
                .init()
                .findAll(target);
        Assert.assertEquals("[av]", actualList.toString());
    }

    @Test
    public void englishWordMatchTest4() {
        final String target = "I have a nice day";

        // "day" is a whole word and kept; "av" inside "have" is discarded.
        List<String> actualList = SensitiveWordBs.newInstance()
                .wordDeny(new IWordDeny() {
                    @Override
                    public List<String> deny() {
                        return Arrays.asList("av", "day");
                    }
                })
                .wordResultCondition(WordResultConditions.englishWordMatch())
                .init()
                .findAll(target);
        Assert.assertEquals("[day]", actualList.toString());
    }

}
|
||||
Reference in New Issue
Block a user