release branch 0.21.0

This commit is contained in:
binbin.hou
2024-09-18 18:48:04 +08:00
parent a4d69462d9
commit 3d101b639d
13 changed files with 416 additions and 20 deletions

View File

@@ -360,4 +360,11 @@
| 序号 | 变更类型 | 说明 | 时间 | 备注 |
|:---|:-----|-----------|:-------------------|:--------------------------------------------------|
| 1 | A | 支持数字的全词匹配 | 2024-9-18 16:39:40 | https://github.com/houbb/sensitive-word/issues/77 |
| 1 | A | 支持数字的全词匹配 | 2024-9-18 19:39:40 | https://github.com/houbb/sensitive-word/issues/77 |
# release_0.21.0
| 序号 | 变更类型 | 说明 | 时间 | 备注 |
|:---|:-----|------------------------------------|:-------------------|:------------------------------------------------------|
| 1 | O | 优化白名单的匹配策略,避免长白名单时,匹配到短的黑名单,不符合预期。 | 2024-9-18 21:39:40 | https://github.com/houbb/sensitive-word/issues/76 +19 |
| 2 | A | 白名单支持单个编辑 | 2024-9-18 21:39:40 | 避免全量初始化 |

View File

@@ -48,7 +48,7 @@
- [支持跳过一些特殊字符,让匹配更灵活](https://github.com/houbb/sensitive-word#%E5%BF%BD%E7%95%A5%E5%AD%97%E7%AC%A6)
- [支持单个的新增/修改,无需全量初始化](https://github.com/houbb/sensitive-word?tab=readme-ov-file#%E9%92%88%E5%AF%B9%E5%8D%95%E4%B8%AA%E8%AF%8D%E7%9A%84%E6%96%B0%E5%A2%9E%E5%88%A0%E9%99%A4%E6%97%A0%E9%9C%80%E5%85%A8%E9%87%8F%E5%88%9D%E5%A7%8B%E5%8C%96)
- [支持黑白名单单个的新增/修改,无需全量初始化](https://github.com/houbb/sensitive-word?tab=readme-ov-file#%E9%92%88%E5%AF%B9%E5%8D%95%E4%B8%AA%E8%AF%8D%E7%9A%84%E6%96%B0%E5%A2%9E%E5%88%A0%E9%99%A4%E6%97%A0%E9%9C%80%E5%85%A8%E9%87%8F%E5%88%9D%E5%A7%8B%E5%8C%96)
## 变更日志
@@ -63,6 +63,12 @@
- 新增数字+英文的全词匹配实现
### V0.21.0
- 修正白名单较长,包含了黑名单,导致白名单不符合预期的场景。
- 新增了白名单单个的编辑操作
## 更多资料
### 敏感词控台
@@ -93,7 +99,7 @@
<dependency>
<groupId>com.github.houbb</groupId>
<artifactId>sensitive-word</artifactId>
<version>0.20.0</version>
<version>0.21.0</version>
</dependency>
```
@@ -489,7 +495,7 @@ SensitiveWordBs wordBs = SensitiveWordBs.newInstance()
wordBs.destroy();
```
## 针对单个词的新增/删除,无需全量初始化
## 针对单个黑名单词的新增/删除,无需全量初始化
使用场景:在初始化之后,我们希望针对单个词的新增/删除,而不是完全重新初始化。这个特性就是为此准备的。
@@ -541,6 +547,63 @@ sensitiveWordBs.removeWord("新增", "测试");
Assert.assertEquals("[]", sensitiveWordBs.findAll(text).toString());
```
## 针对单个白名单词的新增/删除,无需全量初始化
使用场景:在初始化之后,我们希望针对单个词的新增/删除,而不是完全重新初始化。这个特性就是为此准备的。
支持版本v0.21.0
### 方法说明
`addWordAllow(word)` 新增白名单,支持单个词/集合
`removeWordAllow(word)` 删除白名单,支持单个词/集合
### 使用例子
```java
final String text = "测试一下新增敏感词白名单,验证一下删除和新增对不对";
SensitiveWordBs sensitiveWordBs =
SensitiveWordBs.newInstance()
.wordAllow(WordAllows.empty())
.wordDeny(new IWordDeny() {
@Override
public List<String> deny() {
return Arrays.asList("测试", "新增");
}
})
.init();
// 当前
Assert.assertEquals("[测试, 新增, 新增]", sensitiveWordBs.findAll(text).toString());
// 新增单个
sensitiveWordBs.addWordAllow("测试");
sensitiveWordBs.addWordAllow("新增");
Assert.assertEquals("[]", sensitiveWordBs.findAll(text).toString());
// 删除单个
sensitiveWordBs.removeWordAllow("测试");
Assert.assertEquals("[测试]", sensitiveWordBs.findAll(text).toString());
sensitiveWordBs.removeWordAllow("新增");
Assert.assertEquals("[测试, 新增, 新增]", sensitiveWordBs.findAll(text).toString());
// 新增集合
sensitiveWordBs.addWordAllow(Arrays.asList("新增", "测试"));
Assert.assertEquals("[]", sensitiveWordBs.findAll(text).toString());
// 删除集合
sensitiveWordBs.removeWordAllow(Arrays.asList("新增", "测试"));
Assert.assertEquals("[测试, 新增, 新增]", sensitiveWordBs.findAll(text).toString());
// 新增数组
sensitiveWordBs.addWordAllow("新增", "测试");
Assert.assertEquals("[]", sensitiveWordBs.findAll(text).toString());
// 删除数组
sensitiveWordBs.removeWordAllow("新增", "测试");
Assert.assertEquals("[测试, 新增, 新增]", sensitiveWordBs.findAll(text).toString());
```
# wordResultCondition-针对匹配词进一步判断
## 说明

View File

@@ -6,7 +6,7 @@
<groupId>com.github.houbb</groupId>
<artifactId>sensitive-word</artifactId>
<version>0.20.0</version>
<version>0.21.0</version>
<properties>
<!--============================== All Plugins START ==============================-->

View File

@@ -10,9 +10,9 @@ ECHO "============================= RELEASE START..."
:: 版本号信息(需要手动指定)
:::: 旧版本名称
SET version=0.20.0
SET version=0.21.0
:::: 新版本名称
SET newVersion=0.21.0
SET newVersion=0.22.0
:::: 组织名称
SET groupName=com.github.houbb
:::: 项目名称

View File

@@ -248,6 +248,21 @@ public interface IWordContext {
*/
IWordContext wordData(IWordData wordMap);
/**
* 获取 wordDataAllow 策略
* @return 策略
* @since 0.21.0
*/
IWordData wordDataAllow();
/**
* 设置 wordDataAllow 策略
* @param wordDataAllow 策略
* @return this
* @since 0.21.0
*/
IWordContext wordDataAllow(IWordData wordDataAllow);
IWordTag wordTag();
SensitiveWordContext wordTag(IWordTag wordTag);

View File

@@ -1,8 +1,6 @@
package com.github.houbb.sensitive.word.api.combine;
import com.github.houbb.sensitive.word.api.IWordAllow;
import com.github.houbb.sensitive.word.api.IWordContext;
import com.github.houbb.sensitive.word.api.IWordDeny;
import java.util.Collection;
import java.util.List;
@@ -15,14 +13,14 @@ public interface IWordAllowDenyCombine {
/**
* 获取最终的拒绝单词列表
* @param wordAllow 允许
* @param wordDeny 拒绝
* @param allowList 允许
* @param denyList 拒绝
* @param context 上下文
* @return 结果
* @since 0.8.0
*/
Collection<String> getActualDenyList(IWordAllow wordAllow,
IWordDeny wordDeny,
Collection<String> getActualDenyList(final List<String> allowList,
final List<String> denyList,
final IWordContext context);
}

View File

@@ -20,6 +20,7 @@ import com.github.houbb.sensitive.word.support.replace.WordReplaces;
import com.github.houbb.sensitive.word.support.result.WordResultHandlers;
import com.github.houbb.sensitive.word.support.resultcondition.WordResultConditions;
import com.github.houbb.sensitive.word.support.tag.WordTags;
import com.github.houbb.sensitive.word.utils.InnerWordFormatUtils;
import java.util.*;
@@ -107,12 +108,19 @@ public class SensitiveWordBs implements ISensitiveWordDestroy {
private ISensitiveWord sensitiveWord = SensitiveWords.defaults();
/**
* 敏感词 Data
* 敏感词(黑名单) Data
*
* @since 0.0.1
*/
private IWordData wordData = WordDatas.defaults();
/**
* 敏感词(白名单) Data
*
* @since 0.21.0
*/
private IWordData wordDataAllow = WordDatas.defaults();
/**
* 禁止的单词
* @since 0.0.13
@@ -205,10 +213,18 @@ public class SensitiveWordBs implements ISensitiveWordDestroy {
context.sensitiveCheck(sensitiveCheck);
// 4. 初始化 word
Collection<String> denyList = wordAllowDenyCombine.getActualDenyList(wordAllow, wordDeny, context);
final List<String> wordAllowList = wordAllow.allow();
final List<String> wordDenyList = wordDeny.deny();
Collection<String> denyList = wordAllowDenyCombine.getActualDenyList(wordAllowList, wordDenyList, context);
wordData.initWordData(denyList);
//4.2 白名单,避免长白黑短
List<String> actualAllowList = InnerWordFormatUtils.formatWordList(wordAllowList, context);
wordDataAllow.initWordData(actualAllowList);
//5. 更新 context
context.wordData(wordData);
context.wordDataAllow(wordDataAllow);
this.context = context;
return this;
@@ -304,6 +320,13 @@ public class SensitiveWordBs implements ISensitiveWordDestroy {
return this;
}
/**
 * Sets the allow (white) list word data strategy.
 *
 * @param wordDataAllow allow-list data implementation, must not be null
 * @return this, for fluent chaining
 * @since 0.21.0
 */
public SensitiveWordBs wordDataAllow(IWordData wordDataAllow) {
    ArgUtil.notNull(wordDataAllow, "wordDataAllow");
    this.wordDataAllow = wordDataAllow;
    return this;
}
public SensitiveWordBs sensitiveWord(ISensitiveWord sensitiveWord) {
ArgUtil.notNull(sensitiveWord, "sensitiveWord");
@@ -590,6 +613,7 @@ public class SensitiveWordBs implements ISensitiveWordDestroy {
/**
 * Releases the word data held by this instance.
 *
 * Since 0.21.0 the allow (white) list data is destroyed alongside
 * the deny (black) list data.
 */
@Override
public void destroy() {
    this.wordData.destroy();
    this.wordDataAllow.destroy();
}
/**
@@ -643,6 +667,55 @@ public class SensitiveWordBs implements ISensitiveWordDestroy {
this.addWord(wordList);
}
// 白名单---------------- START
/**
 * Removes one or more words from the sensitive-word allow (white) list.
 *
 * @param word   first word to remove
 * @param others any further words to remove
 * @since 0.21.0
 */
public void removeWordAllow(String word, String ... others) {
    final List<String> allWords = new ArrayList<>(1 + others.length);
    allWords.add(word);
    Collections.addAll(allWords, others);
    removeWordAllow(allWords);
}
/**
 * Removes every word in the given collection from the allow (white) list.
 * A null or empty collection is a harmless no-op.
 *
 * @param collection allow words to remove
 * @since 0.21.0
 */
public void removeWordAllow(Collection<String> collection) {
    if(!CollectionUtil.isEmpty(collection)) {
        for(String allowWord : collection) {
            this.wordDataAllow.removeWord(allowWord);
        }
    }
}
/**
 * Adds the given words to the sensitive-word allow (white) list.
 * A null or empty collection is a harmless no-op.
 *
 * @param collection allow words to add
 * @since 0.21.0
 */
public void addWordAllow(Collection<String> collection) {
    // Guard for symmetry with removeWordAllow(Collection): a null/empty
    // input should be a no-op instead of relying on the underlying
    // IWordData implementation to tolerate it.
    if(CollectionUtil.isEmpty(collection)) {
        return;
    }
    this.wordDataAllow.addWord(collection);
}
/**
 * Adds one or more words to the sensitive-word allow (white) list.
 *
 * @param word   first allow word
 * @param others any further allow words
 * @since 0.21.0
 */
public void addWordAllow(String word, String...others) {
    final List<String> allWords = new ArrayList<>(1 + others.length);
    allWords.add(word);
    Collections.addAll(allWords, others);
    this.addWordAllow(allWords);
}
// 白名单---------------- END
//------------------------------------------------------------------------------------ 公开方法 END
}

View File

@@ -106,6 +106,13 @@ public class SensitiveWordContext implements IWordContext {
*/
private IWordData wordData;
/**
* 单词允许信息
*
* @since 0.21.0
*/
private IWordData wordDataAllow;
/**
* 单词标签
*
@@ -135,6 +142,17 @@ public class SensitiveWordContext implements IWordContext {
return this;
}
/**
 * Returns the allow (white) list word data.
 *
 * @return allow-list data; may be null before it is set during init
 * @since 0.21.0
 */
@Override
public IWordData wordDataAllow() {
    return wordDataAllow;
}

/**
 * Sets the allow (white) list word data.
 *
 * @param wordDataAllow allow-list data
 * @return this, for fluent chaining
 * @since 0.21.0
 */
@Override
public SensitiveWordContext wordDataAllow(IWordData wordDataAllow) {
    this.wordDataAllow = wordDataAllow;
    return this;
}
@Override
public IWordReplace wordReplace() {
return wordReplace;

View File

@@ -6,6 +6,7 @@ import com.github.houbb.sensitive.word.api.context.InnerSensitiveWordContext;
import com.github.houbb.sensitive.word.constant.enums.WordTypeEnum;
import com.github.houbb.sensitive.word.constant.enums.WordValidModeEnum;
import com.github.houbb.sensitive.word.support.check.WordCheckResult;
import com.github.houbb.sensitive.word.support.check.WordCheckWordAllow;
import com.github.houbb.sensitive.word.support.result.WordResult;
import com.github.houbb.sensitive.word.utils.InnerWordFormatUtils;
@@ -59,7 +60,17 @@ public class SensitiveWord extends AbstractSensitiveWord {
.formatCharMapping(characterCharacterMap);
final IWordResultCondition wordResultCondition = context.wordResultCondition();
final IWordCheck wordCheckAllow = new WordCheckWordAllow();
for (int i = 0; i < text.length(); i++) {
// v0.21.0 白名单跳过 TODO: 感觉这种实现性能一般,考虑后续优化。
WordCheckResult wordCheckAllowResult = wordCheckAllow.sensitiveCheck(i, checkContext);
int wordLengthAllow = wordCheckAllowResult.index();
if(wordLengthAllow > 0) {
i += wordLengthAllow-1;
continue;
}
WordCheckResult checkResult = sensitiveCheck.sensitiveCheck(i, checkContext);
// 命中

View File

@@ -0,0 +1,90 @@
package com.github.houbb.sensitive.word.support.check;
import com.github.houbb.heaven.annotation.ThreadSafe;
import com.github.houbb.sensitive.word.api.ISensitiveWordCharIgnore;
import com.github.houbb.sensitive.word.api.IWordCheck;
import com.github.houbb.sensitive.word.api.IWordContext;
import com.github.houbb.sensitive.word.api.IWordData;
import com.github.houbb.sensitive.word.api.context.InnerSensitiveWordContext;
import com.github.houbb.sensitive.word.constant.enums.WordContainsTypeEnum;
import com.github.houbb.sensitive.word.constant.enums.WordTypeEnum;
import com.github.houbb.sensitive.word.constant.enums.WordValidModeEnum;
import java.util.Map;
/**
 * Sensitive word check implementation for the allow (white) list.
 *
 * Starting at a given index, scans forward and reports the raw-character
 * length of an allow-listed word found there (the longest one, unless the
 * context runs in FAIL_FAST mode, in which case the first full match wins).
 * Used to let white-listed spans shadow shorter black-listed words.
 *
 * @author binbin.hou
 * @since 0.21.0
 */
@ThreadSafe
public class WordCheckWordAllow extends AbstractWordCheck {

    // Stateless, so a single shared instance is safe (class is @ThreadSafe).
    private static final IWordCheck INSTANCE = new WordCheckWordAllow();

    public static IWordCheck getInstance() {
        return INSTANCE;
    }

    @Override
    protected Class<? extends IWordCheck> getSensitiveCheckClass() {
        return WordCheckWordAllow.class;
    }

    /**
     * Computes the matched length of an allow word starting at {@code beginIndex}.
     *
     * @param beginIndex   index in the original text to start matching from
     * @param innerContext per-call matching context (text, char mapping, mode)
     * @return number of raw characters covered by the matched allow word,
     *         including ignored characters inside it; 0 when nothing matches
     */
    @Override
    protected int getActualLength(int beginIndex, InnerSensitiveWordContext innerContext) {
        final String txt = innerContext.originalText();
        final Map<Character, Character> formatCharMapping = innerContext.formatCharMapping();
        final WordValidModeEnum wordValidModeEnum = innerContext.modeEnum();
        final IWordContext context = innerContext.wordContext();

        // Original note (translated): a ThreadLocal buffer might improve
        // performance here by reducing object creation — not implemented yet.
        int actualLength = 0;
        final IWordData wordDataAllow = context.wordDataAllow();

        // Accumulates the normalized candidate word, one character per step.
        StringBuilder stringBuilder = new StringBuilder();
        char[] rawChars = txt.toCharArray();
        final ISensitiveWordCharIgnore wordCharIgnore = context.charIgnore();

        // Raw characters consumed so far, including ignored ones.
        int tempLen = 0;
        for(int i = beginIndex; i < rawChars.length; i++) {
            // Skip ignorable characters — but never as the very first character,
            // to avoid a spurious match at the start.
            // See https://github.com/houbb/sensitive-word/issues/68
            if(wordCharIgnore.ignore(i, rawChars, innerContext) && tempLen != 0) {
                tempLen++;
                continue;
            }

            // Normalize the character via the format mapping.
            // NOTE(review): formatCharMapping.get(...) unboxes and would NPE if
            // the map lacks an entry — presumably the caller builds a mapping
            // covering every character of the text; confirm against the caller.
            final char currentChar = rawChars[i];
            char mappingChar = formatCharMapping.get(currentChar);
            stringBuilder.append(mappingChar);
            tempLen++;

            // Does the accumulated prefix correspond to an allow word?
            WordContainsTypeEnum wordContainsTypeEnum = wordDataAllow.contains(stringBuilder, innerContext);
            if(WordContainsTypeEnum.CONTAINS_END.equals(wordContainsTypeEnum)) {
                // Full allow word matched; record its raw length. Unless in
                // FAIL_FAST mode, keep scanning for a longer match.
                actualLength = tempLen;
                if(WordValidModeEnum.FAIL_FAST.equals(wordValidModeEnum)) {
                    break;
                }
            }

            // No allow word starts with this prefix — further scanning is pointless.
            if(WordContainsTypeEnum.NOT_FOUND.equals(wordContainsTypeEnum)) {
                break;
            }
        }
        return actualLength;
    }

    @Override
    protected String getType() {
        return WordTypeEnum.WORD.getCode();
    }
}

View File

@@ -22,12 +22,9 @@ public abstract class AbstractWordAllowDenyCombine implements IWordAllowDenyComb
IWordContext context);
@Override
public Collection<String> getActualDenyList(IWordAllow wordAllow,
IWordDeny wordDeny,
public Collection<String> getActualDenyList(final List<String> allowList,
final List<String> denyList,
IWordContext context) {
List<String> allowList = wordAllow.allow();
List<String> denyList = wordDeny.deny();
List<String> formatAllowList = InnerWordFormatUtils.formatWordList(allowList, context);
List<String> formatDenyList = InnerWordFormatUtils.formatWordList(denyList, context);

View File

@@ -0,0 +1,75 @@
package com.github.houbb.sensitive.word.bs;
import com.github.houbb.sensitive.word.api.IWordAllow;
import com.github.houbb.sensitive.word.api.IWordDeny;
import com.github.houbb.sensitive.word.support.allow.WordAllows;
import com.github.houbb.sensitive.word.support.deny.WordDenys;
import com.github.houbb.sensitive.word.support.replace.WordReplaces;
import org.junit.Assert;
import org.junit.Test;
import java.util.Arrays;
import java.util.List;
/**
 * Tests for allow (white) list matching behavior of SensitiveWordBs.
 *
 * <p> project: sensitive-word-SensitiveWordBsTest </p>
 * <p> create on 2020/1/7 23:43 </p>
 *
 * @author Administrator
 * @since 0.21.0
 */
public class SensitiveWordBsAllowTest {

    /**
     * A longer allow word must not be split by a shorter deny word it
     * contains: "三黄片" is white-listed, so only the standalone "黄片"
     * occurrence is reported. Relies on the default deny list containing
     * "黄片" (no wordDeny is configured here).
     *
     * https://github.com/houbb/sensitive-word/issues/76
     *
     * @since 0.21.0
     */
    @Test
    public void findAllowTest() {
        final String text = "三黄片黄片";

        SensitiveWordBs sensitiveWordBs = SensitiveWordBs.newInstance()
                .wordAllow(new IWordAllow() {
                    @Override
                    public List<String> allow() {
                        return Arrays.asList("三黄片");
                    }
                })
                .init();

        Assert.assertEquals("[黄片]", sensitiveWordBs.findAll(text).toString());
    }

    /**
     * A white-listed word ("共产党") is not flagged even though it contains
     * a black-listed word ("共产"); the standalone deny word is still found.
     *
     * https://github.com/houbb/sensitive-word/issues/19
     *
     * @since 0.21.0
     */
    @Test
    public void bug19FixTest() {
        final String text = "共产党是白名单不会被检测";
        final String text2 = "共产党是白名单不会被检测,但是共产是黑名单";

        SensitiveWordBs sensitiveWordBs = SensitiveWordBs.newInstance()
                .wordAllow(new IWordAllow() {
                    @Override
                    public List<String> allow() {
                        return Arrays.asList("共产党");
                    }
                })
                .wordDeny(new IWordDeny() {
                    @Override
                    public List<String> deny() {
                        return Arrays.asList("政府", "国家", "共产");
                    }
                })
                .init();

        Assert.assertEquals("[]", sensitiveWordBs.findAll(text).toString());
        Assert.assertEquals("[共产]", sensitiveWordBs.findAll(text2).toString());
    }
}

View File

@@ -1,5 +1,6 @@
package com.github.houbb.sensitive.word.bs;
import com.github.houbb.sensitive.word.api.IWordDeny;
import com.github.houbb.sensitive.word.support.allow.WordAllows;
import com.github.houbb.sensitive.word.support.deny.WordDenys;
import org.junit.Assert;
@@ -59,4 +60,52 @@ public class SensitiveWordBsEditWordTest {
Assert.assertEquals("[]", sensitiveWordBs.findAll(text).toString());
}
/**
 * Verifies single-word add/remove editing of the allow (white) list
 * after initialization, without a full re-init, for the single-word,
 * collection, and varargs overloads.
 *
 * @since 0.21.0
 */
@Test
public void editWordAllowTest() {
    final String text = "测试一下新增敏感词白名单,验证一下删除和新增对不对";

    SensitiveWordBs sensitiveWordBs =
            SensitiveWordBs.newInstance()
                    .wordAllow(WordAllows.empty())
                    .wordDeny(new IWordDeny() {
                        @Override
                        public List<String> deny() {
                            return Arrays.asList("测试", "新增");
                        }
                    })
                    .init();

    // Baseline: both deny words are reported.
    Assert.assertEquals("[测试, 新增, 新增]", sensitiveWordBs.findAll(text).toString());

    // Add single allow words — all matches are whitelisted away.
    sensitiveWordBs.addWordAllow("测试");
    sensitiveWordBs.addWordAllow("新增");
    Assert.assertEquals("[]", sensitiveWordBs.findAll(text).toString());

    // Remove single allow words — deny matches reappear one by one.
    sensitiveWordBs.removeWordAllow("测试");
    Assert.assertEquals("[测试]", sensitiveWordBs.findAll(text).toString());
    sensitiveWordBs.removeWordAllow("新增");
    Assert.assertEquals("[测试, 新增, 新增]", sensitiveWordBs.findAll(text).toString());

    // Add as a collection.
    sensitiveWordBs.addWordAllow(Arrays.asList("新增", "测试"));
    Assert.assertEquals("[]", sensitiveWordBs.findAll(text).toString());

    // Remove as a collection.
    sensitiveWordBs.removeWordAllow(Arrays.asList("新增", "测试"));
    Assert.assertEquals("[测试, 新增, 新增]", sensitiveWordBs.findAll(text).toString());

    // Add via the varargs-array overload.
    sensitiveWordBs.addWordAllow("新增", "测试");
    Assert.assertEquals("[]", sensitiveWordBs.findAll(text).toString());

    // Remove via the varargs-array overload (original comment mislabeled
    // this step as "collection").
    sensitiveWordBs.removeWordAllow("新增", "测试");
    Assert.assertEquals("[测试, 新增, 新增]", sensitiveWordBs.findAll(text).toString());
}
}