mirror of
https://github.com/houbb/sensitive-word.git
synced 2026-03-22 00:17:35 +08:00
release branch 0.25.1
This commit is contained in:
@@ -414,9 +414,8 @@
|
||||
| 1 | A | wordCheck 策略支持用户自定义 | 2025-2-17 12:06:45 | https://github.com/houbb/sensitive-word/issues/101 |
|
||||
| 2 | A | wordCheckUrlNoPrefix | 2025-2-17 12:06:45 | https://github.com/houbb/sensitive-word/issues/101 |
|
||||
|
||||
# release_0.25.0
|
||||
# release_0.25.1
|
||||
|
||||
| 序号 | 变更类型 | 说明 | 时间 | 备注 |
|
||||
|:---|:-----|----------------------|:-------------------|:-----|
|
||||
| 1 | A | wordCheck 策略支持用户自定义 | 2025-2-17 12:06:45 | https://github.com/houbb/sensitive-word/issues/101 |
|
||||
| 2 | A | wordCheckUrlNoPrefix | 2025-2-17 12:06:45 | https://github.com/houbb/sensitive-word/issues/101 |
|
||||
| 序号 | 变更类型 | 说明 | 时间 | 备注 |
|
||||
|:---|:-----|------------------------------|:------------------|:---------------------------------------------------|
|
||||
| 1 | A | 修正 tags 匹配问题,黑名单命中时返回对应的黑名单词 | 2025-5-2 20:25:04 | https://github.com/houbb/sensitive-word/issues/105 |
|
||||
|
||||
@@ -96,7 +96,7 @@ v0.24.0 开始内置支持对敏感词的分类细化,不过工作量比较大
|
||||
<dependency>
|
||||
<groupId>com.github.houbb</groupId>
|
||||
<artifactId>sensitive-word</artifactId>
|
||||
<version>0.25.0</version>
|
||||
<version>0.25.1</version>
|
||||
</dependency>
|
||||
```
|
||||
|
||||
|
||||
2
pom.xml
2
pom.xml
@@ -6,7 +6,7 @@
|
||||
|
||||
<groupId>com.github.houbb</groupId>
|
||||
<artifactId>sensitive-word</artifactId>
|
||||
<version>0.25.0</version>
|
||||
<version>0.25.1</version>
|
||||
|
||||
<properties>
|
||||
<!--============================== All Plugins START ==============================-->
|
||||
|
||||
@@ -10,7 +10,7 @@ ECHO "============================= RELEASE START..."
|
||||
|
||||
:: 版本号信息(需要手动指定)
|
||||
:::: 旧版本名称
|
||||
SET version=0.25.0
|
||||
SET version=0.25.1
|
||||
:::: 新版本名称
|
||||
SET newVersion=0.26.0
|
||||
:::: 组织名称
|
||||
|
||||
@@ -28,4 +28,11 @@ public interface IWordResult {
|
||||
*/
|
||||
String type();
|
||||
|
||||
/**
|
||||
* 实际匹配的单词,方便统一的标签等处理,实际问题排查等
|
||||
* @return 结果
|
||||
* @since 0.25.1
|
||||
*/
|
||||
String word();
|
||||
|
||||
}
|
||||
|
||||
@@ -6,6 +6,7 @@ import com.github.houbb.sensitive.word.api.*;
|
||||
import com.github.houbb.sensitive.word.api.context.InnerSensitiveWordContext;
|
||||
import com.github.houbb.sensitive.word.constant.enums.WordValidModeEnum;
|
||||
import com.github.houbb.sensitive.word.support.check.WordCheckResult;
|
||||
import com.github.houbb.sensitive.word.support.result.WordLengthResult;
|
||||
import com.github.houbb.sensitive.word.support.result.WordResult;
|
||||
import com.github.houbb.sensitive.word.utils.InnerWordFormatUtils;
|
||||
|
||||
@@ -80,13 +81,16 @@ public class SensitiveWord extends AbstractSensitiveWord {
|
||||
|
||||
|
||||
// 命中
|
||||
int wordLength = checkResult.wordLengthResult().wordDenyLen();
|
||||
final WordLengthResult wordLengthResult = checkResult.wordLengthResult();
|
||||
int wordLength = wordLengthResult.wordDenyLen();
|
||||
if (wordLength > 0) {
|
||||
// 保存敏感词
|
||||
WordResult wordResult = WordResult.newInstance()
|
||||
.startIndex(i)
|
||||
.endIndex(i+wordLength)
|
||||
.type(checkResult.type());
|
||||
.type(checkResult.type())
|
||||
.word(wordLengthResult.wordDeny());
|
||||
|
||||
//v0.13.0 添加判断
|
||||
if(wordResultCondition.match(wordResult, text, modeEnum, context)) {
|
||||
resultList.add(wordResult);
|
||||
|
||||
@@ -59,7 +59,8 @@ public abstract class AbstractWordCheck implements IWordCheck {
|
||||
return WordCheckResult.newInstance()
|
||||
.wordLengthResult(wordLengthResult)
|
||||
.type(getType())
|
||||
.checkClass(clazz);
|
||||
.checkClass(clazz)
|
||||
;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -53,6 +53,8 @@ public class WordCheckWord extends AbstractWordCheck {
|
||||
int maxWhite = 0;
|
||||
int maxBlack = 0;
|
||||
boolean firstCheck = true;
|
||||
String blackWord = null;
|
||||
String whiteWord = null;
|
||||
|
||||
WordContainsTypeEnum wordContainsTypeEnumAllow = wordDataAllow.contains(stringBuilder, innerContext);
|
||||
WordContainsTypeEnum wordContainsTypeEnumDeny = wordData.contains(stringBuilder, innerContext);
|
||||
@@ -72,6 +74,8 @@ public class WordCheckWord extends AbstractWordCheck {
|
||||
if (WordContainsTypeEnum.CONTAINS_END.equals(wordContainsTypeEnumAllow)) {
|
||||
maxWhite += tempLen;
|
||||
wordContainsTypeEnumAllow = WordContainsTypeEnum.NOT_FOUND;
|
||||
|
||||
whiteWord = stringBuilder.toString();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -80,6 +84,8 @@ public class WordCheckWord extends AbstractWordCheck {
|
||||
if (WordContainsTypeEnum.CONTAINS_END.equals(wordContainsTypeEnumDeny)) {
|
||||
maxBlack += tempLen;
|
||||
wordContainsTypeEnumDeny = WordContainsTypeEnum.NOT_FOUND;
|
||||
|
||||
blackWord = stringBuilder.toString();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -93,7 +99,9 @@ public class WordCheckWord extends AbstractWordCheck {
|
||||
|
||||
return WordLengthResult.newInstance()
|
||||
.wordAllowLen(maxWhite)
|
||||
.wordDenyLen(maxBlack);
|
||||
.wordDenyLen(maxBlack)
|
||||
.wordAllow(whiteWord)
|
||||
.wordDeny(blackWord);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
||||
@@ -15,6 +15,18 @@ public class WordLengthResult {
|
||||
*/
|
||||
private int wordDenyLen;
|
||||
|
||||
/**
|
||||
* 黑名单匹配词
|
||||
* @since 0.25.1
|
||||
*/
|
||||
private String wordDeny;
|
||||
|
||||
/**
|
||||
* 白名单实际匹配值
|
||||
* @since 0.25.1
|
||||
*/
|
||||
private String wordAllow;
|
||||
|
||||
public static WordLengthResult newInstance() {
|
||||
return new WordLengthResult();
|
||||
}
|
||||
@@ -37,11 +49,31 @@ public class WordLengthResult {
|
||||
return this;
|
||||
}
|
||||
|
||||
public String wordDeny() {
|
||||
return wordDeny;
|
||||
}
|
||||
|
||||
public WordLengthResult wordDeny(String wordDeny) {
|
||||
this.wordDeny = wordDeny;
|
||||
return this;
|
||||
}
|
||||
|
||||
public String wordAllow() {
|
||||
return wordAllow;
|
||||
}
|
||||
|
||||
public WordLengthResult wordAllow(String wordAllow) {
|
||||
this.wordAllow = wordAllow;
|
||||
return this;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "WordLengthResult{" +
|
||||
"wordAllowLen=" + wordAllowLen +
|
||||
", wordDenyLen=" + wordDenyLen +
|
||||
", wordDeny='" + wordDeny + '\'' +
|
||||
", wordAllow='" + wordAllow + '\'' +
|
||||
'}';
|
||||
}
|
||||
|
||||
|
||||
@@ -18,6 +18,12 @@ public class WordResult implements IWordResult {
|
||||
*/
|
||||
private String type;
|
||||
|
||||
/**
|
||||
* 单词匹配
|
||||
* @since 0.25.1
|
||||
*/
|
||||
private String word;
|
||||
|
||||
private WordResult(){}
|
||||
|
||||
public static WordResult newInstance() {
|
||||
@@ -54,12 +60,23 @@ public class WordResult implements IWordResult {
|
||||
return this;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String word() {
|
||||
return word;
|
||||
}
|
||||
|
||||
public WordResult word(String word) {
|
||||
this.word = word;
|
||||
return this;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "WordResult{" +
|
||||
"startIndex=" + startIndex +
|
||||
", endIndex=" + endIndex +
|
||||
", type='" + type + '\'' +
|
||||
", word='" + word + '\'' +
|
||||
'}';
|
||||
}
|
||||
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
package com.github.houbb.sensitive.word.support.result;
|
||||
|
||||
import com.github.houbb.heaven.util.util.CollectionUtil;
|
||||
import com.github.houbb.sensitive.word.api.IWordContext;
|
||||
import com.github.houbb.sensitive.word.api.IWordResult;
|
||||
import com.github.houbb.sensitive.word.utils.InnerWordCharUtils;
|
||||
@@ -17,25 +18,20 @@ public class WordResultHandlerWordTags extends AbstractWordResultHandler<WordTag
|
||||
|
||||
@Override
|
||||
protected WordTagsDto doHandle(IWordResult wordResult, IWordContext wordContext, String originalText) {
|
||||
WordTagsDto dto = new WordTagsDto();
|
||||
|
||||
// 截取
|
||||
String word = InnerWordCharUtils.getString(originalText.toCharArray(), wordResult);
|
||||
|
||||
// 创建 DTO 并设置原始单词
|
||||
WordTagsDto dto = new WordTagsDto();
|
||||
dto.setWord(word);
|
||||
|
||||
// 如果启用了字符忽略功能,清理单词后再查找标签
|
||||
StringBuilder wordForTagLookup = new StringBuilder();
|
||||
char[] chars = word.toCharArray();
|
||||
for (int i = 0; i < word.length(); i++) {
|
||||
// 如果字符不被忽略,则保留
|
||||
// TODO: 此处innercontext 被设为null,是否合理?
|
||||
if (!wordContext.charIgnore().ignore(i, chars, null)) {
|
||||
wordForTagLookup.append(chars[i]);
|
||||
}
|
||||
}
|
||||
// 获取 tags (使用清理后的单词查找标签)
|
||||
Set<String> wordTags = InnerWordTagUtils.tags(wordForTagLookup.toString(), wordContext);
|
||||
Set<String> wordTags = InnerWordTagUtils.tags(word, wordContext);
|
||||
|
||||
// 如果为空,则尝试使用命中的敏感词匹配 v0.25.1 bug105
|
||||
if(CollectionUtil.isEmpty(wordTags)) {
|
||||
wordTags = InnerWordTagUtils.tags(wordResult.word(), wordContext);
|
||||
}
|
||||
|
||||
dto.setWord(word);
|
||||
dto.setTags(wordTags);
|
||||
|
||||
return dto;
|
||||
|
||||
@@ -26,7 +26,7 @@ public class InnerWordTagUtils {
|
||||
public static Set<String> tags(final String word,
|
||||
final IWordContext wordContext) {
|
||||
if(StringUtil.isEmpty(word)) {
|
||||
return Collections.emptySet();
|
||||
return null;
|
||||
}
|
||||
|
||||
final IWordTag wordTag = wordContext.wordTag();
|
||||
|
||||
@@ -1,10 +1,8 @@
|
||||
package com.github.houbb.sensitive.word.bugs.b105;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.*;
|
||||
|
||||
import com.github.houbb.heaven.util.util.CollectionUtil;
|
||||
import org.junit.Assert;
|
||||
import org.junit.Test;
|
||||
|
||||
@@ -17,16 +15,21 @@ import com.github.houbb.sensitive.word.support.tag.WordTags;
|
||||
/**
|
||||
* 测试Issue #105: 处理带有噪音字符时的标签查找
|
||||
* 验证在启用字符忽略功能时,敏感词标签仍能被正确查找
|
||||
*
|
||||
* src/main/java/com/github/houbb/sensitive/word/support/result/WordResultHandlerWordTags.java
|
||||
* @since 0.25.1
|
||||
*/
|
||||
public class Bug105Test {
|
||||
|
||||
@Test
|
||||
public void testNoiseCharacterInTaggedWords() {
|
||||
Map<String, Set<String>> newHashMap = new HashMap<>();
|
||||
newHashMap.put("毛主席", new HashSet<>(Arrays.asList("政治", "领导人")));
|
||||
|
||||
// 配置同时启用字符忽略和标签的实例
|
||||
SensitiveWordBs ignoreAndTagWordBs = SensitiveWordBs.newInstance()
|
||||
.charIgnore(SensitiveWordCharIgnores.specialChars()) // 启用字符忽略
|
||||
.wordTag(WordTags.map(Collections.singletonMap("毛主席",
|
||||
new HashSet<>(Arrays.asList("政治", "领导人")))))
|
||||
.wordTag(WordTags.map(newHashMap))
|
||||
.init();
|
||||
|
||||
// 包含噪音字符的敏感词文本
|
||||
@@ -39,7 +42,47 @@ public class Bug105Test {
|
||||
Assert.assertNotNull("标签不应为空", fixedWord.get(0).getTags());
|
||||
Assert.assertTrue("应包含'政治'标签", fixedWord.get(0).getTags().contains("政治"));
|
||||
Assert.assertTrue("应包含'领导人'标签", fixedWord.get(0).getTags().contains("领导人"));
|
||||
|
||||
System.out.println("Fixed result: " + fixedWord);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testNoiseCharacterInTaggedWords2() {
|
||||
Map<String, Set<String>> newHashMap = new HashMap<>();
|
||||
newHashMap.put("毛主席", new HashSet<>(Arrays.asList("政治", "领导人")));
|
||||
newHashMap.put("毛---主---席", new HashSet<>(Arrays.asList("政治", "领导人", "自定义的")));
|
||||
|
||||
// 配置同时启用字符忽略和标签的实例
|
||||
SensitiveWordBs ignoreAndTagWordBs = SensitiveWordBs.newInstance()
|
||||
.charIgnore(SensitiveWordCharIgnores.specialChars()) // 启用字符忽略
|
||||
.wordTag(WordTags.map(newHashMap))
|
||||
.init();
|
||||
|
||||
// 包含噪音字符的敏感词文本
|
||||
final String noisyText = "你好毛---主---席";
|
||||
|
||||
// 测试同时启用字符忽略和标签的实例(修复前会失败)
|
||||
List<WordTagsDto> fixedWord = ignoreAndTagWordBs.findAll(noisyText, WordResultHandlers.wordTags());
|
||||
Assert.assertEquals(1, fixedWord.size());
|
||||
Assert.assertEquals("[政治, 自定义的, 领导人]", fixedWord.get(0).getTags().toString());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testNoiseCharacterInTaggedWords3() {
|
||||
Map<String, Set<String>> newHashMap = new HashMap<>();
|
||||
newHashMap.put("毛xxx主xxxx席", new HashSet<>(Arrays.asList("政治", "领导人", "自定义的")));
|
||||
|
||||
// 配置同时启用字符忽略和标签的实例
|
||||
SensitiveWordBs ignoreAndTagWordBs = SensitiveWordBs.newInstance()
|
||||
.charIgnore(SensitiveWordCharIgnores.specialChars()) // 启用字符忽略
|
||||
.wordTag(WordTags.map(newHashMap))
|
||||
.init();
|
||||
|
||||
// 包含噪音字符的敏感词文本
|
||||
final String noisyText = "你好毛---主---席";
|
||||
|
||||
// 测试同时启用字符忽略和标签的实例(修复前会失败)
|
||||
List<WordTagsDto> fixedWord = ignoreAndTagWordBs.findAll(noisyText, WordResultHandlers.wordTags());
|
||||
Assert.assertEquals(1, fixedWord.size());
|
||||
Assert.assertTrue(CollectionUtil.isEmpty(fixedWord.get(0).getTags()));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -62,7 +62,7 @@ public class SensitiveWordHelperTest {
|
||||
final String text = "五星红旗迎风飘扬,毛主席的画像屹立在天安门前。";
|
||||
|
||||
List<IWordResult> wordList = SensitiveWordHelper.findAll(text, WordResultHandlers.raw());
|
||||
Assert.assertEquals("[WordResult{startIndex=0, endIndex=4, type='WORD'}, WordResult{startIndex=9, endIndex=12, type='WORD'}, WordResult{startIndex=18, endIndex=21, type='WORD'}]", wordList.toString());
|
||||
Assert.assertEquals("[WordResult{startIndex=0, endIndex=4, type='WORD', word='5星红旗'}, WordResult{startIndex=9, endIndex=12, type='WORD', word='毛主席'}, WordResult{startIndex=18, endIndex=21, type='WORD', word='天安门'}]", wordList.toString());
|
||||
}
|
||||
|
||||
|
||||
@@ -99,7 +99,7 @@ public class SensitiveWordHelperTest {
|
||||
final String text = "五星红旗迎风飘扬,毛主席的画像屹立在天安门前。";
|
||||
|
||||
IWordResult word = SensitiveWordHelper.findFirst(text, WordResultHandlers.raw());
|
||||
Assert.assertEquals("WordResult{startIndex=0, endIndex=4, type='WORD'}", word.toString());
|
||||
Assert.assertEquals("WordResult{startIndex=0, endIndex=4, type='WORD', word='5星红旗'}", word.toString());
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -10,7 +10,7 @@ import org.junit.Assert;
|
||||
import org.junit.Ignore;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* @since 0.12.0
|
||||
@@ -27,7 +27,7 @@ public class WordResultHandlerTest {
|
||||
Assert.assertEquals("[五星红旗, 毛主席, 天安门]", wordList2.toString());
|
||||
|
||||
List<IWordResult> wordList3 = SensitiveWordHelper.findAll(text, WordResultHandlers.raw());
|
||||
Assert.assertEquals("[WordResult{startIndex=0, endIndex=4, type='WORD'}, WordResult{startIndex=9, endIndex=12, type='WORD'}, WordResult{startIndex=18, endIndex=21, type='WORD'}]", wordList3.toString());
|
||||
Assert.assertEquals("[WordResult{startIndex=0, endIndex=4, type='WORD', word='5星红旗'}, WordResult{startIndex=9, endIndex=12, type='WORD', word='毛主席'}, WordResult{startIndex=18, endIndex=21, type='WORD', word='天安门'}]", wordList3.toString());
|
||||
}
|
||||
|
||||
@Test
|
||||
@@ -35,20 +35,24 @@ public class WordResultHandlerTest {
|
||||
final String text = "骂人:你他妈; 邮箱:123@qq.com; mobile: 13088889999; 网址:https://www.baidu.com";
|
||||
List<IWordResult> wordList3 = SensitiveWordHelper
|
||||
.findAll(text, WordResultHandlers.raw());
|
||||
Assert.assertEquals("[WordResult{startIndex=3, endIndex=6, type='WORD'}]", wordList3.toString());
|
||||
Assert.assertEquals("[WordResult{startIndex=3, endIndex=6, type='WORD', word='你他妈'}]", wordList3.toString());
|
||||
}
|
||||
|
||||
@Test
|
||||
@Ignore
|
||||
public void wordTagsTest() {
|
||||
final String text = "五星红旗迎风飘扬,毛主席的画像屹立在天安门前。";
|
||||
|
||||
// 默认敏感词标签为空
|
||||
List<WordTagsDto> wordList1 = SensitiveWordHelper.findAll(text, WordResultHandlers.wordTags());
|
||||
Assert.assertEquals("[WordTagsDto{word='五星红旗', tags=[]}, WordTagsDto{word='毛主席', tags=[]}, WordTagsDto{word='天安门', tags=[]}]", wordList1.toString());
|
||||
Assert.assertEquals("[WordTagsDto{word='五星红旗', tags=null}, WordTagsDto{word='毛主席', tags=[0]}, WordTagsDto{word='天安门', tags=null}]", wordList1.toString());
|
||||
|
||||
Map<String, Set<String>> wordMap = new HashMap<>();
|
||||
wordMap.put("五星红旗", new HashSet<>(Arrays.asList("政治", "国家")));
|
||||
wordMap.put("毛主席", new HashSet<>(Arrays.asList("政治", "伟人", "国家")));
|
||||
wordMap.put("天安门", new HashSet<>(Arrays.asList("政治", "国家", "地址")));
|
||||
|
||||
List<WordTagsDto> wordList2 = SensitiveWordBs.newInstance()
|
||||
.wordTag(WordTags.file("D:\\github\\sensitive-word\\src\\test\\resources\\dict_tag_test.txt"))
|
||||
.wordTag(WordTags.map(wordMap))
|
||||
.init()
|
||||
.findAll(text, WordResultHandlers.wordTags());
|
||||
Assert.assertEquals("[WordTagsDto{word='五星红旗', tags=[政治, 国家]}, WordTagsDto{word='毛主席', tags=[政治, 伟人, 国家]}, WordTagsDto{word='天安门', tags=[政治, 国家, 地址]}]", wordList2.toString());
|
||||
|
||||
Reference in New Issue
Block a user