mirror of
https://github.com/houbb/sensitive-word.git
synced 2026-03-22 08:27:36 +08:00
Merge remote-tracking branch 'origin/master'
# Conflicts: # src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckWord.java
This commit is contained in:
@@ -414,9 +414,8 @@
|
|||||||
| 1 | A | wordCheck 策略支持用户自定义 | 2025-2-17 12:06:45 | https://github.com/houbb/sensitive-word/issues/101 |
|
| 1 | A | wordCheck 策略支持用户自定义 | 2025-2-17 12:06:45 | https://github.com/houbb/sensitive-word/issues/101 |
|
||||||
| 2 | A | wordCheckUrlNoPrefix | 2025-2-17 12:06:45 | https://github.com/houbb/sensitive-word/issues/101 |
|
| 2 | A | wordCheckUrlNoPrefix | 2025-2-17 12:06:45 | https://github.com/houbb/sensitive-word/issues/101 |
|
||||||
|
|
||||||
# release_0.25.0
|
# release_0.25.1
|
||||||
|
|
||||||
| 序号 | 变更类型 | 说明 | 时间 | 备注 |
|
| 序号 | 变更类型 | 说明 | 时间 | 备注 |
|
||||||
|:---|:-----|----------------------|:-------------------|:-----|
|
|:---|:-----|------------------------------|:------------------|:---------------------------------------------------|
|
||||||
| 1 | A | wordCheck 策略支持用户自定义 | 2025-2-17 12:06:45 | https://github.com/houbb/sensitive-word/issues/101 |
|
| 1 | A | 修正 tags 匹配问题,黑名单命中时返回对应的黑名单词 | 2025-5-2 20:25:04 | https://github.com/houbb/sensitive-word/issues/105 |
|
||||||
| 2 | A | wordCheckUrlNoPrefix | 2025-2-17 12:06:45 | https://github.com/houbb/sensitive-word/issues/101 |
|
|
||||||
|
|||||||
10
README.md
10
README.md
@@ -96,7 +96,7 @@ v0.24.0 开始内置支持对敏感词的分类细化,不过工作量比较大
|
|||||||
<dependency>
|
<dependency>
|
||||||
<groupId>com.github.houbb</groupId>
|
<groupId>com.github.houbb</groupId>
|
||||||
<artifactId>sensitive-word</artifactId>
|
<artifactId>sensitive-word</artifactId>
|
||||||
<version>0.25.0</version>
|
<version>0.25.1</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
```
|
```
|
||||||
|
|
||||||
@@ -1289,6 +1289,14 @@ FormatCombine/CheckCombine/AllowDenyCombine 组合策略,允许用户自定义
|
|||||||
|
|
||||||
[v0.21.0 敏感词新特性之白名单支持单个编辑,修正白名单包含黑名单时的问题](https://houbb.github.io/2020/01/07/sensitive-word-12-v0.21.0-allow-word-edit)
|
[v0.21.0 敏感词新特性之白名单支持单个编辑,修正白名单包含黑名单时的问题](https://houbb.github.io/2020/01/07/sensitive-word-12-v0.21.0-allow-word-edit)
|
||||||
|
|
||||||
|
[v0.23.0 敏感词结果条件拓展,内置支持链式+单词标签](https://houbb.github.io/2020/01/07/sensitive-word-13-v0.23.0-result-condition-enhance)
|
||||||
|
|
||||||
|
[v0.24.0 新特性支持标签分类,内置实现多种策略](https://houbb.github.io/2020/01/07/sensitive-word-13-v0.24.0-word-tag-impl)
|
||||||
|
|
||||||
|
[v0.25.0 新特性之 wordCheck 策略支持用户自定义](https://houbb.github.io/2020/01/07/sensitive-word-14-v0.25.0-url-define)
|
||||||
|
|
||||||
|
[v0.25.1 新特性之返回匹配词,修正 tags 标签](https://houbb.github.io/2020/01/07/sensitive-word-14-v0.25.1-tags-match)
|
||||||
|
|
||||||

|

|
||||||
|
|
||||||
# NLP 开源矩阵
|
# NLP 开源矩阵
|
||||||
|
|||||||
2
pom.xml
2
pom.xml
@@ -6,7 +6,7 @@
|
|||||||
|
|
||||||
<groupId>com.github.houbb</groupId>
|
<groupId>com.github.houbb</groupId>
|
||||||
<artifactId>sensitive-word</artifactId>
|
<artifactId>sensitive-word</artifactId>
|
||||||
<version>0.25.0</version>
|
<version>0.25.1</version>
|
||||||
|
|
||||||
<properties>
|
<properties>
|
||||||
<!--============================== All Plugins START ==============================-->
|
<!--============================== All Plugins START ==============================-->
|
||||||
|
|||||||
@@ -10,7 +10,7 @@ ECHO "============================= RELEASE START..."
|
|||||||
|
|
||||||
:: 版本号信息(需要手动指定)
|
:: 版本号信息(需要手动指定)
|
||||||
:::: 旧版本名称
|
:::: 旧版本名称
|
||||||
SET version=0.25.0
|
SET version=0.25.1
|
||||||
:::: 新版本名称
|
:::: 新版本名称
|
||||||
SET newVersion=0.26.0
|
SET newVersion=0.26.0
|
||||||
:::: 组织名称
|
:::: 组织名称
|
||||||
|
|||||||
@@ -28,4 +28,11 @@ public interface IWordResult {
|
|||||||
*/
|
*/
|
||||||
String type();
|
String type();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 实际匹配的单词,方便统一的标签等处理,实际问题排查等
|
||||||
|
* @return 结果
|
||||||
|
* @since 0.25.1
|
||||||
|
*/
|
||||||
|
String word();
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -6,6 +6,7 @@ import com.github.houbb.sensitive.word.api.*;
|
|||||||
import com.github.houbb.sensitive.word.api.context.InnerSensitiveWordContext;
|
import com.github.houbb.sensitive.word.api.context.InnerSensitiveWordContext;
|
||||||
import com.github.houbb.sensitive.word.constant.enums.WordValidModeEnum;
|
import com.github.houbb.sensitive.word.constant.enums.WordValidModeEnum;
|
||||||
import com.github.houbb.sensitive.word.support.check.WordCheckResult;
|
import com.github.houbb.sensitive.word.support.check.WordCheckResult;
|
||||||
|
import com.github.houbb.sensitive.word.support.result.WordLengthResult;
|
||||||
import com.github.houbb.sensitive.word.support.result.WordResult;
|
import com.github.houbb.sensitive.word.support.result.WordResult;
|
||||||
import com.github.houbb.sensitive.word.utils.InnerWordFormatUtils;
|
import com.github.houbb.sensitive.word.utils.InnerWordFormatUtils;
|
||||||
|
|
||||||
@@ -80,13 +81,16 @@ public class SensitiveWord extends AbstractSensitiveWord {
|
|||||||
|
|
||||||
|
|
||||||
// 命中
|
// 命中
|
||||||
int wordLength = checkResult.wordLengthResult().wordDenyLen();
|
final WordLengthResult wordLengthResult = checkResult.wordLengthResult();
|
||||||
|
int wordLength = wordLengthResult.wordDenyLen();
|
||||||
if (wordLength > 0) {
|
if (wordLength > 0) {
|
||||||
// 保存敏感词
|
// 保存敏感词
|
||||||
WordResult wordResult = WordResult.newInstance()
|
WordResult wordResult = WordResult.newInstance()
|
||||||
.startIndex(i)
|
.startIndex(i)
|
||||||
.endIndex(i+wordLength)
|
.endIndex(i+wordLength)
|
||||||
.type(checkResult.type());
|
.type(checkResult.type())
|
||||||
|
.word(wordLengthResult.wordDeny());
|
||||||
|
|
||||||
//v0.13.0 添加判断
|
//v0.13.0 添加判断
|
||||||
if(wordResultCondition.match(wordResult, text, modeEnum, context)) {
|
if(wordResultCondition.match(wordResult, text, modeEnum, context)) {
|
||||||
resultList.add(wordResult);
|
resultList.add(wordResult);
|
||||||
|
|||||||
@@ -59,7 +59,8 @@ public abstract class AbstractWordCheck implements IWordCheck {
|
|||||||
return WordCheckResult.newInstance()
|
return WordCheckResult.newInstance()
|
||||||
.wordLengthResult(wordLengthResult)
|
.wordLengthResult(wordLengthResult)
|
||||||
.type(getType())
|
.type(getType())
|
||||||
.checkClass(clazz);
|
.checkClass(clazz)
|
||||||
|
;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -15,6 +15,18 @@ public class WordLengthResult {
|
|||||||
*/
|
*/
|
||||||
private int wordDenyLen;
|
private int wordDenyLen;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 黑名单匹配词
|
||||||
|
* @since 0.25.1
|
||||||
|
*/
|
||||||
|
private String wordDeny;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 白名单实际匹配值
|
||||||
|
* @since 0.25.1
|
||||||
|
*/
|
||||||
|
private String wordAllow;
|
||||||
|
|
||||||
public static WordLengthResult newInstance() {
|
public static WordLengthResult newInstance() {
|
||||||
return new WordLengthResult();
|
return new WordLengthResult();
|
||||||
}
|
}
|
||||||
@@ -37,11 +49,31 @@ public class WordLengthResult {
|
|||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public String wordDeny() {
|
||||||
|
return wordDeny;
|
||||||
|
}
|
||||||
|
|
||||||
|
public WordLengthResult wordDeny(String wordDeny) {
|
||||||
|
this.wordDeny = wordDeny;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String wordAllow() {
|
||||||
|
return wordAllow;
|
||||||
|
}
|
||||||
|
|
||||||
|
public WordLengthResult wordAllow(String wordAllow) {
|
||||||
|
this.wordAllow = wordAllow;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String toString() {
|
public String toString() {
|
||||||
return "WordLengthResult{" +
|
return "WordLengthResult{" +
|
||||||
"wordAllowLen=" + wordAllowLen +
|
"wordAllowLen=" + wordAllowLen +
|
||||||
", wordDenyLen=" + wordDenyLen +
|
", wordDenyLen=" + wordDenyLen +
|
||||||
|
", wordDeny='" + wordDeny + '\'' +
|
||||||
|
", wordAllow='" + wordAllow + '\'' +
|
||||||
'}';
|
'}';
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -18,6 +18,12 @@ public class WordResult implements IWordResult {
|
|||||||
*/
|
*/
|
||||||
private String type;
|
private String type;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 单词匹配
|
||||||
|
* @since 0.25.1
|
||||||
|
*/
|
||||||
|
private String word;
|
||||||
|
|
||||||
private WordResult(){}
|
private WordResult(){}
|
||||||
|
|
||||||
public static WordResult newInstance() {
|
public static WordResult newInstance() {
|
||||||
@@ -54,12 +60,23 @@ public class WordResult implements IWordResult {
|
|||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String word() {
|
||||||
|
return word;
|
||||||
|
}
|
||||||
|
|
||||||
|
public WordResult word(String word) {
|
||||||
|
this.word = word;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String toString() {
|
public String toString() {
|
||||||
return "WordResult{" +
|
return "WordResult{" +
|
||||||
"startIndex=" + startIndex +
|
"startIndex=" + startIndex +
|
||||||
", endIndex=" + endIndex +
|
", endIndex=" + endIndex +
|
||||||
", type='" + type + '\'' +
|
", type='" + type + '\'' +
|
||||||
|
", word='" + word + '\'' +
|
||||||
'}';
|
'}';
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
package com.github.houbb.sensitive.word.support.result;
|
package com.github.houbb.sensitive.word.support.result;
|
||||||
|
|
||||||
|
import com.github.houbb.heaven.util.util.CollectionUtil;
|
||||||
import com.github.houbb.sensitive.word.api.IWordContext;
|
import com.github.houbb.sensitive.word.api.IWordContext;
|
||||||
import com.github.houbb.sensitive.word.api.IWordResult;
|
import com.github.houbb.sensitive.word.api.IWordResult;
|
||||||
import com.github.houbb.sensitive.word.utils.InnerWordCharUtils;
|
import com.github.houbb.sensitive.word.utils.InnerWordCharUtils;
|
||||||
@@ -17,15 +18,22 @@ public class WordResultHandlerWordTags extends AbstractWordResultHandler<WordTag
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected WordTagsDto doHandle(IWordResult wordResult, IWordContext wordContext, String originalText) {
|
protected WordTagsDto doHandle(IWordResult wordResult, IWordContext wordContext, String originalText) {
|
||||||
|
WordTagsDto dto = new WordTagsDto();
|
||||||
|
|
||||||
// 截取
|
// 截取
|
||||||
String word = InnerWordCharUtils.getString(originalText.toCharArray(), wordResult);
|
String word = InnerWordCharUtils.getString(originalText.toCharArray(), wordResult);
|
||||||
// 标签
|
|
||||||
|
|
||||||
WordTagsDto dto = new WordTagsDto();
|
// 获取 tags (使用清理后的单词查找标签)
|
||||||
dto.setWord(word);
|
|
||||||
// 获取 tags
|
|
||||||
Set<String> wordTags = InnerWordTagUtils.tags(word, wordContext);
|
Set<String> wordTags = InnerWordTagUtils.tags(word, wordContext);
|
||||||
|
|
||||||
|
// 如果为空,则尝试使用命中的敏感词匹配 v0.25.1 bug105
|
||||||
|
if(CollectionUtil.isEmpty(wordTags)) {
|
||||||
|
wordTags = InnerWordTagUtils.tags(wordResult.word(), wordContext);
|
||||||
|
}
|
||||||
|
|
||||||
|
dto.setWord(word);
|
||||||
dto.setTags(wordTags);
|
dto.setTags(wordTags);
|
||||||
|
|
||||||
return dto;
|
return dto;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -26,7 +26,7 @@ public class InnerWordTagUtils {
|
|||||||
public static Set<String> tags(final String word,
|
public static Set<String> tags(final String word,
|
||||||
final IWordContext wordContext) {
|
final IWordContext wordContext) {
|
||||||
if(StringUtil.isEmpty(word)) {
|
if(StringUtil.isEmpty(word)) {
|
||||||
return Collections.emptySet();
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
final IWordTag wordTag = wordContext.wordTag();
|
final IWordTag wordTag = wordContext.wordTag();
|
||||||
|
|||||||
@@ -0,0 +1,88 @@
|
|||||||
|
package com.github.houbb.sensitive.word.bugs.b105;
|
||||||
|
|
||||||
|
import java.util.*;
|
||||||
|
|
||||||
|
import com.github.houbb.heaven.util.util.CollectionUtil;
|
||||||
|
import org.junit.Assert;
|
||||||
|
import org.junit.Test;
|
||||||
|
|
||||||
|
import com.github.houbb.sensitive.word.bs.SensitiveWordBs;
|
||||||
|
import com.github.houbb.sensitive.word.support.ignore.SensitiveWordCharIgnores;
|
||||||
|
import com.github.houbb.sensitive.word.support.result.WordResultHandlers;
|
||||||
|
import com.github.houbb.sensitive.word.support.result.WordTagsDto;
|
||||||
|
import com.github.houbb.sensitive.word.support.tag.WordTags;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 测试Issue #105: 处理带有噪音字符时的标签查找
|
||||||
|
* 验证在启用字符忽略功能时,敏感词标签仍能被正确查找
|
||||||
|
*
|
||||||
|
* src/main/java/com/github/houbb/sensitive/word/support/result/WordResultHandlerWordTags.java
|
||||||
|
* @since 0.25.1
|
||||||
|
*/
|
||||||
|
public class Bug105Test {
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testNoiseCharacterInTaggedWords() {
|
||||||
|
Map<String, Set<String>> newHashMap = new HashMap<>();
|
||||||
|
newHashMap.put("毛主席", new HashSet<>(Arrays.asList("政治", "领导人")));
|
||||||
|
|
||||||
|
// 配置同时启用字符忽略和标签的实例
|
||||||
|
SensitiveWordBs ignoreAndTagWordBs = SensitiveWordBs.newInstance()
|
||||||
|
.charIgnore(SensitiveWordCharIgnores.specialChars()) // 启用字符忽略
|
||||||
|
.wordTag(WordTags.map(newHashMap))
|
||||||
|
.init();
|
||||||
|
|
||||||
|
// 包含噪音字符的敏感词文本
|
||||||
|
final String noisyText = "你好毛---主---席";
|
||||||
|
|
||||||
|
// 测试同时启用字符忽略和标签的实例(修复前会失败)
|
||||||
|
List<WordTagsDto> fixedWord = ignoreAndTagWordBs.findAll(noisyText, WordResultHandlers.wordTags());
|
||||||
|
Assert.assertEquals(1, fixedWord.size());
|
||||||
|
Assert.assertEquals("毛---主---席", fixedWord.get(0).getWord());
|
||||||
|
Assert.assertNotNull("标签不应为空", fixedWord.get(0).getTags());
|
||||||
|
Assert.assertTrue("应包含'政治'标签", fixedWord.get(0).getTags().contains("政治"));
|
||||||
|
Assert.assertTrue("应包含'领导人'标签", fixedWord.get(0).getTags().contains("领导人"));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testNoiseCharacterInTaggedWords2() {
|
||||||
|
Map<String, Set<String>> newHashMap = new HashMap<>();
|
||||||
|
newHashMap.put("毛主席", new HashSet<>(Arrays.asList("政治", "领导人")));
|
||||||
|
newHashMap.put("毛---主---席", new HashSet<>(Arrays.asList("政治", "领导人", "自定义的")));
|
||||||
|
|
||||||
|
// 配置同时启用字符忽略和标签的实例
|
||||||
|
SensitiveWordBs ignoreAndTagWordBs = SensitiveWordBs.newInstance()
|
||||||
|
.charIgnore(SensitiveWordCharIgnores.specialChars()) // 启用字符忽略
|
||||||
|
.wordTag(WordTags.map(newHashMap))
|
||||||
|
.init();
|
||||||
|
|
||||||
|
// 包含噪音字符的敏感词文本
|
||||||
|
final String noisyText = "你好毛---主---席";
|
||||||
|
|
||||||
|
// 测试同时启用字符忽略和标签的实例(修复前会失败)
|
||||||
|
List<WordTagsDto> fixedWord = ignoreAndTagWordBs.findAll(noisyText, WordResultHandlers.wordTags());
|
||||||
|
Assert.assertEquals(1, fixedWord.size());
|
||||||
|
Assert.assertEquals("[政治, 自定义的, 领导人]", fixedWord.get(0).getTags().toString());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testNoiseCharacterInTaggedWords3() {
|
||||||
|
Map<String, Set<String>> newHashMap = new HashMap<>();
|
||||||
|
newHashMap.put("毛xxx主xxxx席", new HashSet<>(Arrays.asList("政治", "领导人", "自定义的")));
|
||||||
|
|
||||||
|
// 配置同时启用字符忽略和标签的实例
|
||||||
|
SensitiveWordBs ignoreAndTagWordBs = SensitiveWordBs.newInstance()
|
||||||
|
.charIgnore(SensitiveWordCharIgnores.specialChars()) // 启用字符忽略
|
||||||
|
.wordTag(WordTags.map(newHashMap))
|
||||||
|
.init();
|
||||||
|
|
||||||
|
// 包含噪音字符的敏感词文本
|
||||||
|
final String noisyText = "你好毛---主---席";
|
||||||
|
|
||||||
|
// 测试同时启用字符忽略和标签的实例(修复前会失败)
|
||||||
|
List<WordTagsDto> fixedWord = ignoreAndTagWordBs.findAll(noisyText, WordResultHandlers.wordTags());
|
||||||
|
Assert.assertEquals(1, fixedWord.size());
|
||||||
|
Assert.assertTrue(CollectionUtil.isEmpty(fixedWord.get(0).getTags()));
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
@@ -62,7 +62,7 @@ public class SensitiveWordHelperTest {
|
|||||||
final String text = "五星红旗迎风飘扬,毛主席的画像屹立在天安门前。";
|
final String text = "五星红旗迎风飘扬,毛主席的画像屹立在天安门前。";
|
||||||
|
|
||||||
List<IWordResult> wordList = SensitiveWordHelper.findAll(text, WordResultHandlers.raw());
|
List<IWordResult> wordList = SensitiveWordHelper.findAll(text, WordResultHandlers.raw());
|
||||||
Assert.assertEquals("[WordResult{startIndex=0, endIndex=4, type='WORD'}, WordResult{startIndex=9, endIndex=12, type='WORD'}, WordResult{startIndex=18, endIndex=21, type='WORD'}]", wordList.toString());
|
Assert.assertEquals("[WordResult{startIndex=0, endIndex=4, type='WORD', word='5星红旗'}, WordResult{startIndex=9, endIndex=12, type='WORD', word='毛主席'}, WordResult{startIndex=18, endIndex=21, type='WORD', word='天安门'}]", wordList.toString());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -99,7 +99,7 @@ public class SensitiveWordHelperTest {
|
|||||||
final String text = "五星红旗迎风飘扬,毛主席的画像屹立在天安门前。";
|
final String text = "五星红旗迎风飘扬,毛主席的画像屹立在天安门前。";
|
||||||
|
|
||||||
IWordResult word = SensitiveWordHelper.findFirst(text, WordResultHandlers.raw());
|
IWordResult word = SensitiveWordHelper.findFirst(text, WordResultHandlers.raw());
|
||||||
Assert.assertEquals("WordResult{startIndex=0, endIndex=4, type='WORD'}", word.toString());
|
Assert.assertEquals("WordResult{startIndex=0, endIndex=4, type='WORD', word='5星红旗'}", word.toString());
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|||||||
@@ -10,7 +10,7 @@ import org.junit.Assert;
|
|||||||
import org.junit.Ignore;
|
import org.junit.Ignore;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
|
|
||||||
import java.util.List;
|
import java.util.*;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @since 0.12.0
|
* @since 0.12.0
|
||||||
@@ -27,7 +27,7 @@ public class WordResultHandlerTest {
|
|||||||
Assert.assertEquals("[五星红旗, 毛主席, 天安门]", wordList2.toString());
|
Assert.assertEquals("[五星红旗, 毛主席, 天安门]", wordList2.toString());
|
||||||
|
|
||||||
List<IWordResult> wordList3 = SensitiveWordHelper.findAll(text, WordResultHandlers.raw());
|
List<IWordResult> wordList3 = SensitiveWordHelper.findAll(text, WordResultHandlers.raw());
|
||||||
Assert.assertEquals("[WordResult{startIndex=0, endIndex=4, type='WORD'}, WordResult{startIndex=9, endIndex=12, type='WORD'}, WordResult{startIndex=18, endIndex=21, type='WORD'}]", wordList3.toString());
|
Assert.assertEquals("[WordResult{startIndex=0, endIndex=4, type='WORD', word='5星红旗'}, WordResult{startIndex=9, endIndex=12, type='WORD', word='毛主席'}, WordResult{startIndex=18, endIndex=21, type='WORD', word='天安门'}]", wordList3.toString());
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
@@ -35,20 +35,24 @@ public class WordResultHandlerTest {
|
|||||||
final String text = "骂人:你他妈; 邮箱:123@qq.com; mobile: 13088889999; 网址:https://www.baidu.com";
|
final String text = "骂人:你他妈; 邮箱:123@qq.com; mobile: 13088889999; 网址:https://www.baidu.com";
|
||||||
List<IWordResult> wordList3 = SensitiveWordHelper
|
List<IWordResult> wordList3 = SensitiveWordHelper
|
||||||
.findAll(text, WordResultHandlers.raw());
|
.findAll(text, WordResultHandlers.raw());
|
||||||
Assert.assertEquals("[WordResult{startIndex=3, endIndex=6, type='WORD'}]", wordList3.toString());
|
Assert.assertEquals("[WordResult{startIndex=3, endIndex=6, type='WORD', word='你他妈'}]", wordList3.toString());
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
@Ignore
|
|
||||||
public void wordTagsTest() {
|
public void wordTagsTest() {
|
||||||
final String text = "五星红旗迎风飘扬,毛主席的画像屹立在天安门前。";
|
final String text = "五星红旗迎风飘扬,毛主席的画像屹立在天安门前。";
|
||||||
|
|
||||||
// 默认敏感词标签为空
|
// 默认敏感词标签为空
|
||||||
List<WordTagsDto> wordList1 = SensitiveWordHelper.findAll(text, WordResultHandlers.wordTags());
|
List<WordTagsDto> wordList1 = SensitiveWordHelper.findAll(text, WordResultHandlers.wordTags());
|
||||||
Assert.assertEquals("[WordTagsDto{word='五星红旗', tags=[]}, WordTagsDto{word='毛主席', tags=[]}, WordTagsDto{word='天安门', tags=[]}]", wordList1.toString());
|
Assert.assertEquals("[WordTagsDto{word='五星红旗', tags=null}, WordTagsDto{word='毛主席', tags=[0]}, WordTagsDto{word='天安门', tags=null}]", wordList1.toString());
|
||||||
|
|
||||||
|
Map<String, Set<String>> wordMap = new HashMap<>();
|
||||||
|
wordMap.put("五星红旗", new HashSet<>(Arrays.asList("政治", "国家")));
|
||||||
|
wordMap.put("毛主席", new HashSet<>(Arrays.asList("政治", "伟人", "国家")));
|
||||||
|
wordMap.put("天安门", new HashSet<>(Arrays.asList("政治", "国家", "地址")));
|
||||||
|
|
||||||
List<WordTagsDto> wordList2 = SensitiveWordBs.newInstance()
|
List<WordTagsDto> wordList2 = SensitiveWordBs.newInstance()
|
||||||
.wordTag(WordTags.file("D:\\github\\sensitive-word\\src\\test\\resources\\dict_tag_test.txt"))
|
.wordTag(WordTags.map(wordMap))
|
||||||
.init()
|
.init()
|
||||||
.findAll(text, WordResultHandlers.wordTags());
|
.findAll(text, WordResultHandlers.wordTags());
|
||||||
Assert.assertEquals("[WordTagsDto{word='五星红旗', tags=[政治, 国家]}, WordTagsDto{word='毛主席', tags=[政治, 伟人, 国家]}, WordTagsDto{word='天安门', tags=[政治, 国家, 地址]}]", wordList2.toString());
|
Assert.assertEquals("[WordTagsDto{word='五星红旗', tags=[政治, 国家]}, WordTagsDto{word='毛主席', tags=[政治, 伟人, 国家]}, WordTagsDto{word='天安门', tags=[政治, 国家, 地址]}]", wordList2.toString());
|
||||||
|
|||||||
Reference in New Issue
Block a user