mirror of
https://github.com/houbb/sensitive-word.git
synced 2026-03-22 08:27:36 +08:00
Merge pull request #111 from similar2/master
[fix] issue #105 tagging not works when enable ignore
This commit is contained in:
@@ -19,13 +19,25 @@ public class WordResultHandlerWordTags extends AbstractWordResultHandler<WordTag
|
||||
protected WordTagsDto doHandle(IWordResult wordResult, IWordContext wordContext, String originalText) {
|
||||
// 截取
|
||||
String word = InnerWordCharUtils.getString(originalText.toCharArray(), wordResult);
|
||||
// 标签
|
||||
|
||||
// 创建 DTO 并设置原始单词
|
||||
WordTagsDto dto = new WordTagsDto();
|
||||
dto.setWord(word);
|
||||
// 获取 tags
|
||||
Set<String> wordTags = InnerWordTagUtils.tags(word, wordContext);
|
||||
|
||||
// 如果启用了字符忽略功能,清理单词后再查找标签
|
||||
StringBuilder wordForTagLookup = new StringBuilder();
|
||||
char[] chars = word.toCharArray();
|
||||
for (int i = 0; i < word.length(); i++) {
|
||||
// 如果字符不被忽略,则保留
|
||||
// TODO: 此处innercontext 被设为null,是否合理?
|
||||
if (!wordContext.charIgnore().ignore(i, chars, null)) {
|
||||
wordForTagLookup.append(chars[i]);
|
||||
}
|
||||
}
|
||||
// 获取 tags (使用清理后的单词查找标签)
|
||||
Set<String> wordTags = InnerWordTagUtils.tags(wordForTagLookup.toString(), wordContext);
|
||||
dto.setTags(wordTags);
|
||||
|
||||
return dto;
|
||||
}
|
||||
|
||||
|
||||
@@ -0,0 +1,45 @@
|
||||
package com.github.houbb.sensitive.word.bugs.b105;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
|
||||
import org.junit.Assert;
|
||||
import org.junit.Test;
|
||||
|
||||
import com.github.houbb.sensitive.word.bs.SensitiveWordBs;
|
||||
import com.github.houbb.sensitive.word.support.ignore.SensitiveWordCharIgnores;
|
||||
import com.github.houbb.sensitive.word.support.result.WordResultHandlers;
|
||||
import com.github.houbb.sensitive.word.support.result.WordTagsDto;
|
||||
import com.github.houbb.sensitive.word.support.tag.WordTags;
|
||||
|
||||
/**
|
||||
* 测试Issue #105: 处理带有噪音字符时的标签查找
|
||||
* 验证在启用字符忽略功能时,敏感词标签仍能被正确查找
|
||||
*/
|
||||
public class Bug105Test {
|
||||
|
||||
@Test
|
||||
public void testNoiseCharacterInTaggedWords() {
|
||||
// 配置同时启用字符忽略和标签的实例
|
||||
SensitiveWordBs ignoreAndTagWordBs = SensitiveWordBs.newInstance()
|
||||
.charIgnore(SensitiveWordCharIgnores.specialChars()) // 启用字符忽略
|
||||
.wordTag(WordTags.map(Collections.singletonMap("毛主席",
|
||||
new HashSet<>(Arrays.asList("政治", "领导人")))))
|
||||
.init();
|
||||
|
||||
// 包含噪音字符的敏感词文本
|
||||
final String noisyText = "你好毛---主---席";
|
||||
|
||||
// 测试同时启用字符忽略和标签的实例(修复前会失败)
|
||||
List<WordTagsDto> fixedWord = ignoreAndTagWordBs.findAll(noisyText, WordResultHandlers.wordTags());
|
||||
Assert.assertEquals(1, fixedWord.size());
|
||||
Assert.assertEquals("毛---主---席", fixedWord.get(0).getWord());
|
||||
Assert.assertNotNull("标签不应为空", fixedWord.get(0).getTags());
|
||||
Assert.assertTrue("应包含'政治'标签", fixedWord.get(0).getTags().contains("政治"));
|
||||
Assert.assertTrue("应包含'领导人'标签", fixedWord.get(0).getTags().contains("领导人"));
|
||||
|
||||
System.out.println("Fixed result: " + fixedWord);
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user