mirror of
https://github.com/houbb/sensitive-word.git
synced 2026-03-22 08:27:36 +08:00
release branch 0.23.1
This commit is contained in:
@@ -379,4 +379,10 @@
|
||||
|
||||
| 序号 | 变更类型 | 说明 | 时间 | 备注 |
|
||||
|:---|:-----|------------|:-------------------|:-----------|
|
||||
| 1 | A | 进一步拓展结果条件类 | 2024-12-8 21:13:44 | 支持同时指定多个条件 |
|
||||
| 1 | A | 进一步拓展结果条件类 | 2024-12-8 21:13:44 | 支持同时指定多个条件 |
|
||||
|
||||
# release_0.23.1
|
||||
|
||||
| 序号 | 变更类型 | 说明 | 时间 | 备注 |
|:---|:-----|---------------------------------------------|:--------------------|:------------|
| 1 | O | WordResultHandlerWordTags+获取tags时,统一格式化处理优化 | 2024-12-22 12:45:53 | 保持统一,简化词库信息 |
|
||||
12
README.md
12
README.md
@@ -54,12 +54,6 @@
|
||||
|
||||
[CHANGE_LOG.md](https://github.com/houbb/sensitive-word/blob/master/CHANGE_LOG.md)
|
||||
|
||||
### V0.21.0
|
||||
|
||||
- 修正白名单较长,包含了黑名单,导致白名单不符合预期的场景。
|
||||
|
||||
- 新增了白名单单个的编辑操作
|
||||
|
||||
### V0.22.0
|
||||
|
||||
- 修正单个敏感词修改时,对应的格式处理问题
|
||||
@@ -68,6 +62,10 @@
|
||||
|
||||
- 结果条件拓展支持 wordTags 和 chains
|
||||
|
||||
### V0.23.1
|
||||
|
||||
- 敏感词标签统一格式化处理优化
|
||||
|
||||
## 更多资料
|
||||
|
||||
### 敏感词控台
|
||||
@@ -98,7 +96,7 @@
|
||||
<dependency>
|
||||
<groupId>com.github.houbb</groupId>
|
||||
<artifactId>sensitive-word</artifactId>
|
||||
<version>0.23.0</version>
|
||||
<version>0.23.1</version>
|
||||
</dependency>
|
||||
```
|
||||
|
||||
|
||||
2
pom.xml
2
pom.xml
@@ -6,7 +6,7 @@
|
||||
|
||||
<groupId>com.github.houbb</groupId>
|
||||
<artifactId>sensitive-word</artifactId>
|
||||
<version>0.23.0</version>
|
||||
<version>0.23.1</version>
|
||||
|
||||
<properties>
|
||||
<!--============================== All Plugins START ==============================-->
|
||||
|
||||
@@ -10,7 +10,7 @@ ECHO "============================= RELEASE START..."
|
||||
|
||||
:: 版本号信息(需要手动指定)
|
||||
:::: 旧版本名称
|
||||
SET version=0.23.0
|
||||
SET version=0.23.1
|
||||
:::: 新版本名称
|
||||
SET newVersion=0.24.0
|
||||
:::: 组织名称
|
||||
|
||||
@@ -21,6 +21,7 @@ import com.github.houbb.sensitive.word.support.result.WordResultHandlers;
|
||||
import com.github.houbb.sensitive.word.support.resultcondition.WordResultConditions;
|
||||
import com.github.houbb.sensitive.word.support.tag.WordTags;
|
||||
import com.github.houbb.sensitive.word.utils.InnerWordFormatUtils;
|
||||
import com.github.houbb.sensitive.word.utils.InnerWordTagUtils;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
@@ -602,12 +603,7 @@ public class SensitiveWordBs implements ISensitiveWordDestroy {
|
||||
* @since 0.10.0
|
||||
*/
|
||||
public Set<String> tags(final String word) {
|
||||
if(StringUtil.isEmpty(word)) {
|
||||
return Collections.emptySet();
|
||||
}
|
||||
|
||||
// 是否需要格式化?
|
||||
return wordTag.getTag(word);
|
||||
return InnerWordTagUtils.tags(word, context);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
||||
@@ -3,6 +3,7 @@ package com.github.houbb.sensitive.word.support.result;
|
||||
import com.github.houbb.sensitive.word.api.IWordContext;
|
||||
import com.github.houbb.sensitive.word.api.IWordResult;
|
||||
import com.github.houbb.sensitive.word.utils.InnerWordCharUtils;
|
||||
import com.github.houbb.sensitive.word.utils.InnerWordTagUtils;
|
||||
|
||||
import java.util.Set;
|
||||
|
||||
@@ -23,7 +24,7 @@ public class WordResultHandlerWordTags extends AbstractWordResultHandler<WordTag
|
||||
WordTagsDto dto = new WordTagsDto();
|
||||
dto.setWord(word);
|
||||
// 获取 tags
|
||||
Set<String> wordTags = wordContext.wordTag().getTag(word);
|
||||
Set<String> wordTags = InnerWordTagUtils.tags(word, wordContext);
|
||||
dto.setTags(wordTags);
|
||||
return dto;
|
||||
}
|
||||
|
||||
@@ -0,0 +1,34 @@
|
||||
package com.github.houbb.sensitive.word.utils;
|
||||
|
||||
import com.github.houbb.heaven.util.lang.StringUtil;
|
||||
import com.github.houbb.sensitive.word.api.IWordContext;
|
||||
|
||||
import java.util.Collections;
|
||||
import java.util.Set;
|
||||
|
||||
/**
|
||||
* 内部的单词标签工具类
|
||||
*
|
||||
* @since 0.24.0
|
||||
*/
|
||||
public class InnerWordTagUtils {
|
||||
|
||||
/**
|
||||
* 获取敏感词的标签
|
||||
*
|
||||
* @param word 敏感词
|
||||
* @return 结果
|
||||
* @since 0.24.0
|
||||
*/
|
||||
public static Set<String> tags(final String word,
|
||||
final IWordContext wordContext) {
|
||||
if(StringUtil.isEmpty(word)) {
|
||||
return Collections.emptySet();
|
||||
}
|
||||
|
||||
// 是否需要格式化? v0.24.0
|
||||
String formatWord = InnerWordFormatUtils.format(word, wordContext);
|
||||
return wordContext.wordTag().getTag(formatWord);
|
||||
}
|
||||
|
||||
}
|
||||
@@ -1,8 +1,16 @@
|
||||
package com.github.houbb.sensitive.word.bs;
|
||||
|
||||
import com.github.houbb.heaven.util.lang.StringUtil;
|
||||
import com.github.houbb.sensitive.word.api.IWordDeny;
|
||||
import com.github.houbb.sensitive.word.api.IWordTag;
|
||||
import com.github.houbb.sensitive.word.support.result.WordResultHandlers;
|
||||
import com.github.houbb.sensitive.word.support.result.WordTagsDto;
|
||||
import com.github.houbb.sensitive.word.support.tag.AbstractWordTag;
|
||||
import com.github.houbb.sensitive.word.support.tag.WordTags;
|
||||
import org.junit.Assert;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* <p> project: sensitive-word-SensitiveWordBsTest </p>
|
||||
@@ -13,17 +21,66 @@ import org.junit.Assert;
|
||||
*/
|
||||
public class SensitiveWordBsTagTest {
|
||||
|
||||
public static void main(String[] args) {
|
||||
String filePath = "D:\\code\\github\\sensitive-word\\src\\test\\resources\\dict_tag_test.txt";
|
||||
private void addLine(String line,
|
||||
Map<String, Set<String>> wordTagMap) {
|
||||
String[] strings = line.split(" ");
|
||||
String key = strings[0];
|
||||
Set<String> tags = new HashSet<>(StringUtil.splitToList(strings[1]));
|
||||
wordTagMap.put(key, tags);
|
||||
}
|
||||
|
||||
IWordTag wordTag = WordTags.file(filePath);
|
||||
@Test
|
||||
public void wordResultHandlerWordTagsTest() {
|
||||
// 自定义测试标签类
|
||||
final Map<String, Set<String>> wordTagMap = new HashMap<>();
|
||||
addLine("0售 广告", wordTagMap);
|
||||
IWordTag wordTag = new AbstractWordTag() {
|
||||
@Override
|
||||
protected Set<String> doGetTag(String word) {
|
||||
return wordTagMap.get(word);
|
||||
}
|
||||
};
|
||||
|
||||
// 指定初始化
|
||||
SensitiveWordBs sensitiveWordBs = SensitiveWordBs.newInstance()
|
||||
.wordDeny(new IWordDeny() {
|
||||
@Override
|
||||
public List<String> deny() {
|
||||
return Arrays.asList("0售");
|
||||
}
|
||||
})
|
||||
.wordTag(wordTag)
|
||||
.init()
|
||||
;
|
||||
List<WordTagsDto> wordTagsDtoList1 = sensitiveWordBs.findAll("零售", WordResultHandlers.wordTags());
|
||||
Assert.assertEquals("[WordTagsDto{word='零售', tags=[广告]}]", wordTagsDtoList1.toString());
|
||||
|
||||
List<WordTagsDto> wordTagsDtoList2 = sensitiveWordBs.findAll("0售", WordResultHandlers.wordTags());
|
||||
Assert.assertEquals("[WordTagsDto{word='0售', tags=[广告]}]", wordTagsDtoList2.toString());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void wordTagsTest() {
|
||||
// 自定义测试标签类
|
||||
final Map<String, Set<String>> wordTagMap = new HashMap<>();
|
||||
addLine("0售 广告", wordTagMap);
|
||||
addLine("天安门 政治,国家,地址", wordTagMap);
|
||||
IWordTag wordTag = new AbstractWordTag() {
|
||||
@Override
|
||||
protected Set<String> doGetTag(String word) {
|
||||
return wordTagMap.get(word);
|
||||
}
|
||||
};
|
||||
|
||||
// 指定初始化
|
||||
SensitiveWordBs sensitiveWordBs = SensitiveWordBs.newInstance()
|
||||
.wordTag(wordTag)
|
||||
.init()
|
||||
;
|
||||
;
|
||||
|
||||
Assert.assertEquals("[政治, 国家]", sensitiveWordBs.tags("五星红旗").toString());;
|
||||
Assert.assertEquals("[政治, 国家, 地址]", sensitiveWordBs.tags("天安门").toString());
|
||||
Assert.assertEquals("[广告]", sensitiveWordBs.tags("零售").toString());
|
||||
Assert.assertEquals("[广告]", sensitiveWordBs.tags("0售").toString());
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
五星红旗 政治,国家
|
||||
毛主席 政治,国家,伟人
|
||||
天安门 政治,国家,地址
|
||||
天安门 政治,国家,地址
|
||||
0售 广告
|
||||
Reference in New Issue
Block a user