From 2204a5738a3b274840876c9e0b8c87cb595d1d90 Mon Sep 17 00:00:00 2001 From: houbb Date: Sun, 22 Dec 2024 13:25:33 +0800 Subject: [PATCH] release branch 0.23.1 --- CHANGE_LOG.md | 8 ++- README.md | 12 ++-- pom.xml | 2 +- release.bat | 2 +- .../sensitive/word/bs/SensitiveWordBs.java | 8 +-- .../result/WordResultHandlerWordTags.java | 3 +- .../word/utils/InnerWordTagUtils.java | 34 ++++++++++ .../word/bs/SensitiveWordBsTagTest.java | 67 +++++++++++++++++-- src/test/resources/dict_tag_test.txt | 3 +- 9 files changed, 116 insertions(+), 23 deletions(-) create mode 100644 src/main/java/com/github/houbb/sensitive/word/utils/InnerWordTagUtils.java diff --git a/CHANGE_LOG.md b/CHANGE_LOG.md index 4e571bf..0bb060e 100644 --- a/CHANGE_LOG.md +++ b/CHANGE_LOG.md @@ -379,4 +379,10 @@ | 序号 | 变更类型 | 说明 | 时间 | 备注 | |:---|:-----|------------|:-------------------|:-----------| -| 1 | A | 进一步拓展结果条件类 | 2024-12-8 21:13:44 | 支持同时指定多个条件 | \ No newline at end of file +| 1 | A | 进一步拓展结果条件类 | 2024-12-8 21:13:44 | 支持同时指定多个条件 | + +# release_0.23.1 + +| 序号 | 变更类型 | 说明 | 时间 | 备注 | +|:---|:-----|---------------------------------------------|:--------------------|:------------| +| 1 | O | WordResultHandlerWordTags+获取tags时,统一格式化处理优化 | 2024-12-22 12:45:53 | 保持统一,简化词库信息 | \ No newline at end of file diff --git a/README.md b/README.md index 2572222..bd7ea95 100644 --- a/README.md +++ b/README.md @@ -54,12 +54,6 @@ [CHANGE_LOG.md](https://github.com/houbb/sensitive-word/blob/master/CHANGE_LOG.md) -### V0.21.0 - -- 修正白名单较长,包含了黑名单,导致白名单不符合预期的场景。 - -- 新增了白名单单个的编辑操作 - ### V0.22.0 - 修正单个敏感词修改时,对应的格式处理问题 @@ -68,6 +62,10 @@ - 结果条件拓展支持 wordTags 和 chains +### V0.23.1 + +- 敏感词标签统一格式化处理优化 + ## 更多资料 ### 敏感词控台 @@ -98,7 +96,7 @@ com.github.houbb sensitive-word - 0.23.0 + 0.23.1 ``` diff --git a/pom.xml b/pom.xml index d1ea701..ebc054e 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ com.github.houbb sensitive-word - 0.23.0 + 0.23.1 diff --git a/release.bat b/release.bat index 9d85ebd..5d9d43a 100644 --- 
a/release.bat +++ b/release.bat @@ -10,7 +10,7 @@ ECHO "============================= RELEASE START..." :: 版本号信息(需要手动指定) :::: 旧版本名称 -SET version=0.23.0 +SET version=0.23.1 :::: 新版本名称 SET newVersion=0.24.0 :::: 组织名称 diff --git a/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java b/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java index 529f074..e8936e1 100644 --- a/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java +++ b/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java @@ -21,6 +21,7 @@ import com.github.houbb.sensitive.word.support.result.WordResultHandlers; import com.github.houbb.sensitive.word.support.resultcondition.WordResultConditions; import com.github.houbb.sensitive.word.support.tag.WordTags; import com.github.houbb.sensitive.word.utils.InnerWordFormatUtils; +import com.github.houbb.sensitive.word.utils.InnerWordTagUtils; import java.util.*; @@ -602,12 +603,7 @@ public class SensitiveWordBs implements ISensitiveWordDestroy { * @since 0.10.0 */ public Set<String> tags(final String word) { - if(StringUtil.isEmpty(word)) { - return Collections.emptySet(); - } - - // 是否需要格式化?
- return wordTag.getTag(word); + return InnerWordTagUtils.tags(word, context); } @Override diff --git a/src/main/java/com/github/houbb/sensitive/word/support/result/WordResultHandlerWordTags.java b/src/main/java/com/github/houbb/sensitive/word/support/result/WordResultHandlerWordTags.java index 7d1f4f2..cab8a62 100644 --- a/src/main/java/com/github/houbb/sensitive/word/support/result/WordResultHandlerWordTags.java +++ b/src/main/java/com/github/houbb/sensitive/word/support/result/WordResultHandlerWordTags.java @@ -3,6 +3,7 @@ package com.github.houbb.sensitive.word.support.result; import com.github.houbb.sensitive.word.api.IWordContext; import com.github.houbb.sensitive.word.api.IWordResult; import com.github.houbb.sensitive.word.utils.InnerWordCharUtils; +import com.github.houbb.sensitive.word.utils.InnerWordTagUtils; import java.util.Set; @@ -23,7 +24,7 @@ public class WordResultHandlerWordTags extends AbstractWordResultHandler wordTags = wordContext.wordTag().getTag(word); + Set wordTags = InnerWordTagUtils.tags(word, wordContext); dto.setTags(wordTags); return dto; } diff --git a/src/main/java/com/github/houbb/sensitive/word/utils/InnerWordTagUtils.java b/src/main/java/com/github/houbb/sensitive/word/utils/InnerWordTagUtils.java new file mode 100644 index 0000000..93e9831 --- /dev/null +++ b/src/main/java/com/github/houbb/sensitive/word/utils/InnerWordTagUtils.java @@ -0,0 +1,34 @@ +package com.github.houbb.sensitive.word.utils; + +import com.github.houbb.heaven.util.lang.StringUtil; +import com.github.houbb.sensitive.word.api.IWordContext; + +import java.util.Collections; +import java.util.Set; + +/** + * 内部的单词标签工具类 + * + * @since 0.24.0 + */ +public class InnerWordTagUtils { + + /** + * 获取敏感词的标签 + * + * @param word 敏感词 + * @return 结果 + * @since 0.24.0 + */ + public static Set tags(final String word, + final IWordContext wordContext) { + if(StringUtil.isEmpty(word)) { + return Collections.emptySet(); + } + + // 是否需要格式化? 
v0.24.0 + String formatWord = InnerWordFormatUtils.format(word, wordContext); + return wordContext.wordTag().getTag(formatWord); + } + +} diff --git a/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsTagTest.java b/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsTagTest.java index dfef147..dd6bb86 100644 --- a/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsTagTest.java +++ b/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsTagTest.java @@ -1,8 +1,16 @@ package com.github.houbb.sensitive.word.bs; +import com.github.houbb.heaven.util.lang.StringUtil; +import com.github.houbb.sensitive.word.api.IWordDeny; import com.github.houbb.sensitive.word.api.IWordTag; +import com.github.houbb.sensitive.word.support.result.WordResultHandlers; +import com.github.houbb.sensitive.word.support.result.WordTagsDto; +import com.github.houbb.sensitive.word.support.tag.AbstractWordTag; import com.github.houbb.sensitive.word.support.tag.WordTags; import org.junit.Assert; +import org.junit.Test; + +import java.util.*; /** *

project: sensitive-word-SensitiveWordBsTest

@@ -13,17 +21,66 @@ import org.junit.Assert; */ public class SensitiveWordBsTagTest { - public static void main(String[] args) { - String filePath = "D:\\code\\github\\sensitive-word\\src\\test\\resources\\dict_tag_test.txt"; + private void addLine(String line, + Map<String, Set<String>> wordTagMap) { + String[] strings = line.split(" "); + String key = strings[0]; + Set<String> tags = new HashSet<>(StringUtil.splitToList(strings[1])); + wordTagMap.put(key, tags); + } - IWordTag wordTag = WordTags.file(filePath); + @Test + public void wordResultHandlerWordTagsTest() { + // 自定义测试标签类 + final Map<String, Set<String>> wordTagMap = new HashMap<>(); + addLine("0售 广告", wordTagMap); + IWordTag wordTag = new AbstractWordTag() { + @Override + protected Set<String> doGetTag(String word) { + return wordTagMap.get(word); + } + }; + // 指定初始化 + SensitiveWordBs sensitiveWordBs = SensitiveWordBs.newInstance() + .wordDeny(new IWordDeny() { + @Override + public List<String> deny() { + return Arrays.asList("0售"); + } + }) + .wordTag(wordTag) + .init() + ; + List<WordTagsDto> wordTagsDtoList1 = sensitiveWordBs.findAll("零售", WordResultHandlers.wordTags()); + Assert.assertEquals("[WordTagsDto{word='零售', tags=[广告]}]", wordTagsDtoList1.toString()); + + List<WordTagsDto> wordTagsDtoList2 = sensitiveWordBs.findAll("0售", WordResultHandlers.wordTags()); + Assert.assertEquals("[WordTagsDto{word='0售', tags=[广告]}]", wordTagsDtoList2.toString()); + } + + @Test + public void wordTagsTest() { + // 自定义测试标签类 + final Map<String, Set<String>> wordTagMap = new HashMap<>(); + addLine("0售 广告", wordTagMap); + addLine("天安门 政治,国家,地址", wordTagMap); + IWordTag wordTag = new AbstractWordTag() { + @Override + protected Set<String> doGetTag(String word) { + return wordTagMap.get(word); + } + }; + + // 指定初始化 SensitiveWordBs sensitiveWordBs = SensitiveWordBs.newInstance() .wordTag(wordTag) .init() - ; + ; - Assert.assertEquals("[政治, 国家]", sensitiveWordBs.tags("五星红旗").toString());; + Assert.assertEquals("[政治, 国家, 地址]", sensitiveWordBs.tags("天安门").toString()); + Assert.assertEquals("[广告]", sensitiveWordBs.tags("零售").toString()); +
Assert.assertEquals("[广告]", sensitiveWordBs.tags("0售").toString()); } } diff --git a/src/test/resources/dict_tag_test.txt b/src/test/resources/dict_tag_test.txt index 02889a7..81c0430 100644 --- a/src/test/resources/dict_tag_test.txt +++ b/src/test/resources/dict_tag_test.txt @@ -1,3 +1,4 @@ 五星红旗 政治,国家 毛主席 政治,国家,伟人 -天安门 政治,国家,地址 \ No newline at end of file +天安门 政治,国家,地址 +0售 广告 \ No newline at end of file