From 2cae9d6cfe9500c4c177c9823bae7a3c70b1cb53 Mon Sep 17 00:00:00 2001 From: houbb <1060732496@qq.com> Date: Sun, 8 Aug 2021 20:54:59 +0800 Subject: [PATCH] release branch 0.1.0 --- CHANGE_LOG.md | 8 +++ README.md | 57 +++++++++++++++++- pom.xml | 2 +- release.bat | 4 +- .../houbb/sensitive/word/api/IWordMap.java | 4 +- .../houbb/sensitive/word/api/IWordResult.java | 31 ++++++++++ .../word/api/IWordResultHandler.java | 18 ++++++ .../sensitive/word/bs/SensitiveWordBs.java | 52 +++++++++++++--- .../word/core/SensitiveWordHelper.java | 33 +++++++++- .../word/support/map/SensitiveWordMap.java | 22 ++++--- .../word/support/result/WordResult.java | 60 +++++++++++++++++++ .../support/result/WordResultHandlerRaw.java | 20 +++++++ .../support/result/WordResultHandlerWord.java | 24 ++++++++ .../support/result/WordResultHandlers.java | 34 +++++++++++ src/main/resources/dict.txt | 10 ---- .../word/core/SensitiveWordHelperTest.java | 51 ++++++++++++++++ 16 files changed, 394 insertions(+), 36 deletions(-) create mode 100644 src/main/java/com/github/houbb/sensitive/word/api/IWordResult.java create mode 100644 src/main/java/com/github/houbb/sensitive/word/api/IWordResultHandler.java create mode 100644 src/main/java/com/github/houbb/sensitive/word/support/result/WordResult.java create mode 100644 src/main/java/com/github/houbb/sensitive/word/support/result/WordResultHandlerRaw.java create mode 100644 src/main/java/com/github/houbb/sensitive/word/support/result/WordResultHandlerWord.java create mode 100644 src/main/java/com/github/houbb/sensitive/word/support/result/WordResultHandlers.java diff --git a/CHANGE_LOG.md b/CHANGE_LOG.md index 7c5990e..cc1c6a1 100644 --- a/CHANGE_LOG.md +++ b/CHANGE_LOG.md @@ -112,3 +112,11 @@ | 序号 | 变更类型 | 说明 | 时间 | 备注 | |:---|:---|:---|:---|:--| | 1 | A | 优化 init 方式 | 2021-7-16 20:51:58 | | + +# release_0.1.0 + +| 序号 | 变更类型 | 说明 | 时间 | 备注 | +|:---|:---|:---|:---|:--| +| 1 | A | 返回敏感词对应的下标范围 | 2021-8-8 20:51:58 | | +| 2 | U | ignoreRepeat 默认为 false | 
2021-8-8 20:51:58 | | +| 3 | U | 把测试、系统、买卖、彩票等常用词移出敏感词库 | 2021-8-8 20:51:58 | | diff --git a/README.md b/README.md index ab300d3..e98fbef 100644 --- a/README.md +++ b/README.md @@ -44,6 +44,12 @@ [CHANGE_LOG.md](https://github.com/houbb/sensitive-word/blob/master/doc/CHANGE_LOG.md) +v0.1.0 变更: + +- 返回敏感词对应的开始结束下标信息 + +- 优化词库 + # 快速开始 ## 准备 @@ -58,7 +64,7 @@ com.github.houbb sensitive-word - 0.0.15 + 0.1.0 ``` @@ -69,9 +75,24 @@ | 方法 | 参数 | 返回值| 说明 | |:---|:---|:---|:---| | contains(String) | 待验证的字符串 | 布尔值 | 验证字符串是否包含敏感词 | -| findAll(String) | 待验证的字符串 | 字符串列表 | 返回字符串中所有敏感词 | | replace(String, char) | 使用指定的 char 替换敏感词 | 字符串 | 返回脱敏后的字符串 | | replace(String) | 使用 `*` 替换敏感词 | 字符串 | 返回脱敏后的字符串 | +| findAll(String) | 待验证的字符串 | 字符串列表 | 返回字符串中所有敏感词 | +| findFirst(String) | 待验证的字符串 | 字符串 | 返回字符串中第一个敏感词 | +| findAll(String, IWordResultHandler) | IWordResultHandler 结果处理类 | 字符串列表 | 返回字符串中所有敏感词 | +| findFirst(String, IWordResultHandler) | IWordResultHandler 结果处理类 | 字符串 | 返回字符串中第一个敏感词 | + +IWordResultHandler 可以对敏感词的结果进行处理,允许用户自定义。 + +内置实现见 `WordResultHandlers` 工具类: + +- WordResultHandlers.word() + +只保留敏感词单词本身。 + +- WordResultHandlers.raw() + +保留敏感词相关信息,包含敏感词,开始和结束下标。 ## 使用实例 @@ -94,6 +115,21 @@ String word = SensitiveWordHelper.findFirst(text); Assert.assertEquals("五星红旗", word); ``` +SensitiveWordHelper.findFirst(text) 等价于: + +```java +String word = SensitiveWordHelper.findFirst(text, WordResultHandlers.word()); +``` + +WordResultHandlers.raw() 可以保留对应的下标信息: + +```java +final String text = "五星红旗迎风飘扬,毛主席的画像屹立在天安门前。"; + +IWordResult word = SensitiveWordHelper.findFirst(text, WordResultHandlers.raw()); +Assert.assertEquals("WordResult{word='五星红旗', startIndex=0, endIndex=4}", word.toString()); +``` + ### 返回所有敏感词 ```java @@ -103,6 +139,23 @@ List wordList = SensitiveWordHelper.findAll(text); Assert.assertEquals("[五星红旗, 毛主席, 天安门]", wordList.toString()); ``` +返回所有敏感词用法上类似于 SensitiveWordHelper.findFirst(),同样也支持指定结果处理类。 + +SensitiveWordHelper.findAll(text) 等价于: + +```java +List wordList = 
SensitiveWordHelper.findAll(text, WordResultHandlers.word()); +``` + +WordResultHandlers.raw() 可以保留对应的下标信息: + +```java +final String text = "五星红旗迎风飘扬,毛主席的画像屹立在天安门前。"; + +List wordList = SensitiveWordHelper.findAll(text, WordResultHandlers.raw()); +Assert.assertEquals("[WordResult{word='五星红旗', startIndex=0, endIndex=4}, WordResult{word='毛主席', startIndex=9, endIndex=12}, WordResult{word='天安门', startIndex=18, endIndex=21}]", wordList.toString()); +``` + ### 默认的替换策略 ```java diff --git a/pom.xml b/pom.xml index 1a6c3b9..f61150f 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ com.github.houbb sensitive-word - 0.0.15 + 0.1.0 diff --git a/release.bat b/release.bat index 43b1415..c75af46 100644 --- a/release.bat +++ b/release.bat @@ -10,9 +10,9 @@ ECHO "============================= RELEASE START..." :: 版本号信息(需要手动指定) :::: 旧版本名称 -SET version=0.0.15 +SET version=0.1.0 :::: 新版本名称 -SET newVersion=0.0.16 +SET newVersion=0.2.0 :::: 组织名称 SET groupName=com.github.houbb :::: 项目名称 diff --git a/src/main/java/com/github/houbb/sensitive/word/api/IWordMap.java b/src/main/java/com/github/houbb/sensitive/word/api/IWordMap.java index 2b30244..9bf2140 100644 --- a/src/main/java/com/github/houbb/sensitive/word/api/IWordMap.java +++ b/src/main/java/com/github/houbb/sensitive/word/api/IWordMap.java @@ -40,7 +40,7 @@ public interface IWordMap extends ISensitiveCheck { * @since 0.0.1 * @see ValidModeEnum#FAIL_OVER 建议使用全部检测返回模式 */ - List findAll(final String string, + List findAll(final String string, final IWordContext context); /** @@ -50,7 +50,7 @@ public interface IWordMap extends ISensitiveCheck { * @return 结果 * @since 0.0.1 */ - String findFirst(final String string, + IWordResult findFirst(final String string, final IWordContext context); /** diff --git a/src/main/java/com/github/houbb/sensitive/word/api/IWordResult.java b/src/main/java/com/github/houbb/sensitive/word/api/IWordResult.java new file mode 100644 index 0000000..ec101b7 --- /dev/null +++ 
b/src/main/java/com/github/houbb/sensitive/word/api/IWordResult.java @@ -0,0 +1,31 @@ +package com.github.houbb.sensitive.word.api; + +/** + * 敏感词的结果 + * @author binbin.hou + * @since 0.1.0 + */ +public interface IWordResult { + + /** + * 敏感词 + * @return 敏感词 + * @since 0.1.0 + */ + String word(); + + /** + * 开始下标 + * @return 开始下标 + * @since 0.1.0 + */ + int startIndex(); + + /** + * 结束下标 + * @return 结束下标 + * @since 0.1.0 + */ + int endIndex(); + +} diff --git a/src/main/java/com/github/houbb/sensitive/word/api/IWordResultHandler.java b/src/main/java/com/github/houbb/sensitive/word/api/IWordResultHandler.java new file mode 100644 index 0000000..9ecd075 --- /dev/null +++ b/src/main/java/com/github/houbb/sensitive/word/api/IWordResultHandler.java @@ -0,0 +1,18 @@ +package com.github.houbb.sensitive.word.api; + +/** + * 敏感词的结果处理 + * @author binbin.hou + * @since 0.1.0 + */ +public interface IWordResultHandler { + + /** + * 对于结果的处理 + * @param wordResult 结果 + * @return 处理结果 + * @since 0.1.0 + */ + R handle(final IWordResult wordResult); + +} diff --git a/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java b/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java index a8f0863..7b20c89 100644 --- a/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java +++ b/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java @@ -1,15 +1,14 @@ package com.github.houbb.sensitive.word.bs; import com.github.houbb.heaven.constant.CharConst; +import com.github.houbb.heaven.support.handler.IHandler; import com.github.houbb.heaven.util.common.ArgUtil; import com.github.houbb.heaven.util.util.CollectionUtil; -import com.github.houbb.sensitive.word.api.IWordAllow; -import com.github.houbb.sensitive.word.api.IWordContext; -import com.github.houbb.sensitive.word.api.IWordDeny; -import com.github.houbb.sensitive.word.api.IWordMap; +import com.github.houbb.sensitive.word.api.*; import 
com.github.houbb.sensitive.word.support.allow.WordAllows; import com.github.houbb.sensitive.word.support.deny.WordDenys; import com.github.houbb.sensitive.word.support.map.SensitiveWordMap; +import com.github.houbb.sensitive.word.support.result.WordResultHandlers; import java.util.List; @@ -240,7 +239,7 @@ public class SensitiveWordBs { wordContext.ignoreNumStyle(true); wordContext.ignoreChineseStyle(true); wordContext.ignoreEnglishStyle(true); - wordContext.ignoreRepeat(true); + wordContext.ignoreRepeat(false); // 开启校验 wordContext.sensitiveCheckNum(true); @@ -273,9 +272,7 @@ public class SensitiveWordBs { * @since 0.0.1 */ public List findAll(final String target) { - statusCheck(); - - return sensitiveWordMap.findAll(target, context); + return findAll(target, WordResultHandlers.word()); } /** @@ -287,11 +284,48 @@ public class SensitiveWordBs { * @since 0.0.1 */ public String findFirst(final String target) { + return findFirst(target, WordResultHandlers.word()); + } + + /** + * 返回所有的敏感词 + * 1. 这里是默认去重的,且是有序的。 + * 2. 
如果不存在,返回空列表 + * + * @param target 目标字符串 + * @return 敏感词列表 + * @since 0.0.1 + */ + public List findAll(final String target, final IWordResultHandler handler) { + ArgUtil.notNull(handler, "handler"); statusCheck(); - return sensitiveWordMap.findFirst(target, context); + List wordResults = sensitiveWordMap.findAll(target, context); + return CollectionUtil.toList(wordResults, new IHandler() { + @Override + public R handle(IWordResult wordResult) { + return handler.handle(wordResult); + } + }); } + /** + * 返回第一个敏感词 + * (1)如果不存在,则返回 {@code null} + * + * @param target 目标字符串 + * @return 敏感词 + * @since 0.0.1 + */ + public R findFirst(final String target, final IWordResultHandler handler) { + ArgUtil.notNull(handler, "handler"); + statusCheck(); + + IWordResult wordResult = sensitiveWordMap.findFirst(target, context); + return handler.handle(wordResult); + } + + /** * 替换所有内容 * diff --git a/src/main/java/com/github/houbb/sensitive/word/core/SensitiveWordHelper.java b/src/main/java/com/github/houbb/sensitive/word/core/SensitiveWordHelper.java index 701eb40..5065441 100644 --- a/src/main/java/com/github/houbb/sensitive/word/core/SensitiveWordHelper.java +++ b/src/main/java/com/github/houbb/sensitive/word/core/SensitiveWordHelper.java @@ -1,20 +1,24 @@ package com.github.houbb.sensitive.word.core; +import com.github.houbb.sensitive.word.api.IWordResultHandler; import com.github.houbb.sensitive.word.bs.SensitiveWordBs; import java.util.List; /** * 敏感词工具类 + * * @author binbin.hou * @since 0.0.13 */ public final class SensitiveWordHelper { - private SensitiveWordHelper(){} + private SensitiveWordHelper() { + } /** * 默认的实现 + * * @since 0.0.13 */ private static final SensitiveWordBs WORD_BS = SensitiveWordBs.newInstance().init(); @@ -79,4 +83,31 @@ public final class SensitiveWordHelper { return WORD_BS.replace(target); } + /** + * 返回所有的敏感词 + * + * @param target 目标字符串 + * @param handler 结果处理类 + * @return 敏感词列表 + * @since 0.1.0 + */ + public static List findAll(final String target, + 
final IWordResultHandler handler) { + return WORD_BS.findAll(target, handler); + } + + /** + * 返回第一个敏感词 + * (1)如果不存在,则返回 {@code null} + * + * @param target 目标字符串 + * @param handler 结果处理类 + * @return 敏感词 + * @since 0.1.0 + */ + public static R findFirst(final String target, + final IWordResultHandler handler) { + return WORD_BS.findFirst(target, handler); + } + } diff --git a/src/main/java/com/github/houbb/sensitive/word/support/map/SensitiveWordMap.java b/src/main/java/com/github/houbb/sensitive/word/support/map/SensitiveWordMap.java index e42bfef..683ee10 100644 --- a/src/main/java/com/github/houbb/sensitive/word/support/map/SensitiveWordMap.java +++ b/src/main/java/com/github/houbb/sensitive/word/support/map/SensitiveWordMap.java @@ -11,11 +11,13 @@ import com.github.houbb.heaven.util.util.CollectionUtil; import com.github.houbb.heaven.util.util.MapUtil; import com.github.houbb.sensitive.word.api.IWordContext; import com.github.houbb.sensitive.word.api.IWordMap; +import com.github.houbb.sensitive.word.api.IWordResult; import com.github.houbb.sensitive.word.constant.AppConst; import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum; import com.github.houbb.sensitive.word.support.check.SensitiveCheckResult; import com.github.houbb.sensitive.word.support.check.impl.SensitiveCheckChain; import com.github.houbb.sensitive.word.support.check.impl.SensitiveCheckUrl; +import com.github.houbb.sensitive.word.support.result.WordResult; import java.util.Collection; import java.util.HashMap; @@ -131,20 +133,20 @@ public class SensitiveWordMap implements IWordMap { /** * 返回所有对应的敏感词 * (1)结果是有序的 - * (2)结果是默认去重的 + * (2)为了保留所有的下标,结果从 v0.1.0 之后不再去重。 * * @param string 原始字符串 * @return 结果 * @since 0.0.1 */ @Override - public List findAll(String string, final IWordContext context) { + public List findAll(String string, final IWordContext context) { return getSensitiveWords(string, ValidModeEnum.FAIL_OVER, context); } @Override - public String findFirst(String string, final 
IWordContext context) { - List stringList = getSensitiveWords(string, ValidModeEnum.FAIL_FAST, context); + public IWordResult findFirst(String string, final IWordContext context) { + List stringList = getSensitiveWords(string, ValidModeEnum.FAIL_FAST, context); if (CollectionUtil.isEmpty(stringList)) { return null; @@ -170,14 +172,14 @@ public class SensitiveWordMap implements IWordMap { * @return 结果列表 * @since 0.0.1 */ - private List getSensitiveWords(final String text, final ValidModeEnum modeEnum, + private List getSensitiveWords(final String text, final ValidModeEnum modeEnum, final IWordContext context) { //1. 是否存在敏感词,如果比存在,直接返回空列表 if (StringUtil.isEmpty(text)) { return Guavas.newArrayList(); } - List resultList = Guavas.newArrayList(); + List resultList = Guavas.newArrayList(); for (int i = 0; i < text.length(); i++) { SensitiveCheckResult checkResult = sensitiveCheck(text, i, ValidModeEnum.FAIL_OVER, context); // 命中 @@ -187,9 +189,11 @@ public class SensitiveWordMap implements IWordMap { String sensitiveWord = text.substring(i, i + wordLength); // 添加去重 - if (!resultList.contains(sensitiveWord)) { - resultList.add(sensitiveWord); - } + WordResult wordResult = WordResult.newInstance() + .startIndex(i) + .endIndex(i+wordLength) + .word(sensitiveWord); + resultList.add(wordResult); // 快速返回 if (ValidModeEnum.FAIL_FAST.equals(modeEnum)) { diff --git a/src/main/java/com/github/houbb/sensitive/word/support/result/WordResult.java b/src/main/java/com/github/houbb/sensitive/word/support/result/WordResult.java new file mode 100644 index 0000000..5980854 --- /dev/null +++ b/src/main/java/com/github/houbb/sensitive/word/support/result/WordResult.java @@ -0,0 +1,60 @@ +package com.github.houbb.sensitive.word.support.result; + +import com.github.houbb.sensitive.word.api.IWordResult; + +/** + * @author binbin.hou + * @since 0.1.0 + */ +public class WordResult implements IWordResult { + + private String word; + + private int startIndex; + + private int endIndex; + + public 
static WordResult newInstance() { + return new WordResult(); + } + + @Override + public String word() { + return word; + } + + public WordResult word(String word) { + this.word = word; + return this; + } + + @Override + public int startIndex() { + return startIndex; + } + + public WordResult startIndex(int startIndex) { + this.startIndex = startIndex; + return this; + } + + @Override + public int endIndex() { + return endIndex; + } + + public WordResult endIndex(int endIndex) { + this.endIndex = endIndex; + return this; + } + + @Override + public String toString() { + return "WordResult{" + + "word='" + word + '\'' + + ", startIndex=" + startIndex + + ", endIndex=" + endIndex + + '}'; + } + +} diff --git a/src/main/java/com/github/houbb/sensitive/word/support/result/WordResultHandlerRaw.java b/src/main/java/com/github/houbb/sensitive/word/support/result/WordResultHandlerRaw.java new file mode 100644 index 0000000..a34b5c2 --- /dev/null +++ b/src/main/java/com/github/houbb/sensitive/word/support/result/WordResultHandlerRaw.java @@ -0,0 +1,20 @@ +package com.github.houbb.sensitive.word.support.result; + +import com.github.houbb.heaven.annotation.ThreadSafe; +import com.github.houbb.sensitive.word.api.IWordResult; +import com.github.houbb.sensitive.word.api.IWordResultHandler; + +/** + * 不做任何处理 + * @author binbin.hou + * @since 0.1.0 + */ +@ThreadSafe +public class WordResultHandlerRaw implements IWordResultHandler { + + @Override + public IWordResult handle(IWordResult wordResult) { + return wordResult; + } + +} diff --git a/src/main/java/com/github/houbb/sensitive/word/support/result/WordResultHandlerWord.java b/src/main/java/com/github/houbb/sensitive/word/support/result/WordResultHandlerWord.java new file mode 100644 index 0000000..9853c5a --- /dev/null +++ b/src/main/java/com/github/houbb/sensitive/word/support/result/WordResultHandlerWord.java @@ -0,0 +1,24 @@ +package com.github.houbb.sensitive.word.support.result; + +import 
com.github.houbb.heaven.annotation.ThreadSafe; +import com.github.houbb.sensitive.word.api.IWordResult; +import com.github.houbb.sensitive.word.api.IWordResultHandler; + +/** + * 只保留单词 + * + * @author binbin.hou + * @since 0.1.0 + */ +@ThreadSafe +public class WordResultHandlerWord implements IWordResultHandler { + + @Override + public String handle(IWordResult wordResult) { + if(wordResult == null) { + return null; + } + return wordResult.word(); + } + +} diff --git a/src/main/java/com/github/houbb/sensitive/word/support/result/WordResultHandlers.java b/src/main/java/com/github/houbb/sensitive/word/support/result/WordResultHandlers.java new file mode 100644 index 0000000..599cb87 --- /dev/null +++ b/src/main/java/com/github/houbb/sensitive/word/support/result/WordResultHandlers.java @@ -0,0 +1,34 @@ +package com.github.houbb.sensitive.word.support.result; + +import com.github.houbb.heaven.support.instance.impl.Instances; +import com.github.houbb.sensitive.word.api.IWordResult; +import com.github.houbb.sensitive.word.api.IWordResultHandler; + +/** + * 敏感词的结果处理 + * @author binbin.hou + * @since 0.1.0 + */ +public final class WordResultHandlers { + + private WordResultHandlers(){} + + /** + * 不做任何处理 + * @return 结果 + * @since 0.1.0 + */ + public static IWordResultHandler raw() { + return Instances.singleton(WordResultHandlerRaw.class); + } + + /** + * 只保留单词 + * @return 结果 + * @since 0.1.0 + */ + public static IWordResultHandler word() { + return Instances.singleton(WordResultHandlerWord.class); + } + +} diff --git a/src/main/resources/dict.txt b/src/main/resources/dict.txt index a15e8db..d8c4a88 100644 --- a/src/main/resources/dict.txt +++ b/src/main/resources/dict.txt @@ -39,7 +39,6 @@ 106bb成人小电影 1074套豪华住宅 107bb成人小电影 -108 10bet娱乐城 10littleasiansvol1410人微乳素人娘 10种肮脏的职业 @@ -74,10 +73,8 @@ 14se淫色图片 14se淫色图片免费vagaa电影免费电影 14岁幼女b嫩阴 -14我 14色se成人站 15年前的共和国卫士 -15年纪念日 1608请牢记 163网址宽频影院网 16474勼55121 @@ -10808,7 +10805,6 @@ z以留吧以其以武 买凶杀人 买到冰毒 买到枪 -买卖 买卖16号双管猎枪 买卖1元假硬币 买卖1元假硬币qq 
@@ -44047,9 +44043,7 @@ z以留吧以其以武 测拉式气枪 测拿 测绘局 -测试 测试inter -测试员 测试哈哈哈哈啊 测试测试数据817 测试额81719792825477 @@ -50977,7 +50971,6 @@ z以留吧以其以武 简鸿章 管制刀具 管猎 -管理 管理人员 管理员 管理就是收费 @@ -51139,7 +51132,6 @@ z以留吧以其以武 糖舒宁胶囊 系列户改措施 系列送礼活动 -系统 系统中心 系统信息 系统公告 @@ -65355,5 +65347,3 @@ z以留吧以其以武 龟毛 𨰾 𫔰苞价咯 -彩票 -机票 diff --git a/src/test/java/com/github/houbb/sensitive/word/core/SensitiveWordHelperTest.java b/src/test/java/com/github/houbb/sensitive/word/core/SensitiveWordHelperTest.java index 1357206..978b009 100644 --- a/src/test/java/com/github/houbb/sensitive/word/core/SensitiveWordHelperTest.java +++ b/src/test/java/com/github/houbb/sensitive/word/core/SensitiveWordHelperTest.java @@ -1,5 +1,7 @@ package com.github.houbb.sensitive.word.core; +import com.github.houbb.sensitive.word.api.IWordResult; +import com.github.houbb.sensitive.word.support.result.WordResultHandlers; import org.junit.Assert; import org.junit.Test; @@ -37,6 +39,31 @@ public class SensitiveWordHelperTest { Assert.assertEquals("[五星红旗, 毛主席, 天安门]", wordList.toString()); } + /** + * 返回所有敏感词 + * @since 0.1.0 + */ + @Test + public void findAllWordTest() { + final String text = "五星红旗迎风飘扬,毛主席的画像屹立在天安门前。"; + + List wordList = SensitiveWordHelper.findAll(text, WordResultHandlers.word()); + Assert.assertEquals("[五星红旗, 毛主席, 天安门]", wordList.toString()); + } + + /** + * 返回所有敏感词-包含下标志 + * @since 0.1.0 + */ + @Test + public void findAllRawTest() { + final String text = "五星红旗迎风飘扬,毛主席的画像屹立在天安门前。"; + + List wordList = SensitiveWordHelper.findAll(text, WordResultHandlers.raw()); + Assert.assertEquals("[WordResult{word='五星红旗', startIndex=0, endIndex=4}, WordResult{word='毛主席', startIndex=9, endIndex=12}, WordResult{word='天安门', startIndex=18, endIndex=21}]", wordList.toString()); + } + + /** * 返回所有第一个匹配的敏感词 * @since 0.0.1 @@ -49,6 +76,30 @@ public class SensitiveWordHelperTest { Assert.assertEquals("五星红旗", word); } + /** + * 返回所有第一个匹配的敏感词 + * @since 0.0.1 + */ + @Test + public void findFirstWordTest() { + final String text = 
/**
 * Finds the first sensitive word with the raw handler and checks that the result
 * carries the word together with its start and end index.
 * @since 0.1.0
 */
@Test
public void findFirstRawTest() {
    final String text = "五星红旗迎风飘扬,毛主席的画像屹立在天安门前。";

    IWordResult word = SensitiveWordHelper.findFirst(text, WordResultHandlers.raw());
    Assert.assertEquals("WordResult{word='五星红旗', startIndex=0, endIndex=4}", word.toString());
}