diff --git a/CHANGE_LOG.md b/CHANGE_LOG.md index aab9c14..f989f24 100644 --- a/CHANGE_LOG.md +++ b/CHANGE_LOG.md @@ -285,3 +285,10 @@ | 5 | O | 移除 涿州、中国联通、中国网通、李老师、写字楼、人民银行、卡拉、牛魔王、 张杰、哪里找、爱因斯坦、天地之间、不玩了、里根、监听器、监听 | 2024-4-9 21:04:18 | | | 6 | O | 移除 选举、登陆、中国移动、运营者、客户服务、精益求精、下载速度、好朋友、拦截器、账号、无界、深入浅出、腾讯、解码器、管理员、白皮书 | 2024-4-9 21:04:18 | | | 7 | O | 移除 监听、运营商、一起玩、转化、超市 | 2024-4-9 21:04:18 | | + + +# release_0.14.0 + +| 序号 | 变更类型 | 说明 | 时间 | 备注 | +|:---|:-----|------------|:-------------------|:-------------------------------------------------| +| 1 | A | 结果添加敏感词的类别 | 2024-4-11 15:02:25 | | diff --git a/README.md b/README.md index 933af03..56314a4 100644 --- a/README.md +++ b/README.md @@ -52,6 +52,8 @@ [CHANGE_LOG.md](https://github.com/houbb/sensitive-word/blob/master/CHANGE_LOG.md) +V0.14.0: raw 添加敏感词类别。 + ## 更多资料 ### 敏感词控台 @@ -82,7 +84,7 @@ com.github.houbb sensitive-word - 0.13.4 + 0.14.0 ``` @@ -127,15 +129,6 @@ SensitiveWordHelper.findFirst(text) 等价于: String word = SensitiveWordHelper.findFirst(text, WordResultHandlers.word()); ``` -WordResultHandlers.raw() 可以保留对应的下标信息: - -```java -final String text = "五星红旗迎风飘扬,毛主席的画像屹立在天安门前。"; - -IWordResult word = SensitiveWordHelper.findFirst(text, WordResultHandlers.raw()); -Assert.assertEquals("WordResult{startIndex=0, endIndex=4}", word.toString()); -``` - ### 返回所有敏感词 ```java @@ -153,13 +146,12 @@ SensitiveWordHelper.findAll(text) 等价于: List wordList = SensitiveWordHelper.findAll(text, WordResultHandlers.word()); ``` -WordResultHandlers.raw() 可以保留对应的下标信息: +WordResultHandlers.raw() 可以保留对应的下标信息、类别信息: ```java -final String text = "五星红旗迎风飘扬,毛主席的画像屹立在天安门前。"; - -List wordList = SensitiveWordHelper.findAll(text, WordResultHandlers.raw()); -Assert.assertEquals("[WordResult{startIndex=0, endIndex=4}, WordResult{startIndex=9, endIndex=12}, WordResult{startIndex=18, endIndex=21}]", wordList.toString()); +final String text = "骂人:你他妈; 邮箱:123@qq.com; mobile: 13088889999; 网址:https://www.baidu.com"; +List wordList3 = SensitiveWordHelper.findAll(text, WordResultHandlers.raw()); +Assert.assertEquals("[WordResult{startIndex=3, endIndex=6, type='WORD'}, WordResult{startIndex=11, endIndex=21, type='EMAIL'}, WordResult{startIndex=31, endIndex=42, type='NUM'}, WordResult{startIndex=55, endIndex=68, type='URL'}]", wordList3.toString()); ``` ### 默认的替换策略 diff --git a/pom.xml b/pom.xml index 573d4cb..f30901a 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ com.github.houbb sensitive-word - 0.13.4 + 0.14.0 diff --git a/release.bat b/release.bat index aa42699..0b7323f 100644 --- a/release.bat +++ b/release.bat @@ -10,9 +10,9 @@ ECHO "============================= RELEASE START..." :: 版本号信息(需要手动指定) :::: 旧版本名称 -SET version=0.13.4 +SET version=0.14.0 :::: 新版本名称 -SET newVersion=0.14.0 +SET newVersion=0.15.0 :::: 组织名称 SET groupName=com.github.houbb :::: 项目名称 diff --git a/src/main/java/com/github/houbb/sensitive/word/api/IWordResult.java b/src/main/java/com/github/houbb/sensitive/word/api/IWordResult.java index aa310da..ceaaf47 100644 --- a/src/main/java/com/github/houbb/sensitive/word/api/IWordResult.java +++ b/src/main/java/com/github/houbb/sensitive/word/api/IWordResult.java @@ -21,4 +21,11 @@ public interface IWordResult { */ int endIndex(); + /** + * 类别 + * @return 类别 + * @since 0.14.0 + */ + String type(); + } diff --git a/src/main/java/com/github/houbb/sensitive/word/constant/enums/WordTypeEnum.java b/src/main/java/com/github/houbb/sensitive/word/constant/enums/WordTypeEnum.java new file mode 100644 index 0000000..e26a8ea --- /dev/null +++ b/src/main/java/com/github/houbb/sensitive/word/constant/enums/WordTypeEnum.java @@ -0,0 +1,31 @@ +package com.github.houbb.sensitive.word.constant.enums; + +/** + * 单词类别包含类别 + * @since 0.14.0 + */ +public enum WordTypeEnum { + WORD("WORD", "敏感词"), + EMAIL("EMAIL", "邮箱"), + URL("URL", "链接"), + NUM("NUM", "数字"), + + DEFAULTS("DEFAULTS", "默认"), + ; + + private final String code; + private final String desc; + + WordTypeEnum(String code, String desc) { + this.code = code; + this.desc = desc; + } + + public String getCode() { + return code; + } + + public String getDesc() { + return desc; + } +} diff --git a/src/main/java/com/github/houbb/sensitive/word/core/SensitiveWord.java b/src/main/java/com/github/houbb/sensitive/word/core/SensitiveWord.java index 479bc50..c31a3f0 100644 --- a/src/main/java/com/github/houbb/sensitive/word/core/SensitiveWord.java +++ b/src/main/java/com/github/houbb/sensitive/word/core/SensitiveWord.java @@ -3,6 +3,7 @@ package com.github.houbb.sensitive.word.core; import com.github.houbb.heaven.util.guava.Guavas; import com.github.houbb.sensitive.word.api.*; import com.github.houbb.sensitive.word.api.context.InnerSensitiveWordContext; +import com.github.houbb.sensitive.word.constant.enums.WordTypeEnum; import com.github.houbb.sensitive.word.constant.enums.WordValidModeEnum; import com.github.houbb.sensitive.word.support.check.WordCheckResult; import com.github.houbb.sensitive.word.support.result.WordResult; @@ -67,7 +68,8 @@ public class SensitiveWord extends AbstractSensitiveWord { // 保存敏感词 WordResult wordResult = WordResult.newInstance() .startIndex(i) - .endIndex(i+wordLength); + .endIndex(i+wordLength) + .type(checkResult.type()); //v0.13.0 添加判断 if(wordResultCondition.match(wordResult, text, modeEnum, context)) { resultList.add(wordResult); diff --git a/src/main/java/com/github/houbb/sensitive/word/support/check/AbstractWordCheck.java b/src/main/java/com/github/houbb/sensitive/word/support/check/AbstractWordCheck.java index 2c8c044..5b36b05 100644 --- a/src/main/java/com/github/houbb/sensitive/word/support/check/AbstractWordCheck.java +++ b/src/main/java/com/github/houbb/sensitive/word/support/check/AbstractWordCheck.java @@ -30,18 +30,31 @@ public abstract class AbstractWordCheck implements IWordCheck { */ protected abstract int getActualLength(int beginIndex, final InnerSensitiveWordContext checkContext); + /** + * 获取类别 + * @return 类别 + * @since 0.14.0 + */ + protected abstract String getType(); + @Override public WordCheckResult sensitiveCheck(int beginIndex, final InnerSensitiveWordContext checkContext) { Class clazz = getSensitiveCheckClass(); final String txt = checkContext.originalText(); if(StringUtil.isEmpty(txt)) { - return WordCheckResult.of(0, clazz); + return WordCheckResult.newInstance() + .index(0) + .type(getType()) + .checkClass(clazz); } int actualLength = getActualLength(beginIndex, checkContext); - return WordCheckResult.of(actualLength, clazz); + return WordCheckResult.newInstance() + .index(actualLength) + .type(getType()) + .checkClass(clazz); } } diff --git a/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckEmail.java b/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckEmail.java index 4037deb..01ca88a 100644 --- a/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckEmail.java +++ b/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckEmail.java @@ -6,6 +6,7 @@ import com.github.houbb.heaven.util.util.regex.RegexUtil; import com.github.houbb.sensitive.word.api.IWordCheck; import com.github.houbb.sensitive.word.api.context.InnerSensitiveWordContext; import com.github.houbb.sensitive.word.constant.WordConst; +import com.github.houbb.sensitive.word.constant.enums.WordTypeEnum; /** * email 正则表达式检测实现。 @@ -39,6 +40,11 @@ public class WordCheckEmail extends AbstractConditionWordCheck { return WordCheckEmail.class; } + @Override + protected String getType() { + return WordTypeEnum.EMAIL.getCode(); + } + @Override protected boolean isCharCondition(char mappingChar, int index, InnerSensitiveWordContext checkContext) { return CharUtil.isEmilChar(mappingChar); diff --git a/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckNone.java b/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckNone.java index 24f5e54..c9fba58 100644 --- a/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckNone.java +++ b/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckNone.java @@ -3,6 +3,7 @@ package com.github.houbb.sensitive.word.support.check; import com.github.houbb.heaven.annotation.ThreadSafe; import com.github.houbb.sensitive.word.api.IWordCheck; import com.github.houbb.sensitive.word.api.context.InnerSensitiveWordContext; +import com.github.houbb.sensitive.word.constant.enums.WordTypeEnum; /** * 未匹配 @@ -25,7 +26,10 @@ public class WordCheckNone implements IWordCheck { /** * 只有一个未匹配 */ - private static final WordCheckResult NONE_RESULT = WordCheckResult.of(0, WordCheckNone.class); + private static final WordCheckResult NONE_RESULT = WordCheckResult.newInstance() + .type(WordTypeEnum.DEFAULTS.getCode()) + .index(0) + .checkClass(WordCheckNone.class); public static WordCheckResult getNoneResult() { return NONE_RESULT; diff --git a/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckNum.java b/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckNum.java index 808eac9..bffdcdf 100644 --- a/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckNum.java +++ b/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckNum.java @@ -3,6 +3,7 @@ package com.github.houbb.sensitive.word.support.check; import com.github.houbb.heaven.annotation.ThreadSafe; import com.github.houbb.sensitive.word.api.IWordCheck; import com.github.houbb.sensitive.word.api.context.InnerSensitiveWordContext; +import com.github.houbb.sensitive.word.constant.enums.WordTypeEnum; /** * 敏感词监测实现 @@ -28,6 +29,11 @@ public class WordCheckNum extends AbstractConditionWordCheck { return WordCheckNum.class; } + @Override + protected String getType() { + return WordTypeEnum.NUM.getCode(); + } + @Override protected boolean isCharCondition(char mappingChar, int index, InnerSensitiveWordContext checkContext) { return Character.isDigit(mappingChar); diff --git a/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckResult.java b/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckResult.java index f07110d..1fdba99 100644 --- a/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckResult.java +++ b/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckResult.java @@ -24,17 +24,15 @@ public class WordCheckResult { private Class checkClass; /** - * 实例化 - * @param index 返回索引 - * @param checkClass 验证类 - * @return 结果 - * @since 0.0.12 + * 单词类别 + * @since 0.14.0 */ - public static WordCheckResult of(final int index, - final Class checkClass) { - WordCheckResult result = new WordCheckResult(); - result.index(index).checkClass(checkClass); - return result; + private String type; + + private WordCheckResult(){} + + public static WordCheckResult newInstance() { + return new WordCheckResult(); } public int index() { @@ -55,11 +53,21 @@ public class WordCheckResult { return this; } + public String type() { + return type; + } + + public WordCheckResult type(String type) { + this.type = type; + return this; + } + @Override public String toString() { - return "SensitiveCheckResult{" + + return "WordCheckResult{" + "index=" + index + ", checkClass=" + checkClass + + ", type='" + type + '\'' + '}'; } diff --git a/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckUrl.java b/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckUrl.java index 88db3bf..6e797c9 100644 --- a/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckUrl.java +++ b/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckUrl.java @@ -6,6 +6,7 @@ import com.github.houbb.heaven.util.util.regex.RegexUtil; import com.github.houbb.sensitive.word.api.IWordCheck; import com.github.houbb.sensitive.word.api.context.InnerSensitiveWordContext; import com.github.houbb.sensitive.word.constant.WordConst; +import com.github.houbb.sensitive.word.constant.enums.WordTypeEnum; /** * URL 正则表达式检测实现。 @@ -36,6 +37,11 @@ public class WordCheckUrl extends AbstractConditionWordCheck { return WordCheckUrl.class; } + @Override + protected String getType() { + return WordTypeEnum.URL.getCode(); + } + @Override protected boolean isCharCondition(char mappingChar, int index, InnerSensitiveWordContext checkContext) { return CharUtil.isWebSiteChar(mappingChar); diff --git a/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckWord.java b/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckWord.java index b19a14c..bf2d0b1 100644 --- a/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckWord.java +++ b/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckWord.java @@ -6,6 +6,7 @@ import com.github.houbb.sensitive.word.api.IWordCheck; import com.github.houbb.sensitive.word.api.IWordContext; import com.github.houbb.sensitive.word.api.IWordData; import com.github.houbb.sensitive.word.api.context.InnerSensitiveWordContext; +import com.github.houbb.sensitive.word.constant.enums.WordTypeEnum; import com.github.houbb.sensitive.word.constant.enums.WordValidModeEnum; import com.github.houbb.sensitive.word.constant.enums.WordContainsTypeEnum; @@ -83,4 +84,9 @@ public class WordCheckWord extends AbstractWordCheck { return actualLength; } + @Override + protected String getType() { + return WordTypeEnum.WORD.getCode(); + } + } diff --git a/src/main/java/com/github/houbb/sensitive/word/support/result/WordResult.java b/src/main/java/com/github/houbb/sensitive/word/support/result/WordResult.java index 10fa165..fa6e018 100644 --- a/src/main/java/com/github/houbb/sensitive/word/support/result/WordResult.java +++ b/src/main/java/com/github/houbb/sensitive/word/support/result/WordResult.java @@ -12,6 +12,14 @@ public class WordResult implements IWordResult { private int endIndex; + /** + * 词类别 + * @since 0.14.0 + */ + private String type; + + private WordResult(){} + public static WordResult newInstance() { return new WordResult(); } @@ -36,11 +44,22 @@ public class WordResult implements IWordResult { return this; } + @Override + public String type() { + return type; + } + + public WordResult type(String type) { + this.type = type; + return this; + } + @Override public String toString() { return "WordResult{" + "startIndex=" + startIndex + ", endIndex=" + endIndex + + ", type='" + type + '\'' + '}'; } diff --git a/src/test/java/com/github/houbb/sensitive/word/core/SensitiveWordHelperTest.java b/src/test/java/com/github/houbb/sensitive/word/core/SensitiveWordHelperTest.java index 93e8f58..71640fe 100644 --- a/src/test/java/com/github/houbb/sensitive/word/core/SensitiveWordHelperTest.java +++ b/src/test/java/com/github/houbb/sensitive/word/core/SensitiveWordHelperTest.java @@ -62,7 +62,7 @@ public class SensitiveWordHelperTest { final String text = "五星红旗迎风飘扬,毛主席的画像屹立在天安门前。"; List wordList = SensitiveWordHelper.findAll(text, WordResultHandlers.raw()); - Assert.assertEquals("[WordResult{startIndex=0, endIndex=4}, WordResult{startIndex=9, endIndex=12}, WordResult{startIndex=18, endIndex=21}]", wordList.toString()); + Assert.assertEquals("[WordResult{startIndex=0, endIndex=4, type='WORD'}, WordResult{startIndex=9, endIndex=12, type='WORD'}, WordResult{startIndex=18, endIndex=21, type='WORD'}]", wordList.toString()); } @@ -99,7 +99,7 @@ public class SensitiveWordHelperTest { final String text = "五星红旗迎风飘扬,毛主席的画像屹立在天安门前。"; IWordResult word = SensitiveWordHelper.findFirst(text, WordResultHandlers.raw()); - Assert.assertEquals("WordResult{startIndex=0, endIndex=4}", word.toString()); + Assert.assertEquals("WordResult{startIndex=0, endIndex=4, type='WORD'}", word.toString()); } /** diff --git a/src/test/java/com/github/houbb/sensitive/word/support/handler/WordResultHandlerTest.java b/src/test/java/com/github/houbb/sensitive/word/support/handler/WordResultHandlerTest.java index dd0784f..e9d146d 100644 --- a/src/test/java/com/github/houbb/sensitive/word/support/handler/WordResultHandlerTest.java +++ b/src/test/java/com/github/houbb/sensitive/word/support/handler/WordResultHandlerTest.java @@ -27,7 +27,14 @@ public class WordResultHandlerTest { Assert.assertEquals("[五星红旗, 毛主席, 天安门]", wordList2.toString()); List wordList3 = SensitiveWordHelper.findAll(text, WordResultHandlers.raw()); - Assert.assertEquals("[WordResult{startIndex=0, endIndex=4}, WordResult{startIndex=9, endIndex=12}, WordResult{startIndex=18, endIndex=21}]", wordList3.toString()); + Assert.assertEquals("[WordResult{startIndex=0, endIndex=4, type='WORD'}, WordResult{startIndex=9, endIndex=12, type='WORD'}, WordResult{startIndex=18, endIndex=21, type='WORD'}]", wordList3.toString()); + } + + @Test + public void findAllWordTest2() { + final String text = "骂人:你他妈; 邮箱:123@qq.com; mobile: 13088889999; 网址:https://www.baidu.com"; + List wordList3 = SensitiveWordHelper.findAll(text, WordResultHandlers.raw()); + Assert.assertEquals("[WordResult{startIndex=3, endIndex=6, type='WORD'}, WordResult{startIndex=11, endIndex=21, type='EMAIL'}, WordResult{startIndex=31, endIndex=42, type='NUM'}, WordResult{startIndex=55, endIndex=68, type='URL'}]", wordList3.toString()); } @Test