diff --git a/CHANGE_LOG.md b/CHANGE_LOG.md index 178c077..d9516a6 100644 --- a/CHANGE_LOG.md +++ b/CHANGE_LOG.md @@ -189,4 +189,14 @@ | 序号 | 变更类型 | 说明 | 时间 | 备注 | |:---|:-----|---------------------------------------------|:--------------------|:----------------| -| 1 | A | IWordMap 命名调整为 IWordData, 添加 Tree 实现。优化内存占用 | 2023-06-09 23:51:58 | 避免过于限制,放开便于后续拓展 | \ No newline at end of file +| 1 | A | IWordMap 命名调整为 IWordData, 添加 Tree 实现。优化内存占用 | 2023-06-09 23:51:58 | 避免过于限制,放开便于后续拓展 | + +# release_0.8.0 + +| 序号 | 变更类型 | 说明 | 时间 | 备注 | +|:---|:-----|---------------------------------------------|:--------------------|:----------------| +| 1 | A | 添加 ICharFormatCombine | 2023-06-10 23:51:58 | 允许用户自定义格式化组合策略 | +| 2 | A | 添加 ISensitiveCheckCombine | 2023-06-10 23:51:58 | 允许用户自定义敏感词校验组合策略 | +| 3 | A | 添加 IWordAllowDenyCombine | 2023-06-10 23:51:58 | 允许用户自定义 allow+deny 的组合策略 | +| 4 | A | 添加引导类进阶的配置使用说明 | 2023-06-10 23:51:58 | 放在后续,避免内部接口不够稳定 | +| 5 | U | 内部接口名称统一为 IWordXXX | 2023-06-10 23:51:58 | | diff --git a/README.md b/README.md index 16e9760..8d75b46 100644 --- a/README.md +++ b/README.md @@ -58,7 +58,7 @@ com.github.houbb sensitive-word - 0.7.0 + 0.8.0 ``` @@ -89,7 +89,7 @@ IWordResultHandler 可以对敏感词的结果进行处理,允许用户自定 - WordResultHandlers.raw() -保留敏感词相关信息,包含敏感词,开始和结束下标。 +保留敏感词相关信息,包含敏感词的开始和结束下标。 ## 使用实例 @@ -124,7 +124,7 @@ WordResultHandlers.raw() 可以保留对应的下标信息: final String text = "五星红旗迎风飘扬,毛主席的画像屹立在天安门前。"; IWordResult word = SensitiveWordHelper.findFirst(text, WordResultHandlers.raw()); -Assert.assertEquals("WordResult{word='五星红旗', startIndex=0, endIndex=4}", word.toString()); +Assert.assertEquals("WordResult{startIndex=0, endIndex=4}", word.toString()); ``` ### 返回所有敏感词 @@ -198,11 +198,11 @@ public void defineReplaceTest() { 其中 `MySensitiveWordReplace` 是我们自定义的替换策略,实现如下: ```java -public class MySensitiveWordReplace implements ISensitiveWordReplace { +public class MyWordReplace implements IWordReplace { @Override - public String replace(ISensitiveWordReplaceContext context) { - String sensitiveWord = InnerCharUtils.getString(rawChars, wordResult); + public void replace(StringBuilder stringBuilder, final char[] rawChars, IWordResult wordResult, IWordContext wordContext) { + String sensitiveWord = InnerWordCharUtils.getString(rawChars, wordResult); // 自定义不同的敏感词替换策略,可以从数据库等地方读取 if("五星红旗".equals(sensitiveWord)) { stringBuilder.append("国家旗帜"); @@ -386,7 +386,6 @@ Assert.assertTrue(wordBs.contains(text)); | 9 | enableUrlCheck | 是否启用链接检测 | true | | 10 | enableWordCheck | 是否启用敏感单词检测 | true | | 11 | numCheckLen | 数字检测,自定义指定长度。 | 8 | -| 12 | sensitiveWordReplace | 敏感词替换策略 | `*` 替换 | # 动态加载(用户自定义) @@ -664,23 +663,15 @@ ps: 不同环境会有差异,但是比例基本稳定。 - [x] wordData 的内存占用对比 + 优化 -- [ ] 用户指定自定义的词组,同时允许指定词组的组合获取,更加灵活 +- [x] 用户指定自定义的词组,同时允许指定词组的组合获取,更加灵活 -ICharFormat/ISensitiveCheck/Word 方法,允许用户自定义。 +FormatCombine/CheckCombine/AllowDenyCombine 组合策略,允许用户自定义。 - [ ] word check 策略的优化,统一遍历+转换 -- [ ] DFA 数据结构的另一种实现 +- [ ] 添加 ThreadLocal 等性能优化 -- 同音字处理 - -- 形近字处理 - -- 文字镜像翻转 - -- 文字降噪处理 - -- 敏感词标签支持 +- [ ] 敏感词标签支持 # 拓展阅读 diff --git a/pom.xml b/pom.xml index d82fd93..1626cb2 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ com.github.houbb sensitive-word - 0.7.0 + 0.8.0 diff --git a/release.bat b/release.bat index 3a66af8..9937e82 100644 --- a/release.bat +++ b/release.bat @@ -10,9 +10,9 @@ ECHO "============================= RELEASE START..." :: 版本号信息(需要手动指定) :::: 旧版本名称 -SET version=0.7.0 +SET version=0.8.0 :::: 新版本名称 -SET newVersion=0.8.0 +SET newVersion=0.9.0 :::: 组织名称 SET groupName=com.github.houbb :::: 项目名称 diff --git a/src/main/java/com/github/houbb/sensitive/word/api/ISensitiveWord.java b/src/main/java/com/github/houbb/sensitive/word/api/ISensitiveWord.java index dda78d8..c3de23d 100644 --- a/src/main/java/com/github/houbb/sensitive/word/api/ISensitiveWord.java +++ b/src/main/java/com/github/houbb/sensitive/word/api/ISensitiveWord.java @@ -1,6 +1,6 @@ package com.github.houbb.sensitive.word.api; -import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum; +import com.github.houbb.sensitive.word.constant.enums.WordValidModeEnum; import java.util.List; @@ -16,7 +16,7 @@ public interface ISensitiveWord { * @param context 上下文 * @return 结果 * @since 0.0.1 - * @see ValidModeEnum#FAIL_OVER 建议使用全部检测返回模式 + * @see WordValidModeEnum#FAIL_OVER 建议使用全部检测返回模式 */ List findAll(final String string, final IWordContext context); diff --git a/src/main/java/com/github/houbb/sensitive/word/support/check/ISensitiveCheck.java b/src/main/java/com/github/houbb/sensitive/word/api/IWordCheck.java similarity index 74% rename from src/main/java/com/github/houbb/sensitive/word/support/check/ISensitiveCheck.java rename to src/main/java/com/github/houbb/sensitive/word/api/IWordCheck.java index ad94de9..ac72e8e 100644 --- a/src/main/java/com/github/houbb/sensitive/word/support/check/ISensitiveCheck.java +++ b/src/main/java/com/github/houbb/sensitive/word/api/IWordCheck.java @@ -1,6 +1,7 @@ -package com.github.houbb.sensitive.word.support.check; +package com.github.houbb.sensitive.word.api; -import com.github.houbb.sensitive.word.api.context.InnerSensitiveContext; +import com.github.houbb.sensitive.word.api.context.InnerSensitiveWordContext; +import com.github.houbb.sensitive.word.support.check.WordCheckResult; /** * 敏感信息监测接口 @@ -13,7 +14,7 @@ import com.github.houbb.sensitive.word.api.context.InnerSensitiveContext; * @author binbin.hou * @since 0.0.5 */ -public interface ISensitiveCheck { +public interface IWordCheck { /** * 检查敏感词数量 @@ -31,7 +32,7 @@ public interface ISensitiveCheck { * @return 敏感信息对应的长度 * @since 0.0.5 */ - SensitiveCheckResult sensitiveCheck(final int beginIndex, - final InnerSensitiveContext context); + WordCheckResult sensitiveCheck(final int beginIndex, + final InnerSensitiveWordContext context); } diff --git a/src/main/java/com/github/houbb/sensitive/word/api/IWordContext.java b/src/main/java/com/github/houbb/sensitive/word/api/IWordContext.java index 1a1da6c..bbd8b75 100644 --- a/src/main/java/com/github/houbb/sensitive/word/api/IWordContext.java +++ b/src/main/java/com/github/houbb/sensitive/word/api/IWordContext.java @@ -1,7 +1,5 @@ package com.github.houbb.sensitive.word.api; -import com.github.houbb.sensitive.word.support.check.ISensitiveCheck; - /** * @author binbin.hou * @since 0.0.4 @@ -177,38 +175,38 @@ public interface IWordContext { * @return this * @since 0.3.0 */ - IWordContext sensitiveCheck(final ISensitiveCheck sensitiveCheck); + IWordContext sensitiveCheck(final IWordCheck sensitiveCheck); /** * 获取检测策略 * @return 检测策略 * @since 0.3.0 */ - ISensitiveCheck sensitiveCheck(); + IWordCheck sensitiveCheck(); /** * 设置敏感词替换策略 - * @param sensitiveWordReplace 策略 + * @param wordReplace 策略 * @return this * @since 0.3.0 */ - IWordContext sensitiveWordReplace(final ISensitiveWordReplace sensitiveWordReplace); + IWordContext wordReplace(final IWordReplace wordReplace); /** * 敏感词替换策略 * @return 替换策略 * @since 0.3.0 */ - ISensitiveWordReplace sensitiveWordReplace(); + IWordReplace wordReplace(); /** * 设置统一的字符处理 * - * @param charFormat 字符处理 + * @param wordFormat 字符处理 * @return 结果 * @since 0.3.0 */ - IWordContext charFormat(final ICharFormat charFormat); + IWordContext wordFormat(final IWordFormat wordFormat); /** * 获取格式化策略 @@ -216,7 +214,7 @@ public interface IWordContext { * @return 策略 * @since 0.3.0 */ - ICharFormat charFormat(); + IWordFormat wordFormat(); /** * 获取 wordMap 策略 diff --git a/src/main/java/com/github/houbb/sensitive/word/api/IWordData.java b/src/main/java/com/github/houbb/sensitive/word/api/IWordData.java index 3095e9a..0a4f90c 100644 --- a/src/main/java/com/github/houbb/sensitive/word/api/IWordData.java +++ b/src/main/java/com/github/houbb/sensitive/word/api/IWordData.java @@ -1,7 +1,7 @@ package com.github.houbb.sensitive.word.api; -import com.github.houbb.sensitive.word.api.context.InnerSensitiveContext; -import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum; +import com.github.houbb.sensitive.word.api.context.InnerSensitiveWordContext; +import com.github.houbb.sensitive.word.constant.enums.WordValidModeEnum; import com.github.houbb.sensitive.word.constant.enums.WordContainsTypeEnum; import java.util.Collection; @@ -27,9 +27,9 @@ public interface IWordData { * @param innerContext 上下文 * @return 是否包含 * @since 0.5.0 - * @see ValidModeEnum#FAIL_FAST 建议使用快速返回模式 + * @see WordValidModeEnum#FAIL_FAST 建议使用快速返回模式 */ WordContainsTypeEnum contains(final StringBuilder stringBuilder, - final InnerSensitiveContext innerContext); + final InnerSensitiveWordContext innerContext); } diff --git a/src/main/java/com/github/houbb/sensitive/word/api/ICharFormat.java b/src/main/java/com/github/houbb/sensitive/word/api/IWordFormat.java similarity index 93% rename from src/main/java/com/github/houbb/sensitive/word/api/ICharFormat.java rename to src/main/java/com/github/houbb/sensitive/word/api/IWordFormat.java index a6e4513..25afa79 100644 --- a/src/main/java/com/github/houbb/sensitive/word/api/ICharFormat.java +++ b/src/main/java/com/github/houbb/sensitive/word/api/IWordFormat.java @@ -10,7 +10,7 @@ package com.github.houbb.sensitive.word.api; * @author binbin.hou * @since 0.0.5 */ -public interface ICharFormat { +public interface IWordFormat { /** * 针对 char 格式化 diff --git a/src/main/java/com/github/houbb/sensitive/word/api/ISensitiveWordReplace.java b/src/main/java/com/github/houbb/sensitive/word/api/IWordReplace.java similarity index 93% rename from src/main/java/com/github/houbb/sensitive/word/api/ISensitiveWordReplace.java rename to src/main/java/com/github/houbb/sensitive/word/api/IWordReplace.java index b82d31c..e847db3 100644 --- a/src/main/java/com/github/houbb/sensitive/word/api/ISensitiveWordReplace.java +++ b/src/main/java/com/github/houbb/sensitive/word/api/IWordReplace.java @@ -6,7 +6,7 @@ package com.github.houbb.sensitive.word.api; * @author binbin.hou * @since 0.2.0 */ -public interface ISensitiveWordReplace { +public interface IWordReplace { /** * 替换 diff --git a/src/main/java/com/github/houbb/sensitive/word/api/combine/IWordAllowDenyCombine.java b/src/main/java/com/github/houbb/sensitive/word/api/combine/IWordAllowDenyCombine.java new file mode 100644 index 0000000..4329aeb --- /dev/null +++ b/src/main/java/com/github/houbb/sensitive/word/api/combine/IWordAllowDenyCombine.java @@ -0,0 +1,28 @@ +package com.github.houbb.sensitive.word.api.combine; + +import com.github.houbb.sensitive.word.api.IWordAllow; +import com.github.houbb.sensitive.word.api.IWordContext; +import com.github.houbb.sensitive.word.api.IWordDeny; + +import java.util.Collection; +import java.util.List; + +/** + * @author d + * @since 0.8.0 + */ +public interface IWordAllowDenyCombine { + + /** + * 获取最终的拒绝单词列表 + * @param wordAllow 允许 + * @param wordDeny 拒绝 + * @param context 上下文 + * @return 结果 + * @since 0.8.0 + */ + Collection getActualDenyList(IWordAllow wordAllow, + IWordDeny wordDeny, + final IWordContext context); + +} diff --git a/src/main/java/com/github/houbb/sensitive/word/api/combine/IWordCheckCombine.java b/src/main/java/com/github/houbb/sensitive/word/api/combine/IWordCheckCombine.java new file mode 100644 index 0000000..beb3714 --- /dev/null +++ b/src/main/java/com/github/houbb/sensitive/word/api/combine/IWordCheckCombine.java @@ -0,0 +1,21 @@ +package com.github.houbb.sensitive.word.api.combine; + +import com.github.houbb.sensitive.word.api.IWordCheck; +import com.github.houbb.sensitive.word.api.IWordContext; + +/** + * @author d + * @since 0.8.0 + */ +public interface IWordCheckCombine { + + /** + * 初始化敏感检测策略 + * @param context 上下文 + * + * @return 实现 + * @since 0.8.0 + */ + IWordCheck initWordCheck(final IWordContext context); + +} diff --git a/src/main/java/com/github/houbb/sensitive/word/api/combine/IWordFormatCombine.java b/src/main/java/com/github/houbb/sensitive/word/api/combine/IWordFormatCombine.java new file mode 100644 index 0000000..9a99088 --- /dev/null +++ b/src/main/java/com/github/houbb/sensitive/word/api/combine/IWordFormatCombine.java @@ -0,0 +1,20 @@ +package com.github.houbb.sensitive.word.api.combine; + +import com.github.houbb.sensitive.word.api.IWordFormat; +import com.github.houbb.sensitive.word.api.IWordContext; + +/** + * @author d + * @since 0.8.0 + */ +public interface IWordFormatCombine { + + /** + * 初始化 charFormat + * @param context 上下文 + * @return 结果 + * @since 0.8.0 + */ + IWordFormat initWordFormat(final IWordContext context); + +} diff --git a/src/main/java/com/github/houbb/sensitive/word/api/context/InnerSensitiveContext.java b/src/main/java/com/github/houbb/sensitive/word/api/context/InnerSensitiveWordContext.java similarity index 62% rename from src/main/java/com/github/houbb/sensitive/word/api/context/InnerSensitiveContext.java rename to src/main/java/com/github/houbb/sensitive/word/api/context/InnerSensitiveWordContext.java index 44553f8..224d64c 100644 --- a/src/main/java/com/github/houbb/sensitive/word/api/context/InnerSensitiveContext.java +++ b/src/main/java/com/github/houbb/sensitive/word/api/context/InnerSensitiveWordContext.java @@ -1,7 +1,7 @@ package com.github.houbb.sensitive.word.api.context; import com.github.houbb.sensitive.word.api.IWordContext; -import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum; +import com.github.houbb.sensitive.word.constant.enums.WordValidModeEnum; import java.util.Map; @@ -11,7 +11,7 @@ import java.util.Map; * @author binbin.hou * @since 0.6.0 */ -public class InnerSensitiveContext { +public class InnerSensitiveWordContext { /** * 原始文本 @@ -24,21 +24,21 @@ public class InnerSensitiveContext { /** * 校验模式 */ - private ValidModeEnum modeEnum; + private WordValidModeEnum modeEnum; /** * 原始上下文 */ private IWordContext wordContext; - public static InnerSensitiveContext newInstance() { - return new InnerSensitiveContext(); + public static InnerSensitiveWordContext newInstance() { + return new InnerSensitiveWordContext(); } public String originalText() { return originalText; } - public InnerSensitiveContext originalText(String text) { + public InnerSensitiveWordContext originalText(String text) { this.originalText = text; return this; } @@ -47,16 +47,16 @@ public class InnerSensitiveContext { return formatCharMapping; } - public InnerSensitiveContext formatCharMapping(Map formatCharMapping) { + public InnerSensitiveWordContext formatCharMapping(Map formatCharMapping) { this.formatCharMapping = formatCharMapping; return this; } - public ValidModeEnum modeEnum() { + public WordValidModeEnum modeEnum() { return modeEnum; } - public InnerSensitiveContext modeEnum(ValidModeEnum modeEnum) { + public InnerSensitiveWordContext modeEnum(WordValidModeEnum modeEnum) { this.modeEnum = modeEnum; return this; } @@ -65,7 +65,7 @@ public class InnerSensitiveContext { return wordContext; } - public InnerSensitiveContext wordContext(IWordContext context) { + public InnerSensitiveWordContext wordContext(IWordContext context) { this.wordContext = context; return this; } diff --git a/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java b/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java index e17dda7..29bd564 100644 --- a/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java +++ b/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java @@ -4,17 +4,20 @@ import com.github.houbb.heaven.support.handler.IHandler; import com.github.houbb.heaven.util.common.ArgUtil; import com.github.houbb.heaven.util.util.CollectionUtil; import com.github.houbb.sensitive.word.api.*; +import com.github.houbb.sensitive.word.api.combine.IWordAllowDenyCombine; +import com.github.houbb.sensitive.word.api.combine.IWordCheckCombine; +import com.github.houbb.sensitive.word.api.combine.IWordFormatCombine; import com.github.houbb.sensitive.word.core.SensitiveWords; import com.github.houbb.sensitive.word.support.allow.WordAllows; -import com.github.houbb.sensitive.word.support.check.ISensitiveCheck; -import com.github.houbb.sensitive.word.support.check.impl.SensitiveChecks; -import com.github.houbb.sensitive.word.support.deny.WordDenys; -import com.github.houbb.sensitive.word.support.format.CharFormats; +import com.github.houbb.sensitive.word.support.combine.allowdeny.WordAllowDenyCombines; +import com.github.houbb.sensitive.word.support.combine.check.WordCheckCombines; +import com.github.houbb.sensitive.word.support.combine.format.WordFormatCombines; import com.github.houbb.sensitive.word.support.data.WordDatas; -import com.github.houbb.sensitive.word.support.replace.SensitiveWordReplaces; +import com.github.houbb.sensitive.word.support.deny.WordDenys; +import com.github.houbb.sensitive.word.support.replace.WordReplaces; import com.github.houbb.sensitive.word.support.result.WordResultHandlers; -import com.github.houbb.sensitive.word.utils.InnerWordDataUtils; +import java.util.Collection; import java.util.List; /** @@ -117,7 +120,7 @@ public class SensitiveWordBs { * 替换策略 * @since 0.3.0 */ - private ISensitiveWordReplace sensitiveWordReplace = SensitiveWordReplaces.defaults(); + private IWordReplace wordReplace = WordReplaces.defaults(); /** * 上下文 @@ -125,6 +128,24 @@ public class SensitiveWordBs { */ private IWordContext context = SensitiveWordContext.newInstance(); + /** + * 单词检测组合策略 + * @since 0.8.0 + */ + private IWordCheckCombine wordCheckCombine = WordCheckCombines.defaults(); + + /** + * 单词格式化组合策略 + * @since 0.8.0 + */ + private IWordFormatCombine wordFormatCombine = WordFormatCombines.defaults(); + + /** + * 单词组合策略 + * @since 0.8.0 + */ + private IWordAllowDenyCombine wordAllowDenyCombine = WordAllowDenyCombines.defaults(); + /** * 新建验证实例 *

@@ -137,7 +158,6 @@ public class SensitiveWordBs { return new SensitiveWordBs(); } - /** * 初始化 * @@ -146,19 +166,23 @@ public class SensitiveWordBs { * @return this */ public SensitiveWordBs init() { - // 初始化 context - this.initContext(); + // 1. 初始化 context + IWordContext context = this.initContext(); - // 替换策略 - final ICharFormat charFormat = CharFormats.initCharFormat(context); - context.charFormat(charFormat); + // 2. 格式化策略 + final IWordFormat charFormat = wordFormatCombine.initWordFormat(context); + context.wordFormat(charFormat); - // 3. 初始化对应的 sensitiveCheck - final ISensitiveCheck sensitiveCheck = SensitiveChecks.initSensitiveCheck(context); + // 3. 初始化对应的 Check 策略 + final IWordCheck sensitiveCheck = wordCheckCombine.initWordCheck(context); context.sensitiveCheck(sensitiveCheck); - //2. 初始化 word - this.initWordMap(); + // 4. 初始化 word + Collection denyList = wordAllowDenyCombine.getActualDenyList(wordAllow, wordDeny, context); + wordData.initWordData(denyList); + + //5. 更新 context + this.context = context; return this; } @@ -170,7 +194,7 @@ public class SensitiveWordBs { * @since 0.0.4 */ private IWordContext initContext() { - this.context = SensitiveWordContext.newInstance(); + IWordContext context = SensitiveWordContext.newInstance(); // 格式统一化 context.ignoreCase(ignoreCase); @@ -188,26 +212,31 @@ public class SensitiveWordBs { // 额外配置 context.sensitiveCheckNumLen(numCheckLen); - context.sensitiveWordReplace(sensitiveWordReplace); + context.wordReplace(wordReplace); context.wordData(wordData); return context; } - /** - * DCL 初始化 wordMap 信息 - * - * 注意:map 的构建是一个比较耗时的动作 - * @since 0.0.4 - */ - private synchronized void initWordMap() { - // 加载配置信息 - List denyList = wordDeny.deny(); - List allowList = wordAllow.allow(); - List results = InnerWordDataUtils.getActualDenyList(denyList, allowList, context); + public SensitiveWordBs wordCheckCombine(IWordCheckCombine wordCheckCombine) { + ArgUtil.notNull(wordCheckCombine, "wordCheckCombine"); - // 便于可以多次初始化 - wordData.initWordData(results); + this.wordCheckCombine = wordCheckCombine; + return this; + } + + public SensitiveWordBs wordFormatCombine(IWordFormatCombine wordFormatCombine) { + ArgUtil.notNull(wordFormatCombine, "wordFormatCombine"); + + this.wordFormatCombine = wordFormatCombine; + return this; + } + + public SensitiveWordBs wordAllowDenyCombine(IWordAllowDenyCombine wordAllowDenyCombine) { + ArgUtil.notNull(wordAllowDenyCombine, "wordAllowDenyCombine"); + + this.wordAllowDenyCombine = wordAllowDenyCombine; + return this; } /** @@ -232,12 +261,12 @@ public class SensitiveWordBs { /** * 设置替换策略 - * @param sensitiveWordReplace 替换 + * @param wordReplace 替换 * @return 结果 */ - public SensitiveWordBs sensitiveWordReplace(ISensitiveWordReplace sensitiveWordReplace) { - ArgUtil.notNull(sensitiveWordReplace, "sensitiveWordReplace"); - this.sensitiveWordReplace = sensitiveWordReplace; + public SensitiveWordBs wordReplace(IWordReplace wordReplace) { + ArgUtil.notNull(wordReplace, "wordReplace"); + this.wordReplace = wordReplace; return this; } @@ -265,6 +294,8 @@ public class SensitiveWordBs { return this; } + //-------------------------------------------------------- 基础属性设置 + /** * 设置是否启动数字检测 * diff --git a/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordContext.java b/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordContext.java index 545cc38..59bc673 100644 --- a/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordContext.java +++ b/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordContext.java @@ -1,10 +1,6 @@ package com.github.houbb.sensitive.word.bs; -import com.github.houbb.sensitive.word.api.ICharFormat; -import com.github.houbb.sensitive.word.api.ISensitiveWordReplace; -import com.github.houbb.sensitive.word.api.IWordContext; -import com.github.houbb.sensitive.word.api.IWordData; -import com.github.houbb.sensitive.word.support.check.ISensitiveCheck; +import com.github.houbb.sensitive.word.api.*; /** * 上下文 @@ -83,19 +79,19 @@ public class SensitiveWordContext implements IWordContext { * 检测策略 * @since 0.3.0 */ - private ISensitiveCheck sensitiveCheck; + private IWordCheck wordCheck; /** * 替换策略 * @since 0.3.0 */ - private ISensitiveWordReplace sensitiveWordReplace; + private IWordReplace wordReplace; /** * 格式化 * @since 0.3.0 */ - private ICharFormat charFormat; + private IWordFormat wordFormat; /** * 单词 map 信息 @@ -114,22 +110,22 @@ public class SensitiveWordContext implements IWordContext { } @Override - public ISensitiveWordReplace sensitiveWordReplace() { - return sensitiveWordReplace; + public IWordReplace wordReplace() { + return wordReplace; } - public SensitiveWordContext sensitiveWordReplace(ISensitiveWordReplace sensitiveWordReplace) { - this.sensitiveWordReplace = sensitiveWordReplace; + public SensitiveWordContext wordReplace(IWordReplace wordReplace) { + this.wordReplace = wordReplace; return this; } @Override - public ISensitiveCheck sensitiveCheck() { - return sensitiveCheck; + public IWordCheck sensitiveCheck() { + return wordCheck; } - public SensitiveWordContext sensitiveCheck(ISensitiveCheck sensitiveCheck) { - this.sensitiveCheck = sensitiveCheck; + public SensitiveWordContext sensitiveCheck(IWordCheck sensitiveCheck) { + this.wordCheck = sensitiveCheck; return this; } @@ -269,12 +265,12 @@ public class SensitiveWordContext implements IWordContext { } @Override - public ICharFormat charFormat() { - return charFormat; + public IWordFormat wordFormat() { + return wordFormat; } - public SensitiveWordContext charFormat(ICharFormat charFormat) { - this.charFormat = charFormat; + public SensitiveWordContext wordFormat(IWordFormat wordFormat) { + this.wordFormat = wordFormat; return this; } } diff --git a/src/main/java/com/github/houbb/sensitive/word/constant/AppConst.java b/src/main/java/com/github/houbb/sensitive/word/constant/WordConst.java similarity index 92% rename from src/main/java/com/github/houbb/sensitive/word/constant/AppConst.java rename to src/main/java/com/github/houbb/sensitive/word/constant/WordConst.java index 4699de8..ff17334 100644 --- a/src/main/java/com/github/houbb/sensitive/word/constant/AppConst.java +++ b/src/main/java/com/github/houbb/sensitive/word/constant/WordConst.java @@ -7,9 +7,9 @@ package com.github.houbb.sensitive.word.constant; * @author Administrator * @since 0.0.1 */ -public final class AppConst { +public final class WordConst { - private AppConst(){} + private WordConst(){} /** * 是否为结束标识 diff --git a/src/main/java/com/github/houbb/sensitive/word/constant/enums/ValidModeEnum.java b/src/main/java/com/github/houbb/sensitive/word/constant/enums/WordValidModeEnum.java similarity index 91% rename from src/main/java/com/github/houbb/sensitive/word/constant/enums/ValidModeEnum.java rename to src/main/java/com/github/houbb/sensitive/word/constant/enums/WordValidModeEnum.java index 0c0cdfd..479de1b 100644 --- a/src/main/java/com/github/houbb/sensitive/word/constant/enums/ValidModeEnum.java +++ b/src/main/java/com/github/houbb/sensitive/word/constant/enums/WordValidModeEnum.java @@ -7,7 +7,7 @@ package com.github.houbb.sensitive.word.constant.enums; * @author Administrator * @since 0.0.1 */ -public enum ValidModeEnum { +public enum WordValidModeEnum { /** * 快速失败 diff --git a/src/main/java/com/github/houbb/sensitive/word/core/AbstractSensitiveWord.java b/src/main/java/com/github/houbb/sensitive/word/core/AbstractSensitiveWord.java index a6a2c5d..d73e182 100644 --- a/src/main/java/com/github/houbb/sensitive/word/core/AbstractSensitiveWord.java +++ b/src/main/java/com/github/houbb/sensitive/word/core/AbstractSensitiveWord.java @@ -3,7 +3,7 @@ package com.github.houbb.sensitive.word.core; import com.github.houbb.heaven.util.lang.StringUtil; import com.github.houbb.heaven.util.util.CollectionUtil; import com.github.houbb.sensitive.word.api.ISensitiveWord; -import com.github.houbb.sensitive.word.api.ISensitiveWordReplace; +import com.github.houbb.sensitive.word.api.IWordReplace; import com.github.houbb.sensitive.word.api.IWordContext; import com.github.houbb.sensitive.word.api.IWordResult; @@ -37,7 +37,7 @@ public abstract class AbstractSensitiveWord implements ISensitiveWord { protected String doReplace(String target, List allList, IWordContext context) { // 根据 index 直接分割 - final ISensitiveWordReplace replace = context.sensitiveWordReplace(); + final IWordReplace replace = context.wordReplace(); // 是否需要对 allList 排序? StringBuilder stringBuilder = new StringBuilder(); diff --git a/src/main/java/com/github/houbb/sensitive/word/core/SensitiveWord.java b/src/main/java/com/github/houbb/sensitive/word/core/SensitiveWord.java index a9e95b7..338ffdb 100644 --- a/src/main/java/com/github/houbb/sensitive/word/core/SensitiveWord.java +++ b/src/main/java/com/github/houbb/sensitive/word/core/SensitiveWord.java @@ -1,15 +1,15 @@ package com.github.houbb.sensitive.word.core; import com.github.houbb.heaven.util.guava.Guavas; +import com.github.houbb.sensitive.word.api.IWordCheck; import com.github.houbb.sensitive.word.api.ISensitiveWord; import com.github.houbb.sensitive.word.api.IWordContext; import com.github.houbb.sensitive.word.api.IWordResult; -import com.github.houbb.sensitive.word.api.context.InnerSensitiveContext; -import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum; -import com.github.houbb.sensitive.word.support.check.ISensitiveCheck; -import com.github.houbb.sensitive.word.support.check.SensitiveCheckResult; +import com.github.houbb.sensitive.word.api.context.InnerSensitiveWordContext; +import com.github.houbb.sensitive.word.constant.enums.WordValidModeEnum; +import com.github.houbb.sensitive.word.support.check.WordCheckResult; import com.github.houbb.sensitive.word.support.result.WordResult; -import com.github.houbb.sensitive.word.utils.InnerFormatUtils; +import com.github.houbb.sensitive.word.utils.InnerWordFormatUtils; import java.util.List; import java.util.Map; @@ -32,7 +32,7 @@ public class SensitiveWord extends AbstractSensitiveWord { @Override protected List doFindAll(String string, IWordContext context) { - return innerSensitiveWords(string, ValidModeEnum.FAIL_OVER, context); + return innerSensitiveWords(string, WordValidModeEnum.FAIL_OVER, context); } /** @@ -44,24 +44,24 @@ public class SensitiveWord extends AbstractSensitiveWord { * @since 0.0.1 */ private List innerSensitiveWords(final String text, - final ValidModeEnum modeEnum, + final WordValidModeEnum modeEnum, final IWordContext context) { //1. 是否存在敏感词,如果比存在,直接返回空列表 - final ISensitiveCheck sensitiveCheck = context.sensitiveCheck(); + final IWordCheck sensitiveCheck = context.sensitiveCheck(); List resultList = Guavas.newArrayList(); //TODO: 这里拆分为2个部分,从而保障性能。但是要注意处理下标的问题。 //1. 原始的敏感词部分 //2. email/url/num 的单独一次遍历处理。 - final Map characterCharacterMap = InnerFormatUtils.formatCharsMapping(text, context); - final InnerSensitiveContext checkContext = InnerSensitiveContext.newInstance() + final Map characterCharacterMap = InnerWordFormatUtils.formatCharsMapping(text, context); + final InnerSensitiveWordContext checkContext = InnerSensitiveWordContext.newInstance() .originalText(text) .wordContext(context) - .modeEnum(ValidModeEnum.FAIL_OVER) + .modeEnum(WordValidModeEnum.FAIL_OVER) .formatCharMapping(characterCharacterMap); for (int i = 0; i < text.length(); i++) { - SensitiveCheckResult checkResult = sensitiveCheck.sensitiveCheck(i, checkContext); + WordCheckResult checkResult = sensitiveCheck.sensitiveCheck(i, checkContext); // 命中 int wordLength = checkResult.index(); @@ -73,7 +73,7 @@ public class SensitiveWord extends AbstractSensitiveWord { resultList.add(wordResult); // 快速返回 - if (ValidModeEnum.FAIL_FAST.equals(modeEnum)) { + if (WordValidModeEnum.FAIL_FAST.equals(modeEnum)) { break; } diff --git a/src/main/java/com/github/houbb/sensitive/word/core/SensitiveWordHelper.java b/src/main/java/com/github/houbb/sensitive/word/core/SensitiveWordHelper.java index bad461a..26e2592 100644 --- a/src/main/java/com/github/houbb/sensitive/word/core/SensitiveWordHelper.java +++ b/src/main/java/com/github/houbb/sensitive/word/core/SensitiveWordHelper.java @@ -1,9 +1,9 @@ package com.github.houbb.sensitive.word.core; -import com.github.houbb.sensitive.word.api.ISensitiveWordReplace; +import com.github.houbb.sensitive.word.api.IWordReplace; import com.github.houbb.sensitive.word.api.IWordResultHandler; import com.github.houbb.sensitive.word.bs.SensitiveWordBs; -import com.github.houbb.sensitive.word.support.replace.SensitiveWordReplaces; +import com.github.houbb.sensitive.word.support.replace.WordReplaces; import java.util.List; @@ -69,8 +69,8 @@ public final class SensitiveWordHelper { * @return 替换后结果 * @since 0.2.0 */ - public static String replace(final String target, final ISensitiveWordReplace replace) { - SensitiveWordBs sensitiveWordBs = SensitiveWordBs.newInstance().sensitiveWordReplace(replace).init(); + public static String replace(final String target, final IWordReplace replace) { + SensitiveWordBs sensitiveWordBs = SensitiveWordBs.newInstance().wordReplace(replace).init(); return sensitiveWordBs.replace(target); } @@ -83,7 +83,7 @@ public final class SensitiveWordHelper { * @since 0.0.13 */ public static String replace(final String target, final char replaceChar) { - final ISensitiveWordReplace replace = SensitiveWordReplaces.chars(replaceChar); + final IWordReplace replace = WordReplaces.chars(replaceChar); return replace(target, replace); } diff --git a/src/main/java/com/github/houbb/sensitive/word/support/check/impl/AbstractConditionSensitiveCheck.java b/src/main/java/com/github/houbb/sensitive/word/support/check/AbstractConditionWordCheck.java similarity index 87% rename from src/main/java/com/github/houbb/sensitive/word/support/check/impl/AbstractConditionSensitiveCheck.java rename to src/main/java/com/github/houbb/sensitive/word/support/check/AbstractConditionWordCheck.java index 1bb82c7..f793326 100644 --- a/src/main/java/com/github/houbb/sensitive/word/support/check/impl/AbstractConditionSensitiveCheck.java +++ b/src/main/java/com/github/houbb/sensitive/word/support/check/AbstractConditionWordCheck.java @@ -1,8 +1,8 @@ -package com.github.houbb.sensitive.word.support.check.impl; +package com.github.houbb.sensitive.word.support.check; import com.github.houbb.heaven.annotation.ThreadSafe; import com.github.houbb.sensitive.word.api.IWordContext; -import com.github.houbb.sensitive.word.api.context.InnerSensitiveContext; +import com.github.houbb.sensitive.word.api.context.InnerSensitiveWordContext; import java.util.Map; @@ -13,7 +13,7 @@ import java.util.Map; * @since 0.3.2 */ @ThreadSafe -public abstract class AbstractConditionSensitiveCheck extends AbstractSensitiveCheck { +public abstract class AbstractConditionWordCheck extends AbstractWordCheck { /** * 当前字符串是否符合规范 @@ -23,7 +23,7 @@ public abstract class AbstractConditionSensitiveCheck extends AbstractSensitiveC * @return 结果 * @since 0.3.2 */ - protected abstract boolean isCharCondition(char mappingChar, int index, InnerSensitiveContext checkContext); + protected abstract boolean isCharCondition(char mappingChar, int index, InnerSensitiveWordContext checkContext); /** * 这里指定一个阈值条件 @@ -34,10 +34,10 @@ public abstract class AbstractConditionSensitiveCheck extends AbstractSensitiveC * @since 0.3.2 */ protected abstract boolean isStringCondition(int index, - final StringBuilder stringBuilder, InnerSensitiveContext checkContext); + final StringBuilder stringBuilder, InnerSensitiveWordContext checkContext); @Override - protected int getActualLength(int beginIndex, InnerSensitiveContext checkContext) { + protected int getActualLength(int beginIndex, InnerSensitiveWordContext checkContext) { final String txt = checkContext.originalText(); final IWordContext context = checkContext.wordContext(); final Map formatCharMapping = checkContext.formatCharMapping(); diff --git a/src/main/java/com/github/houbb/sensitive/word/support/check/impl/AbstractSensitiveCheck.java b/src/main/java/com/github/houbb/sensitive/word/support/check/AbstractWordCheck.java similarity index 50% rename from src/main/java/com/github/houbb/sensitive/word/support/check/impl/AbstractSensitiveCheck.java rename to src/main/java/com/github/houbb/sensitive/word/support/check/AbstractWordCheck.java index 270ca53..2c8c044 100644 --- a/src/main/java/com/github/houbb/sensitive/word/support/check/impl/AbstractSensitiveCheck.java +++ b/src/main/java/com/github/houbb/sensitive/word/support/check/AbstractWordCheck.java @@ -1,10 +1,9 @@ -package com.github.houbb.sensitive.word.support.check.impl; +package com.github.houbb.sensitive.word.support.check; import com.github.houbb.heaven.annotation.ThreadSafe; import com.github.houbb.heaven.util.lang.StringUtil; -import com.github.houbb.sensitive.word.api.context.InnerSensitiveContext; -import com.github.houbb.sensitive.word.support.check.ISensitiveCheck; -import com.github.houbb.sensitive.word.support.check.SensitiveCheckResult; +import com.github.houbb.sensitive.word.api.IWordCheck; +import com.github.houbb.sensitive.word.api.context.InnerSensitiveWordContext; /** * 抽象实现策略 @@ -13,14 +12,14 @@ import com.github.houbb.sensitive.word.support.check.SensitiveCheckResult; * @since 0.4.0 */ @ThreadSafe -public abstract class AbstractSensitiveCheck implements ISensitiveCheck { +public abstract class AbstractWordCheck implements IWordCheck { /** * 获取校验类 * @return 类 * @since 0.3.2 */ - protected abstract Class getSensitiveCheckClass(); + protected abstract Class getSensitiveCheckClass(); /** * 获取确切的长度 @@ -29,20 +28,20 @@ public abstract class AbstractSensitiveCheck implements ISensitiveCheck { * @return 长度 * @since 0.4.0 */ - protected abstract int getActualLength(int beginIndex, final InnerSensitiveContext checkContext); + protected abstract int getActualLength(int beginIndex, final InnerSensitiveWordContext checkContext); @Override - public SensitiveCheckResult sensitiveCheck(int beginIndex, - final InnerSensitiveContext checkContext) { - Class clazz = getSensitiveCheckClass(); + public WordCheckResult sensitiveCheck(int beginIndex, + final InnerSensitiveWordContext checkContext) { + Class clazz = getSensitiveCheckClass(); final String txt = checkContext.originalText(); if(StringUtil.isEmpty(txt)) { - return SensitiveCheckResult.of(0, clazz); + return WordCheckResult.of(0, clazz); } int actualLength = getActualLength(beginIndex, checkContext); - return SensitiveCheckResult.of(actualLength, clazz); + return WordCheckResult.of(actualLength, clazz); } } diff --git a/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckEmail.java b/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckEmail.java similarity index 66% rename from src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckEmail.java rename to src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckEmail.java index 6a9a631..4037deb 100644 --- a/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckEmail.java +++ b/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckEmail.java @@ -1,11 +1,11 @@ -package com.github.houbb.sensitive.word.support.check.impl; +package com.github.houbb.sensitive.word.support.check; import com.github.houbb.heaven.annotation.ThreadSafe; import com.github.houbb.heaven.util.lang.CharUtil; import com.github.houbb.heaven.util.util.regex.RegexUtil; -import com.github.houbb.sensitive.word.api.context.InnerSensitiveContext; -import com.github.houbb.sensitive.word.constant.AppConst; -import com.github.houbb.sensitive.word.support.check.ISensitiveCheck; +import com.github.houbb.sensitive.word.api.IWordCheck; +import com.github.houbb.sensitive.word.api.context.InnerSensitiveWordContext; +import com.github.houbb.sensitive.word.constant.WordConst; /** * email 正则表达式检测实现。 @@ -23,36 +23,36 @@ import com.github.houbb.sensitive.word.support.check.ISensitiveCheck; * @since 0.0.9 */ @ThreadSafe -public class SensitiveCheckEmail extends AbstractConditionSensitiveCheck { +public class WordCheckEmail extends AbstractConditionWordCheck { /** * @since 0.3.0 */ - private static final ISensitiveCheck INSTANCE = new SensitiveCheckEmail(); + private static final IWordCheck INSTANCE = new WordCheckEmail(); - public static ISensitiveCheck getInstance() { + public static IWordCheck getInstance() { return INSTANCE; } @Override - protected Class getSensitiveCheckClass() { - return SensitiveCheckEmail.class; + protected Class getSensitiveCheckClass() { + return WordCheckEmail.class; } @Override - protected boolean isCharCondition(char mappingChar, int index, InnerSensitiveContext checkContext) { + protected boolean isCharCondition(char mappingChar, int index, InnerSensitiveWordContext checkContext) { return CharUtil.isEmilChar(mappingChar); } @Override - protected boolean isStringCondition(int index, StringBuilder stringBuilder, InnerSensitiveContext checkContext) { + protected boolean isStringCondition(int index, StringBuilder stringBuilder, InnerSensitiveWordContext checkContext) { int bufferLen = stringBuilder.length(); //x@a.cn if(bufferLen < 6) { return false; } - if(bufferLen > AppConst.MAX_EMAIL_LEN) { + if(bufferLen > WordConst.MAX_EMAIL_LEN) { return false; } diff --git a/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckInit.java b/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckInit.java new file mode 100644 index 0000000..8298fba --- /dev/null +++ b/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckInit.java @@ -0,0 +1,47 @@ +package com.github.houbb.sensitive.word.support.check; + +import com.github.houbb.heaven.support.pipeline.Pipeline; +import com.github.houbb.heaven.support.pipeline.impl.DefaultPipeline; +import com.github.houbb.sensitive.word.api.IWordCheck; +import com.github.houbb.sensitive.word.api.context.InnerSensitiveWordContext; + +import java.util.List; + +/** + * 检测初始化类 + * @since 0.3.0 + */ +public abstract class WordCheckInit implements IWordCheck { + + /** + * 初始化列表 + * + * @param pipeline 当前列表泳道 + * @since 0.0.13 + */ + protected abstract void init(final Pipeline pipeline); + + + @Override + public WordCheckResult sensitiveCheck(final int beginIndex, + final InnerSensitiveWordContext checkContext) { + + Pipeline pipeline = new DefaultPipeline<>(); + this.init(pipeline); + List sensitiveChecks = pipeline.list(); + + // 循环调用 + for(IWordCheck sensitiveCheck : sensitiveChecks) { + WordCheckResult result = sensitiveCheck.sensitiveCheck(beginIndex, checkContext); + + if(result.index() > 0) { + return result; + } + } + + // 这里直接进行正则表达式相关的调用。 + // 默认返回 0 + return WordCheckNone.getNoneResult(); + } + +} diff --git a/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckNone.java b/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckNone.java new file mode 100644 index 0000000..24f5e54 --- /dev/null +++ b/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckNone.java @@ -0,0 +1,38 @@ +package com.github.houbb.sensitive.word.support.check; + +import com.github.houbb.heaven.annotation.ThreadSafe; +import com.github.houbb.sensitive.word.api.IWordCheck; +import com.github.houbb.sensitive.word.api.context.InnerSensitiveWordContext; + +/** + * 未匹配 + * + * @author binbin.hou + * @since 0.3.0 + */ +@ThreadSafe +public class WordCheckNone implements IWordCheck { + + /** + * @since 0.3.0 + */ + private static final IWordCheck INSTANCE = new WordCheckNone(); + + public static IWordCheck getInstance() { + return INSTANCE; + } + + /** + * 只有一个未匹配 + */ + private static final WordCheckResult NONE_RESULT = WordCheckResult.of(0, WordCheckNone.class); + + public static WordCheckResult getNoneResult() { + return NONE_RESULT; + } + + @Override + public WordCheckResult sensitiveCheck(int beginIndex, InnerSensitiveWordContext context) { + return NONE_RESULT; + } +} diff --git a/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckNum.java b/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckNum.java similarity index 55% rename from src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckNum.java rename to src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckNum.java index 6f086b8..808eac9 100644 --- a/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckNum.java +++ b/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckNum.java @@ -1,8 +1,8 @@ -package com.github.houbb.sensitive.word.support.check.impl; +package com.github.houbb.sensitive.word.support.check; import com.github.houbb.heaven.annotation.ThreadSafe; -import com.github.houbb.sensitive.word.api.context.InnerSensitiveContext; -import com.github.houbb.sensitive.word.support.check.ISensitiveCheck; +import com.github.houbb.sensitive.word.api.IWordCheck; +import com.github.houbb.sensitive.word.api.context.InnerSensitiveWordContext; /** * 敏感词监测实现 @@ -12,29 +12,29 @@ import com.github.houbb.sensitive.word.support.check.ISensitiveCheck; * @since 0.0.5 */ @ThreadSafe -public class SensitiveCheckNum extends AbstractConditionSensitiveCheck { +public class WordCheckNum extends AbstractConditionWordCheck { /** * @since 0.3.0 */ - private static final ISensitiveCheck INSTANCE = new SensitiveCheckNum(); + private static final IWordCheck INSTANCE = new WordCheckNum(); - public static ISensitiveCheck getInstance() { + public static IWordCheck getInstance() { return INSTANCE; } @Override - protected Class getSensitiveCheckClass() { - return SensitiveCheckNum.class; + protected Class getSensitiveCheckClass() { + return WordCheckNum.class; } @Override - protected boolean isCharCondition(char mappingChar, int index, InnerSensitiveContext checkContext) { + protected boolean isCharCondition(char mappingChar, int index, InnerSensitiveWordContext checkContext) { return Character.isDigit(mappingChar); } @Override - protected boolean isStringCondition(int index, StringBuilder stringBuilder, InnerSensitiveContext checkContext) { + protected boolean isStringCondition(int index, StringBuilder stringBuilder, InnerSensitiveWordContext checkContext) { int bufferLen = stringBuilder.length(); return bufferLen >= checkContext.wordContext().sensitiveCheckNumLen(); } diff --git a/src/main/java/com/github/houbb/sensitive/word/support/check/SensitiveCheckResult.java b/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckResult.java similarity index 65% rename from src/main/java/com/github/houbb/sensitive/word/support/check/SensitiveCheckResult.java rename to src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckResult.java index 88d24fb..f07110d 100644 --- a/src/main/java/com/github/houbb/sensitive/word/support/check/SensitiveCheckResult.java +++ b/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckResult.java @@ -1,5 +1,7 @@ package com.github.houbb.sensitive.word.support.check; +import com.github.houbb.sensitive.word.api.IWordCheck; + /** * 敏感信息监测接口结果 * @@ -7,7 +9,7 @@ package com.github.houbb.sensitive.word.support.check; * @author binbin.hou * @since 0.0.12 */ -public class SensitiveCheckResult { +public class WordCheckResult { /** * 下标 @@ -19,7 +21,7 @@ public class SensitiveCheckResult { * 检测类 * @since 0.0.12 */ - private Class checkClass; + private Class checkClass; /** * 实例化 @@ -28,9 +30,9 @@ public class SensitiveCheckResult { * @return 结果 * @since 0.0.12 */ - public static SensitiveCheckResult of(final int index, - final Class checkClass) { - SensitiveCheckResult result = new SensitiveCheckResult(); + public static WordCheckResult of(final int index, + final Class checkClass) { + WordCheckResult result = new WordCheckResult(); result.index(index).checkClass(checkClass); return result; } @@ -39,16 +41,16 @@ public class SensitiveCheckResult { return index; } - public SensitiveCheckResult index(int index) { + public WordCheckResult index(int index) { this.index = index; return this; } - public Class checkClass() { + public Class checkClass() { return checkClass; } - public SensitiveCheckResult checkClass(Class checkClass) { + public WordCheckResult checkClass(Class checkClass) { this.checkClass = checkClass; return this; } diff --git a/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckUrl.java b/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckUrl.java similarity index 62% rename from src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckUrl.java rename to src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckUrl.java index 94c1519..88db3bf 100644 --- a/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckUrl.java +++ b/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckUrl.java @@ -1,11 +1,11 @@ -package com.github.houbb.sensitive.word.support.check.impl; +package com.github.houbb.sensitive.word.support.check; import com.github.houbb.heaven.annotation.ThreadSafe; import com.github.houbb.heaven.util.lang.CharUtil; import com.github.houbb.heaven.util.util.regex.RegexUtil; -import com.github.houbb.sensitive.word.api.context.InnerSensitiveContext; -import com.github.houbb.sensitive.word.constant.AppConst; -import com.github.houbb.sensitive.word.support.check.ISensitiveCheck; +import com.github.houbb.sensitive.word.api.IWordCheck; +import com.github.houbb.sensitive.word.api.context.InnerSensitiveWordContext; +import com.github.houbb.sensitive.word.constant.WordConst; /** * URL 正则表达式检测实现。 @@ -20,35 +20,35 @@ import com.github.houbb.sensitive.word.support.check.ISensitiveCheck; * @since 0.0.9 */ @ThreadSafe -public class SensitiveCheckUrl extends AbstractConditionSensitiveCheck { +public class WordCheckUrl extends AbstractConditionWordCheck { /** * @since 0.3.0 */ - private static final ISensitiveCheck INSTANCE = new SensitiveCheckUrl(); + private static final IWordCheck INSTANCE = new WordCheckUrl(); - public static ISensitiveCheck getInstance() { + public static IWordCheck getInstance() { return INSTANCE; } @Override - protected Class getSensitiveCheckClass() { - return SensitiveCheckUrl.class; + protected Class getSensitiveCheckClass() { + return WordCheckUrl.class; } @Override - protected boolean isCharCondition(char mappingChar, int index, InnerSensitiveContext checkContext) { + protected boolean isCharCondition(char mappingChar, int index, InnerSensitiveWordContext checkContext) { return CharUtil.isWebSiteChar(mappingChar); } @Override - protected boolean isStringCondition(int index, StringBuilder stringBuilder, InnerSensitiveContext checkContext) { + protected boolean isStringCondition(int index, StringBuilder stringBuilder, InnerSensitiveWordContext checkContext) { int bufferLen = stringBuilder.length(); //a.cn if(bufferLen < 4) { return false; } - if(bufferLen > AppConst.MAX_WEB_SITE_LEN) { + if(bufferLen > WordConst.MAX_WEB_SITE_LEN) { return false; } diff --git a/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckWord.java b/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckWord.java similarity index 74% rename from src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckWord.java rename to src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckWord.java index c7330e1..e3b4b4e 100644 --- a/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckWord.java +++ b/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckWord.java @@ -1,12 +1,12 @@ -package com.github.houbb.sensitive.word.support.check.impl; +package com.github.houbb.sensitive.word.support.check; import com.github.houbb.heaven.annotation.ThreadSafe; +import com.github.houbb.sensitive.word.api.IWordCheck; import com.github.houbb.sensitive.word.api.IWordContext; import com.github.houbb.sensitive.word.api.IWordData; -import com.github.houbb.sensitive.word.api.context.InnerSensitiveContext; -import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum; +import com.github.houbb.sensitive.word.api.context.InnerSensitiveWordContext; +import com.github.houbb.sensitive.word.constant.enums.WordValidModeEnum; import com.github.houbb.sensitive.word.constant.enums.WordContainsTypeEnum; -import com.github.houbb.sensitive.word.support.check.ISensitiveCheck; import java.util.Map; @@ -16,27 +16,27 @@ import java.util.Map; * @since 0.0.5 */ @ThreadSafe -public class SensitiveCheckWord extends AbstractSensitiveCheck { +public class WordCheckWord extends AbstractWordCheck { /** * @since 0.3.0 */ - private static final ISensitiveCheck INSTANCE = new SensitiveCheckWord(); + private static final IWordCheck INSTANCE = new WordCheckWord(); - public static ISensitiveCheck getInstance() { + public static IWordCheck getInstance() { return INSTANCE; } @Override - protected Class getSensitiveCheckClass() { - return SensitiveCheckWord.class; + protected Class getSensitiveCheckClass() { + return WordCheckWord.class; } @Override - protected int getActualLength(int beginIndex, InnerSensitiveContext innerContext) { + protected int getActualLength(int beginIndex, InnerSensitiveWordContext innerContext) { final String txt = innerContext.originalText(); final Map formatCharMapping = innerContext.formatCharMapping(); - final ValidModeEnum validModeEnum = innerContext.modeEnum(); + final WordValidModeEnum wordValidModeEnum = innerContext.modeEnum(); final IWordContext context = innerContext.wordContext(); // 采用 ThreadLocal 应该可以提升性能,减少对象的创建。 @@ -58,7 +58,7 @@ public class SensitiveCheckWord extends AbstractSensitiveCheck { actualLength = stringBuilder.length(); // 是否遍历全部匹配的模式 - if(ValidModeEnum.FAIL_FAST.equals(validModeEnum)) { + if(WordValidModeEnum.FAIL_FAST.equals(wordValidModeEnum)) { break; } } diff --git a/src/main/java/com/github/houbb/sensitive/word/support/check/WordChecks.java b/src/main/java/com/github/houbb/sensitive/word/support/check/WordChecks.java new file mode 100644 index 0000000..e5d4ebd --- /dev/null +++ b/src/main/java/com/github/houbb/sensitive/word/support/check/WordChecks.java @@ -0,0 +1,97 @@ +package com.github.houbb.sensitive.word.support.check; + +import com.github.houbb.heaven.support.pipeline.Pipeline; +import com.github.houbb.heaven.util.util.ArrayUtil; +import com.github.houbb.heaven.util.util.CollectionUtil; +import com.github.houbb.sensitive.word.api.IWordCheck; +import com.github.houbb.sensitive.word.api.IWordContext; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; + +/** + * 敏感词检测工具 + * @since 0.3.0 + */ +public final class WordChecks { + + private WordChecks(){} + + /** + * 初始化敏感检测策略 + * @param context 上下文 + * + * @return 实现 + * @since 0.3.0 + */ + public static IWordCheck initSensitiveCheck(final IWordContext context) { + List sensitiveCheckList = new ArrayList<>(); + + if(context.enableWordCheck()) { + sensitiveCheckList.add(WordChecks.word()); + } + if(context.enableNumCheck()) { + sensitiveCheckList.add(WordChecks.num()); + } + if(context.enableEmailCheck()) { + sensitiveCheckList.add(WordChecks.email()); + } + if(context.enableUrlCheck()) { + sensitiveCheckList.add(WordChecks.url()); + } + + return WordChecks.chains(sensitiveCheckList); + } + + public static IWordCheck chains(final IWordCheck... sensitiveChecks) { + if (ArrayUtil.isEmpty(sensitiveChecks)){ + return none(); + } + + return new WordCheckInit() { + @Override + protected void init(Pipeline pipeline) { + for(IWordCheck check : sensitiveChecks) { + pipeline.addLast(check); + } + } + }; + } + + public static IWordCheck chains(final Collection sensitiveChecks) { + if (CollectionUtil.isEmpty(sensitiveChecks)){ + return none(); + } + + return new WordCheckInit() { + @Override + protected void init(Pipeline pipeline) { + for(IWordCheck check : sensitiveChecks) { + pipeline.addLast(check); + } + } + }; + } + + public static IWordCheck email() { + return WordCheckEmail.getInstance(); + } + + public static IWordCheck num() { + return WordCheckNum.getInstance(); + } + + public static IWordCheck url() { + return WordCheckUrl.getInstance(); + } + + public static IWordCheck word() { + return WordCheckWord.getInstance(); + } + + public static IWordCheck none() { + return WordCheckNone.getInstance(); + } + +} diff --git a/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckInit.java b/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckInit.java deleted file mode 100644 index db2e9ab..0000000 --- a/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckInit.java +++ /dev/null @@ -1,48 +0,0 @@ -package com.github.houbb.sensitive.word.support.check.impl; - -import com.github.houbb.heaven.support.pipeline.Pipeline; -import com.github.houbb.heaven.support.pipeline.impl.DefaultPipeline; -import com.github.houbb.sensitive.word.api.context.InnerSensitiveContext; -import com.github.houbb.sensitive.word.support.check.ISensitiveCheck; -import com.github.houbb.sensitive.word.support.check.SensitiveCheckResult; - -import java.util.List; - -/** - * 检测初始化类 - * @since 0.3.0 - */ -public abstract class SensitiveCheckInit implements ISensitiveCheck { - - /** - * 初始化列表 - * - * @param pipeline 当前列表泳道 - * @since 0.0.13 - */ - protected abstract void init(final Pipeline pipeline); - - - @Override - public SensitiveCheckResult sensitiveCheck(final int beginIndex, - final InnerSensitiveContext checkContext) { - - Pipeline pipeline = new DefaultPipeline<>(); - this.init(pipeline); - List sensitiveChecks = pipeline.list(); - - // 循环调用 - for(ISensitiveCheck sensitiveCheck : sensitiveChecks) { - SensitiveCheckResult result = sensitiveCheck.sensitiveCheck(beginIndex, checkContext); - - if(result.index() > 0) { - return result; - } - } - - // 这里直接进行正则表达式相关的调用。 - // 默认返回 0 - return SensitiveCheckNone.getNoneResult(); - } - -} diff --git a/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckNone.java b/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckNone.java deleted file mode 100644 index a640877..0000000 --- a/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckNone.java +++ /dev/null @@ -1,39 +0,0 @@ -package com.github.houbb.sensitive.word.support.check.impl; - -import com.github.houbb.heaven.annotation.ThreadSafe; -import com.github.houbb.sensitive.word.api.context.InnerSensitiveContext; -import com.github.houbb.sensitive.word.support.check.ISensitiveCheck; -import com.github.houbb.sensitive.word.support.check.SensitiveCheckResult; - -/** - * 未匹配 - * - * @author binbin.hou - * @since 0.3.0 - */ -@ThreadSafe -public class SensitiveCheckNone implements ISensitiveCheck { - - /** - * @since 0.3.0 - */ - private static final ISensitiveCheck INSTANCE = new SensitiveCheckNone(); - - public static ISensitiveCheck getInstance() { - return INSTANCE; - } - - /** - * 只有一个未匹配 - */ - private static final SensitiveCheckResult NONE_RESULT = SensitiveCheckResult.of(0, SensitiveCheckNone.class); - - public static SensitiveCheckResult getNoneResult() { - return NONE_RESULT; - } - - @Override - public SensitiveCheckResult sensitiveCheck(int beginIndex, InnerSensitiveContext context) { - return NONE_RESULT; - } -} diff --git a/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveChecks.java b/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveChecks.java deleted file mode 100644 index 2bf4a9a..0000000 --- a/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveChecks.java +++ /dev/null @@ -1,97 +0,0 @@ -package com.github.houbb.sensitive.word.support.check.impl; - -import com.github.houbb.heaven.support.pipeline.Pipeline; -import com.github.houbb.heaven.util.util.ArrayUtil; -import com.github.houbb.heaven.util.util.CollectionUtil; -import com.github.houbb.sensitive.word.api.IWordContext; -import com.github.houbb.sensitive.word.support.check.ISensitiveCheck; - -import java.util.ArrayList; -import java.util.Collection; -import java.util.List; - -/** - * 敏感词检测工具 - * @since 0.3.0 - */ -public final class SensitiveChecks { - - private SensitiveChecks(){} - - /** - * 初始化敏感检测策略 - * @param context 上下文 - * - * @return 实现 - * @since 0.3.0 - */ - public static ISensitiveCheck initSensitiveCheck(final IWordContext context) { - List sensitiveCheckList = new ArrayList<>(); - - if(context.enableWordCheck()) { - sensitiveCheckList.add(SensitiveChecks.word()); - } - if(context.enableNumCheck()) { - sensitiveCheckList.add(SensitiveChecks.num()); - } - if(context.enableEmailCheck()) { - sensitiveCheckList.add(SensitiveChecks.email()); - } - if(context.enableUrlCheck()) { - sensitiveCheckList.add(SensitiveChecks.url()); - } - - return SensitiveChecks.chains(sensitiveCheckList); - } - - public static ISensitiveCheck chains(final ISensitiveCheck... sensitiveChecks) { - if (ArrayUtil.isEmpty(sensitiveChecks)){ - return none(); - } - - return new SensitiveCheckInit() { - @Override - protected void init(Pipeline pipeline) { - for(ISensitiveCheck check : sensitiveChecks) { - pipeline.addLast(check); - } - } - }; - } - - public static ISensitiveCheck chains(final Collection sensitiveChecks) { - if (CollectionUtil.isEmpty(sensitiveChecks)){ - return none(); - } - - return new SensitiveCheckInit() { - @Override - protected void init(Pipeline pipeline) { - for(ISensitiveCheck check : sensitiveChecks) { - pipeline.addLast(check); - } - } - }; - } - - public static ISensitiveCheck email() { - return SensitiveCheckEmail.getInstance(); - } - - public static ISensitiveCheck num() { - return SensitiveCheckNum.getInstance(); - } - - public static ISensitiveCheck url() { - return SensitiveCheckUrl.getInstance(); - } - - public static ISensitiveCheck word() { - return SensitiveCheckWord.getInstance(); - } - - public static ISensitiveCheck none() { - return SensitiveCheckNone.getInstance(); - } - -} diff --git a/src/main/java/com/github/houbb/sensitive/word/support/combine/allowdeny/AbstractWordAllowDenyCombine.java b/src/main/java/com/github/houbb/sensitive/word/support/combine/allowdeny/AbstractWordAllowDenyCombine.java new file mode 100644 index 0000000..78041bc --- /dev/null +++ b/src/main/java/com/github/houbb/sensitive/word/support/combine/allowdeny/AbstractWordAllowDenyCombine.java @@ -0,0 +1,44 @@ +package com.github.houbb.sensitive.word.support.combine.allowdeny; + +import com.github.houbb.heaven.util.util.CollectionUtil; +import com.github.houbb.sensitive.word.api.IWordAllow; +import com.github.houbb.sensitive.word.api.IWordContext; +import com.github.houbb.sensitive.word.api.IWordDeny; +import com.github.houbb.sensitive.word.api.combine.IWordAllowDenyCombine; +import com.github.houbb.sensitive.word.utils.InnerWordFormatUtils; + +import java.util.Collection; +import java.util.Collections; +import java.util.List; + +/** + * @author d + * @since 0.8.0 + */ +public abstract class AbstractWordAllowDenyCombine implements IWordAllowDenyCombine { + + protected abstract Collection doGetActualDenyList(List allowList, + List denyList, + IWordContext context); + + @Override + public Collection getActualDenyList(IWordAllow wordAllow, + IWordDeny wordDeny, + IWordContext context) { + List allowList = wordAllow.allow(); + List denyList = wordDeny.deny(); + + List formatAllowList = InnerWordFormatUtils.formatWordList(allowList, context); + List formatDenyList = InnerWordFormatUtils.formatWordList(denyList, context); + + if (CollectionUtil.isEmpty(formatDenyList)) { + return Collections.emptyList(); + } + if (CollectionUtil.isEmpty(formatAllowList)) { + return formatDenyList; + } + + return doGetActualDenyList(formatAllowList, formatDenyList, context); + } + +} diff --git a/src/main/java/com/github/houbb/sensitive/word/support/combine/allowdeny/WordAllowDenyCombine.java b/src/main/java/com/github/houbb/sensitive/word/support/combine/allowdeny/WordAllowDenyCombine.java new file mode 100644 index 0000000..89e3301 --- /dev/null +++ b/src/main/java/com/github/houbb/sensitive/word/support/combine/allowdeny/WordAllowDenyCombine.java @@ -0,0 +1,36 @@ +package com.github.houbb.sensitive.word.support.combine.allowdeny; + +import com.github.houbb.sensitive.word.api.IWordContext; + +import java.util.Collection; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +/** + * @author d + * @since 0.8.0 + */ +public class WordAllowDenyCombine extends AbstractWordAllowDenyCombine{ + + @Override + protected Collection doGetActualDenyList(List allowList, + List denyList, + IWordContext context) { + Set resultSet = new HashSet<>(denyList.size()); + + // O(1) + Set allowSet = new HashSet<>(allowList); + + for(String deny : denyList) { + if(allowSet.contains(deny)) { + continue; + } + + resultSet.add(deny); + } + return resultSet; + } + + +} diff --git a/src/main/java/com/github/houbb/sensitive/word/support/combine/allowdeny/WordAllowDenyCombines.java b/src/main/java/com/github/houbb/sensitive/word/support/combine/allowdeny/WordAllowDenyCombines.java new file mode 100644 index 0000000..d30cbde --- /dev/null +++ b/src/main/java/com/github/houbb/sensitive/word/support/combine/allowdeny/WordAllowDenyCombines.java @@ -0,0 +1,17 @@ +package com.github.houbb.sensitive.word.support.combine.allowdeny; + +import com.github.houbb.sensitive.word.api.combine.IWordAllowDenyCombine; + +/** + * @author d + * @since 1.0.0 + */ +public final class WordAllowDenyCombines { + + private WordAllowDenyCombines(){} + + public static IWordAllowDenyCombine defaults() { + return new WordAllowDenyCombine(); + } + +} diff --git a/src/main/java/com/github/houbb/sensitive/word/support/combine/check/AbstractWordCheckCombine.java b/src/main/java/com/github/houbb/sensitive/word/support/combine/check/AbstractWordCheckCombine.java new file mode 100644 index 0000000..dfc57e5 --- /dev/null +++ b/src/main/java/com/github/houbb/sensitive/word/support/combine/check/AbstractWordCheckCombine.java @@ -0,0 +1,25 @@ +package com.github.houbb.sensitive.word.support.combine.check; + +import com.github.houbb.sensitive.word.api.IWordCheck; +import com.github.houbb.sensitive.word.api.IWordContext; +import com.github.houbb.sensitive.word.api.combine.IWordCheckCombine; +import com.github.houbb.sensitive.word.support.check.WordChecks; + +import java.util.List; + +/** + * @author d + * @since 0.8.0 + */ +public abstract class AbstractWordCheckCombine implements IWordCheckCombine { + + protected abstract List getWordCheckList(IWordContext context); + + @Override + public IWordCheck initWordCheck(IWordContext context) { + List wordCheckList = getWordCheckList(context); + + return WordChecks.chains(wordCheckList); + } + +} diff --git a/src/main/java/com/github/houbb/sensitive/word/support/combine/check/WordCheckCombine.java b/src/main/java/com/github/houbb/sensitive/word/support/combine/check/WordCheckCombine.java new file mode 100644 index 0000000..7209e4d --- /dev/null +++ b/src/main/java/com/github/houbb/sensitive/word/support/combine/check/WordCheckCombine.java @@ -0,0 +1,36 @@ +package com.github.houbb.sensitive.word.support.combine.check; + +import com.github.houbb.sensitive.word.api.IWordCheck; +import com.github.houbb.sensitive.word.api.IWordContext; +import com.github.houbb.sensitive.word.support.check.WordChecks; + +import java.util.ArrayList; +import java.util.List; + +/** + * @author d + * @since 0.8.0 + */ +public class WordCheckCombine extends AbstractWordCheckCombine { + + @Override + protected List getWordCheckList(IWordContext context) { + List wordCheckList = new ArrayList<>(); + + if(context.enableWordCheck()) { + wordCheckList.add(WordChecks.word()); + } + if(context.enableNumCheck()) { + wordCheckList.add(WordChecks.num()); + } + if(context.enableEmailCheck()) { + wordCheckList.add(WordChecks.email()); + } + if(context.enableUrlCheck()) { + wordCheckList.add(WordChecks.url()); + } + + return wordCheckList; + } + +} diff --git a/src/main/java/com/github/houbb/sensitive/word/support/combine/check/WordCheckCombines.java b/src/main/java/com/github/houbb/sensitive/word/support/combine/check/WordCheckCombines.java new file mode 100644 index 0000000..3b6ed19 --- /dev/null +++ b/src/main/java/com/github/houbb/sensitive/word/support/combine/check/WordCheckCombines.java @@ -0,0 +1,17 @@ +package com.github.houbb.sensitive.word.support.combine.check; + +import com.github.houbb.sensitive.word.api.combine.IWordCheckCombine; + +/** + * @author d + * @since 1.0.0 + */ +public final class WordCheckCombines { + + private WordCheckCombines(){} + + public static IWordCheckCombine defaults() { + return new WordCheckCombine(); + } + +} diff --git a/src/main/java/com/github/houbb/sensitive/word/support/combine/format/AbstractWordFormatCombine.java b/src/main/java/com/github/houbb/sensitive/word/support/combine/format/AbstractWordFormatCombine.java new file mode 100644 index 0000000..970498e --- /dev/null +++ b/src/main/java/com/github/houbb/sensitive/word/support/combine/format/AbstractWordFormatCombine.java @@ -0,0 +1,24 @@ +package com.github.houbb.sensitive.word.support.combine.format; + +import com.github.houbb.sensitive.word.api.IWordFormat; +import com.github.houbb.sensitive.word.api.IWordContext; +import com.github.houbb.sensitive.word.api.combine.IWordFormatCombine; +import com.github.houbb.sensitive.word.support.format.WordFormats; + +import java.util.List; + +/** + * @author d + * @since 0.8.0 + */ +public abstract class AbstractWordFormatCombine implements IWordFormatCombine { + + protected abstract List getWordFormatList(IWordContext context); + + @Override + public IWordFormat initWordFormat(IWordContext context) { + List list = getWordFormatList(context); + return WordFormats.chains(list); + } + +} diff --git a/src/main/java/com/github/houbb/sensitive/word/support/combine/format/WordFormatCombine.java b/src/main/java/com/github/houbb/sensitive/word/support/combine/format/WordFormatCombine.java new file mode 100644 index 0000000..06f939a --- /dev/null +++ b/src/main/java/com/github/houbb/sensitive/word/support/combine/format/WordFormatCombine.java @@ -0,0 +1,38 @@ +package com.github.houbb.sensitive.word.support.combine.format; + +import com.github.houbb.heaven.util.guava.Guavas; +import com.github.houbb.sensitive.word.api.IWordFormat; +import com.github.houbb.sensitive.word.api.IWordContext; +import com.github.houbb.sensitive.word.support.format.WordFormats; + +import java.util.List; + +/** + * @author d + * @since 0.8.0 + */ +public class WordFormatCombine extends AbstractWordFormatCombine { + + @Override + protected List getWordFormatList(IWordContext context) { + List charFormats = Guavas.newArrayList(); + if(context.ignoreEnglishStyle()) { + charFormats.add(WordFormats.ignoreEnglishStyle()); + } + if(context.ignoreCase()) { + charFormats.add(WordFormats.ignoreCase()); + } + if(context.ignoreWidth()) { + charFormats.add(WordFormats.ignoreWidth()); + } + if(context.ignoreNumStyle()) { + charFormats.add(WordFormats.ignoreNumStyle()); + } + if(context.ignoreChineseStyle()) { + charFormats.add(WordFormats.ignoreChineseStyle()); + } + + return charFormats; + } + +} diff --git a/src/main/java/com/github/houbb/sensitive/word/support/combine/format/WordFormatCombines.java b/src/main/java/com/github/houbb/sensitive/word/support/combine/format/WordFormatCombines.java new file mode 100644 index 0000000..2bbf82b --- /dev/null +++ b/src/main/java/com/github/houbb/sensitive/word/support/combine/format/WordFormatCombines.java @@ -0,0 +1,20 @@ +package com.github.houbb.sensitive.word.support.combine.format; + +import com.github.houbb.sensitive.word.api.combine.IWordFormatCombine; + +/** + * @author d + * @since 0.8.0 + */ +public final class WordFormatCombines { + + /** + * 默认策略 + * @return 策略 + * @since 0.8.0 + */ + public static IWordFormatCombine defaults() { + return new WordFormatCombine(); + } + +} diff --git a/src/main/java/com/github/houbb/sensitive/word/support/combine/package-info.java b/src/main/java/com/github/houbb/sensitive/word/support/combine/package-info.java new file mode 100644 index 0000000..a55bac9 --- /dev/null +++ b/src/main/java/com/github/houbb/sensitive/word/support/combine/package-info.java @@ -0,0 +1,5 @@ +/** + * @author d + * @since 1.0.0 + */ +package com.github.houbb.sensitive.word.support.combine; diff --git a/src/main/java/com/github/houbb/sensitive/word/support/data/AbstractWordData.java b/src/main/java/com/github/houbb/sensitive/word/support/data/AbstractWordData.java index bc9acf1..d2659bc 100644 --- a/src/main/java/com/github/houbb/sensitive/word/support/data/AbstractWordData.java +++ b/src/main/java/com/github/houbb/sensitive/word/support/data/AbstractWordData.java @@ -1,9 +1,7 @@ package com.github.houbb.sensitive.word.support.data; -import com.github.houbb.heaven.util.lang.StringUtil; -import com.github.houbb.heaven.util.util.CollectionUtil; import com.github.houbb.sensitive.word.api.IWordData; -import com.github.houbb.sensitive.word.api.context.InnerSensitiveContext; +import com.github.houbb.sensitive.word.api.context.InnerSensitiveWordContext; import com.github.houbb.sensitive.word.constant.enums.WordContainsTypeEnum; import java.util.Collection; @@ -21,7 +19,7 @@ public abstract class AbstractWordData implements IWordData { * @param innerContext 上下文 * @return 结果 */ - protected abstract WordContainsTypeEnum doContains(StringBuilder stringBuilder, InnerSensitiveContext innerContext); + protected abstract WordContainsTypeEnum doContains(StringBuilder stringBuilder, InnerSensitiveWordContext innerContext); /** * 初始化 @@ -37,7 +35,7 @@ public abstract class AbstractWordData implements IWordData { } @Override - public WordContainsTypeEnum contains(StringBuilder stringBuilder, InnerSensitiveContext innerContext) { + public WordContainsTypeEnum contains(StringBuilder stringBuilder, InnerSensitiveWordContext innerContext) { if(stringBuilder == null || stringBuilder.length() <= 0) { return WordContainsTypeEnum.NOT_FOUND; diff --git a/src/main/java/com/github/houbb/sensitive/word/support/data/WordDataHashMap.java b/src/main/java/com/github/houbb/sensitive/word/support/data/WordDataHashMap.java index 93fffbc..0840522 100644 --- a/src/main/java/com/github/houbb/sensitive/word/support/data/WordDataHashMap.java +++ b/src/main/java/com/github/houbb/sensitive/word/support/data/WordDataHashMap.java @@ -4,8 +4,8 @@ import com.github.houbb.heaven.annotation.ThreadSafe; import com.github.houbb.heaven.util.lang.ObjectUtil; import com.github.houbb.heaven.util.lang.StringUtil; import com.github.houbb.sensitive.word.api.IWordContext; -import com.github.houbb.sensitive.word.api.context.InnerSensitiveContext; -import com.github.houbb.sensitive.word.constant.AppConst; +import com.github.houbb.sensitive.word.api.context.InnerSensitiveWordContext; +import com.github.houbb.sensitive.word.constant.WordConst; import com.github.houbb.sensitive.word.constant.enums.WordContainsTypeEnum; import java.util.Collection; @@ -69,7 +69,7 @@ public class WordDataHashMap extends AbstractWordData { } else { //不存在则,则构建一个新的map,同时将isEnd设置为0,因为他不是最后一 Map newWordMap = new HashMap<>(8); - newWordMap.put(AppConst.IS_END, false); + newWordMap.put(WordConst.IS_END, false); // 将新的节点放入当前 map 中 currentMap.put(charKey, newWordMap); @@ -80,7 +80,7 @@ public class WordDataHashMap extends AbstractWordData { } // 判断是否为最后一个,添加是否结束的标识。 - currentMap.put(AppConst.IS_END, true); + currentMap.put(WordConst.IS_END, true); } // 最后更新为新的 map,保证更新过程中旧的数据可用 @@ -99,12 +99,12 @@ public class WordDataHashMap extends AbstractWordData { */ @Override public WordContainsTypeEnum doContains(final StringBuilder stringBuilder, - final InnerSensitiveContext innerContext) { + final InnerSensitiveWordContext innerContext) { return innerContainsSensitive(stringBuilder, innerContext); } private WordContainsTypeEnum innerContainsSensitive(StringBuilder stringBuilder, - final InnerSensitiveContext innerContext) { + final InnerSensitiveWordContext innerContext) { // 初始化为当前的 map Map nowMap = this.innerWordMap; @@ -141,7 +141,7 @@ public class WordDataHashMap extends AbstractWordData { return false; } - Object value = map.get(AppConst.IS_END); + Object value = map.get(WordConst.IS_END); if(ObjectUtil.isNull(value)) { return false; } @@ -160,7 +160,7 @@ public class WordDataHashMap extends AbstractWordData { private Map getNowMap(Map nowMap, final int index, final StringBuilder stringBuilder, - final InnerSensitiveContext sensitiveContext) { + final InnerSensitiveWordContext sensitiveContext) { final IWordContext context = sensitiveContext.wordContext(); // 这里的 char 已经是统一格式化之后的,所以可以不用再次格式化。 diff --git a/src/main/java/com/github/houbb/sensitive/word/support/data/WordDataTree.java b/src/main/java/com/github/houbb/sensitive/word/support/data/WordDataTree.java index 8d949b0..0cc6da0 100644 --- a/src/main/java/com/github/houbb/sensitive/word/support/data/WordDataTree.java +++ b/src/main/java/com/github/houbb/sensitive/word/support/data/WordDataTree.java @@ -5,13 +5,10 @@ import com.github.houbb.heaven.util.lang.ObjectUtil; import com.github.houbb.heaven.util.lang.StringUtil; import com.github.houbb.sensitive.word.api.IWordContext; import com.github.houbb.sensitive.word.api.IWordData; -import com.github.houbb.sensitive.word.api.context.InnerSensitiveContext; -import com.github.houbb.sensitive.word.constant.AppConst; +import com.github.houbb.sensitive.word.api.context.InnerSensitiveWordContext; import com.github.houbb.sensitive.word.constant.enums.WordContainsTypeEnum; import java.util.Collection; -import java.util.HashMap; -import java.util.Map; /** * 敏感词 map @@ -63,7 +60,7 @@ public class WordDataTree implements IWordData { @Override public WordContainsTypeEnum contains(StringBuilder stringBuilder, - InnerSensitiveContext innerContext) { + InnerSensitiveWordContext innerContext) { WordDataTreeNode nowNode = root; int len = stringBuilder.length(); @@ -98,7 +95,7 @@ public class WordDataTree implements IWordData { private WordDataTreeNode getNowMap(WordDataTreeNode nowNode, final int index, final StringBuilder stringBuilder, - final InnerSensitiveContext sensitiveContext) { + final InnerSensitiveWordContext sensitiveContext) { final IWordContext context = sensitiveContext.wordContext(); // 这里的 char 已经是统一格式化之后的,所以可以不用再次格式化。 diff --git a/src/main/java/com/github/houbb/sensitive/word/support/deny/WordDenySystem.java b/src/main/java/com/github/houbb/sensitive/word/support/deny/WordDenySystem.java index 0d117b6..8c91837 100644 --- a/src/main/java/com/github/houbb/sensitive/word/support/deny/WordDenySystem.java +++ b/src/main/java/com/github/houbb/sensitive/word/support/deny/WordDenySystem.java @@ -1,10 +1,8 @@ package com.github.houbb.sensitive.word.support.deny; import com.github.houbb.heaven.annotation.ThreadSafe; -import com.github.houbb.heaven.util.guava.Guavas; import com.github.houbb.heaven.util.io.StreamUtil; import com.github.houbb.sensitive.word.api.IWordDeny; -import com.github.houbb.sensitive.word.constant.AppConst; import java.util.List; diff --git a/src/main/java/com/github/houbb/sensitive/word/support/format/CharFormats.java b/src/main/java/com/github/houbb/sensitive/word/support/format/CharFormats.java deleted file mode 100644 index e4dbccf..0000000 --- a/src/main/java/com/github/houbb/sensitive/word/support/format/CharFormats.java +++ /dev/null @@ -1,112 +0,0 @@ -package com.github.houbb.sensitive.word.support.format; - -import com.github.houbb.heaven.support.pipeline.Pipeline; -import com.github.houbb.heaven.util.guava.Guavas; -import com.github.houbb.heaven.util.util.ArrayUtil; -import com.github.houbb.heaven.util.util.CollectionUtil; -import com.github.houbb.sensitive.word.api.ICharFormat; -import com.github.houbb.sensitive.word.api.IWordContext; - -import java.util.Collection; -import java.util.List; - -/** - * 格式化工具类 - * @author binbin.hou - * @since 0.3.5 - */ -public final class CharFormats { - - private CharFormats(){} - - /** - * 初始化格式化 - * @param context 上下文 - * @return 结果 - * @since 0.3.0 - */ - public static ICharFormat initCharFormat(final IWordContext context) { - List charFormats = Guavas.newArrayList(); - if(context.ignoreEnglishStyle()) { - charFormats.add(ignoreEnglishStyle()); - } - if(context.ignoreCase()) { - charFormats.add(ignoreCase()); - } - if(context.ignoreWidth()) { - charFormats.add(ignoreWidth()); - } - if(context.ignoreNumStyle()) { - charFormats.add(ignoreNumStyle()); - } - if(context.ignoreChineseStyle()) { - charFormats.add(ignoreChineseStyle()); - } - - return chains(charFormats); - } - - /** - * 链式 - * @param charFormats 列表 - * @return 结果 - */ - public static ICharFormat chains(final ICharFormat ... charFormats) { - if(ArrayUtil.isEmpty(charFormats)) { - return none(); - } - - return new CharFormatInit() { - @Override - protected void init(Pipeline pipeline) { - for(ICharFormat charFormat : charFormats) { - pipeline.addLast(charFormat); - } - } - }; - } - - /** - * 链式 - * @param charFormats 列表 - * @return 结果 - */ - public static ICharFormat chains(final Collection charFormats) { - if(CollectionUtil.isEmpty(charFormats)) { - return none(); - } - - return new CharFormatInit() { - @Override - protected void init(Pipeline pipeline) { - for(ICharFormat charFormat : charFormats) { - pipeline.addLast(charFormat); - } - } - }; - } - - public static ICharFormat none() { - return NoneCharFormat.getInstance(); - } - public static ICharFormat ignoreCase() { - return IgnoreCaseCharFormat.getInstance(); - } - - public static ICharFormat ignoreEnglishStyle() { - return IgnoreEnglishStyleFormat.getInstance(); - } - - public static ICharFormat ignoreChineseStyle() { - return IgnoreChineseStyleFormat.getInstance(); - } - - public static ICharFormat ignoreNumStyle() { - return IgnoreNumStyleCharFormat.getInstance(); - } - - public static ICharFormat ignoreWidth() { - return IgnoreWidthCharFormat.getInstance(); - } - -} diff --git a/src/main/java/com/github/houbb/sensitive/word/support/format/IgnoreEnglishStyleFormat.java b/src/main/java/com/github/houbb/sensitive/word/support/format/IgnoreEnglishStyleFormat.java deleted file mode 100644 index c8b3148..0000000 --- a/src/main/java/com/github/houbb/sensitive/word/support/format/IgnoreEnglishStyleFormat.java +++ /dev/null @@ -1,27 +0,0 @@ -package com.github.houbb.sensitive.word.support.format; - -import com.github.houbb.heaven.annotation.ThreadSafe; -import com.github.houbb.sensitive.word.api.ICharFormat; -import com.github.houbb.sensitive.word.api.IWordContext; -import com.github.houbb.sensitive.word.utils.InnerCharUtils; - -/** - * 忽略英文的各种格式 - * @author binbin.hou - * @since 0.0.6 - */ -@ThreadSafe -public class IgnoreEnglishStyleFormat implements ICharFormat { - - private static final ICharFormat INSTANCE = new IgnoreEnglishStyleFormat(); - - public static ICharFormat getInstance() { - return INSTANCE; - } - - @Override - public char format(char original, IWordContext context) { - return InnerCharUtils.getMappingChar(original); - } - -} diff --git a/src/main/java/com/github/houbb/sensitive/word/support/format/IgnoreCaseCharFormat.java b/src/main/java/com/github/houbb/sensitive/word/support/format/WordFormatIgnoreCase.java similarity index 63% rename from src/main/java/com/github/houbb/sensitive/word/support/format/IgnoreCaseCharFormat.java rename to src/main/java/com/github/houbb/sensitive/word/support/format/WordFormatIgnoreCase.java index bcc69ad..ab9867e 100644 --- a/src/main/java/com/github/houbb/sensitive/word/support/format/IgnoreCaseCharFormat.java +++ b/src/main/java/com/github/houbb/sensitive/word/support/format/WordFormatIgnoreCase.java @@ -1,7 +1,7 @@ package com.github.houbb.sensitive.word.support.format; import com.github.houbb.heaven.annotation.ThreadSafe; -import com.github.houbb.sensitive.word.api.ICharFormat; +import com.github.houbb.sensitive.word.api.IWordFormat; import com.github.houbb.sensitive.word.api.IWordContext; /** @@ -10,11 +10,11 @@ import com.github.houbb.sensitive.word.api.IWordContext; * @since 0.0.5 */ @ThreadSafe -public class IgnoreCaseCharFormat implements ICharFormat { +public class WordFormatIgnoreCase implements IWordFormat { - private static final ICharFormat INSTANCE = new IgnoreCaseCharFormat(); + private static final IWordFormat INSTANCE = new WordFormatIgnoreCase(); - public static ICharFormat getInstance() { + public static IWordFormat getInstance() { return INSTANCE; } diff --git a/src/main/java/com/github/houbb/sensitive/word/support/format/IgnoreChineseStyleFormat.java b/src/main/java/com/github/houbb/sensitive/word/support/format/WordFormatIgnoreChineseStyle.java similarity index 63% rename from src/main/java/com/github/houbb/sensitive/word/support/format/IgnoreChineseStyleFormat.java rename to src/main/java/com/github/houbb/sensitive/word/support/format/WordFormatIgnoreChineseStyle.java index 94e2684..456ae5d 100644 --- a/src/main/java/com/github/houbb/sensitive/word/support/format/IgnoreChineseStyleFormat.java +++ b/src/main/java/com/github/houbb/sensitive/word/support/format/WordFormatIgnoreChineseStyle.java @@ -2,13 +2,10 @@ package com.github.houbb.sensitive.word.support.format; import com.github.houbb.heaven.annotation.ThreadSafe; import com.github.houbb.heaven.util.util.CollectionUtil; -import com.github.houbb.opencc4j.core.impl.ZhConvertBootstrap; -import com.github.houbb.opencc4j.support.segment.impl.CharSegment; import com.github.houbb.opencc4j.util.ZhConverterUtil; -import com.github.houbb.sensitive.word.api.ICharFormat; +import com.github.houbb.sensitive.word.api.IWordFormat; import com.github.houbb.sensitive.word.api.IWordContext; -import java.util.Collections; import java.util.List; /** @@ -17,11 +14,11 @@ import java.util.List; * @since 0.0.5 */ @ThreadSafe -public class IgnoreChineseStyleFormat implements ICharFormat { +public class WordFormatIgnoreChineseStyle implements IWordFormat { - private static final ICharFormat INSTANCE = new IgnoreChineseStyleFormat(); + private static final IWordFormat INSTANCE = new WordFormatIgnoreChineseStyle(); - public static ICharFormat getInstance() { + public static IWordFormat getInstance() { return INSTANCE; } diff --git a/src/main/java/com/github/houbb/sensitive/word/support/format/WordFormatIgnoreEnglishStyle.java b/src/main/java/com/github/houbb/sensitive/word/support/format/WordFormatIgnoreEnglishStyle.java new file mode 100644 index 0000000..eb81f50 --- /dev/null +++ b/src/main/java/com/github/houbb/sensitive/word/support/format/WordFormatIgnoreEnglishStyle.java @@ -0,0 +1,27 @@ +package com.github.houbb.sensitive.word.support.format; + +import com.github.houbb.heaven.annotation.ThreadSafe; +import com.github.houbb.sensitive.word.api.IWordFormat; +import com.github.houbb.sensitive.word.api.IWordContext; +import com.github.houbb.sensitive.word.utils.InnerWordCharUtils; + +/** + * 忽略英文的各种格式 + * @author binbin.hou + * @since 0.0.6 + */ +@ThreadSafe +public class WordFormatIgnoreEnglishStyle implements IWordFormat { + + private static final IWordFormat INSTANCE = new WordFormatIgnoreEnglishStyle(); + + public static IWordFormat getInstance() { + return INSTANCE; + } + + @Override + public char format(char original, IWordContext context) { + return InnerWordCharUtils.getMappingChar(original); + } + +} diff --git a/src/main/java/com/github/houbb/sensitive/word/support/format/IgnoreNumStyleCharFormat.java b/src/main/java/com/github/houbb/sensitive/word/support/format/WordFormatIgnoreNumStyle.java similarity index 50% rename from src/main/java/com/github/houbb/sensitive/word/support/format/IgnoreNumStyleCharFormat.java rename to src/main/java/com/github/houbb/sensitive/word/support/format/WordFormatIgnoreNumStyle.java index 4fd8511..89c85a2 100644 --- a/src/main/java/com/github/houbb/sensitive/word/support/format/IgnoreNumStyleCharFormat.java +++ b/src/main/java/com/github/houbb/sensitive/word/support/format/WordFormatIgnoreNumStyle.java @@ -2,8 +2,8 @@ package com.github.houbb.sensitive.word.support.format; import com.github.houbb.heaven.annotation.ThreadSafe; import com.github.houbb.sensitive.word.api.IWordContext; -import com.github.houbb.sensitive.word.api.ICharFormat; -import com.github.houbb.sensitive.word.utils.InnerNumUtils; +import com.github.houbb.sensitive.word.api.IWordFormat; +import com.github.houbb.sensitive.word.utils.InnerWordNumUtils; /** * 忽略数字的样式 @@ -11,17 +11,17 @@ import com.github.houbb.sensitive.word.utils.InnerNumUtils; * @since 0.0.5 */ @ThreadSafe -public class IgnoreNumStyleCharFormat implements ICharFormat { +public class WordFormatIgnoreNumStyle implements IWordFormat { - private static final ICharFormat INSTANCE = new IgnoreNumStyleCharFormat(); + private static final IWordFormat INSTANCE = new WordFormatIgnoreNumStyle(); - public static ICharFormat getInstance() { + public static IWordFormat getInstance() { return INSTANCE; } @Override public char format(char original, IWordContext context) { - return InnerNumUtils.getMappingChar(original); + return InnerWordNumUtils.getMappingChar(original); } } diff --git a/src/main/java/com/github/houbb/sensitive/word/support/format/IgnoreWidthCharFormat.java b/src/main/java/com/github/houbb/sensitive/word/support/format/WordFormatIgnoreWidth.java similarity index 66% rename from src/main/java/com/github/houbb/sensitive/word/support/format/IgnoreWidthCharFormat.java rename to src/main/java/com/github/houbb/sensitive/word/support/format/WordFormatIgnoreWidth.java index e34cab3..4b700bd 100644 --- a/src/main/java/com/github/houbb/sensitive/word/support/format/IgnoreWidthCharFormat.java +++ b/src/main/java/com/github/houbb/sensitive/word/support/format/WordFormatIgnoreWidth.java @@ -3,7 +3,7 @@ package com.github.houbb.sensitive.word.support.format; import com.github.houbb.heaven.annotation.ThreadSafe; import com.github.houbb.heaven.util.lang.CharUtil; import com.github.houbb.sensitive.word.api.IWordContext; -import com.github.houbb.sensitive.word.api.ICharFormat; +import com.github.houbb.sensitive.word.api.IWordFormat; /** * 格式化字宽度 @@ -11,11 +11,11 @@ import com.github.houbb.sensitive.word.api.ICharFormat; * @since 0.0.5 */ @ThreadSafe -public class IgnoreWidthCharFormat implements ICharFormat { +public class WordFormatIgnoreWidth implements IWordFormat { - private static final ICharFormat INSTANCE = new IgnoreWidthCharFormat(); + private static final IWordFormat INSTANCE = new WordFormatIgnoreWidth(); - public static ICharFormat getInstance() { + public static IWordFormat getInstance() { return INSTANCE; } diff --git a/src/main/java/com/github/houbb/sensitive/word/support/format/CharFormatInit.java b/src/main/java/com/github/houbb/sensitive/word/support/format/WordFormatInit.java similarity index 68% rename from src/main/java/com/github/houbb/sensitive/word/support/format/CharFormatInit.java rename to src/main/java/com/github/houbb/sensitive/word/support/format/WordFormatInit.java index 31e2f0e..2c0c4fd 100644 --- a/src/main/java/com/github/houbb/sensitive/word/support/format/CharFormatInit.java +++ b/src/main/java/com/github/houbb/sensitive/word/support/format/WordFormatInit.java @@ -3,7 +3,7 @@ package com.github.houbb.sensitive.word.support.format; import com.github.houbb.heaven.annotation.ThreadSafe; import com.github.houbb.heaven.support.pipeline.Pipeline; import com.github.houbb.heaven.support.pipeline.impl.DefaultPipeline; -import com.github.houbb.sensitive.word.api.ICharFormat; +import com.github.houbb.sensitive.word.api.IWordFormat; import com.github.houbb.sensitive.word.api.IWordContext; import java.util.List; @@ -14,7 +14,7 @@ import java.util.List; * @since 0.0.5 */ @ThreadSafe -public abstract class CharFormatInit implements ICharFormat { +public abstract class WordFormatInit implements IWordFormat { /** * 初始化列表 @@ -22,18 +22,18 @@ public abstract class CharFormatInit implements ICharFormat { * @param pipeline 当前列表泳道 * @since 0.0.13 */ - protected abstract void init(final Pipeline pipeline); + protected abstract void init(final Pipeline pipeline); @Override public char format(char original, IWordContext context) { - Pipeline pipeline = new DefaultPipeline<>(); + Pipeline pipeline = new DefaultPipeline<>(); init(pipeline); char result = original; // 循环执行 - List charFormats = pipeline.list(); - for(ICharFormat charFormat : charFormats) { + List charFormats = pipeline.list(); + for(IWordFormat charFormat : charFormats) { result = charFormat.format(result, context); } diff --git a/src/main/java/com/github/houbb/sensitive/word/support/format/NoneCharFormat.java b/src/main/java/com/github/houbb/sensitive/word/support/format/WordFormatNone.java similarity index 63% rename from src/main/java/com/github/houbb/sensitive/word/support/format/NoneCharFormat.java rename to src/main/java/com/github/houbb/sensitive/word/support/format/WordFormatNone.java index c1c092d..db244d8 100644 --- a/src/main/java/com/github/houbb/sensitive/word/support/format/NoneCharFormat.java +++ b/src/main/java/com/github/houbb/sensitive/word/support/format/WordFormatNone.java @@ -1,7 +1,7 @@ package com.github.houbb.sensitive.word.support.format; import com.github.houbb.heaven.annotation.ThreadSafe; -import com.github.houbb.sensitive.word.api.ICharFormat; +import com.github.houbb.sensitive.word.api.IWordFormat; import com.github.houbb.sensitive.word.api.IWordContext; /** @@ -11,11 +11,11 @@ import com.github.houbb.sensitive.word.api.IWordContext; * @since 0.0.5 */ @ThreadSafe -public class NoneCharFormat implements ICharFormat { +public class WordFormatNone implements IWordFormat { - private static final ICharFormat INSTANCE = new NoneCharFormat(); + private static final IWordFormat INSTANCE = new WordFormatNone(); - public static ICharFormat getInstance() { + public static IWordFormat getInstance() { return INSTANCE; } diff --git a/src/main/java/com/github/houbb/sensitive/word/support/format/WordFormats.java b/src/main/java/com/github/houbb/sensitive/word/support/format/WordFormats.java new file mode 100644 index 0000000..eceb3b4 --- /dev/null +++ b/src/main/java/com/github/houbb/sensitive/word/support/format/WordFormats.java @@ -0,0 +1,85 @@ +package com.github.houbb.sensitive.word.support.format; + +import com.github.houbb.heaven.support.pipeline.Pipeline; +import com.github.houbb.heaven.util.guava.Guavas; +import com.github.houbb.heaven.util.util.ArrayUtil; +import com.github.houbb.heaven.util.util.CollectionUtil; +import com.github.houbb.sensitive.word.api.IWordFormat; +import com.github.houbb.sensitive.word.api.IWordContext; + +import java.util.Collection; +import java.util.List; + +/** + * 格式化工具类 + * @author binbin.hou + * @since 0.3.5 + */ +public final class WordFormats { + + private WordFormats(){} + + /** + * 链式 + * @param charFormats 列表 + * @return 结果 + */ + public static IWordFormat chains(final IWordFormat... charFormats) { + if(ArrayUtil.isEmpty(charFormats)) { + return none(); + } + + return new WordFormatInit() { + @Override + protected void init(Pipeline pipeline) { + for(IWordFormat charFormat : charFormats) { + pipeline.addLast(charFormat); + } + } + }; + } + + /** + * 链式 + * @param charFormats 列表 + * @return 结果 + */ + public static IWordFormat chains(final Collection charFormats) { + if(CollectionUtil.isEmpty(charFormats)) { + return none(); + } + + return new WordFormatInit() { + @Override + protected void init(Pipeline pipeline) { + for(IWordFormat charFormat : charFormats) { + pipeline.addLast(charFormat); + } + } + }; + } + + public static IWordFormat none() { + return WordFormatNone.getInstance(); + } + public static IWordFormat ignoreCase() { + return WordFormatIgnoreCase.getInstance(); + } + + public static IWordFormat ignoreEnglishStyle() { + return WordFormatIgnoreEnglishStyle.getInstance(); + } + + public static IWordFormat ignoreChineseStyle() { + return WordFormatIgnoreChineseStyle.getInstance(); + } + + public static IWordFormat ignoreNumStyle() { + return WordFormatIgnoreNumStyle.getInstance(); + } + + public static IWordFormat ignoreWidth() { + return WordFormatIgnoreWidth.getInstance(); + } + +} diff --git a/src/main/java/com/github/houbb/sensitive/word/support/replace/SensitiveWordReplaceChar.java b/src/main/java/com/github/houbb/sensitive/word/support/replace/WordReplaceChar.java similarity index 78% rename from src/main/java/com/github/houbb/sensitive/word/support/replace/SensitiveWordReplaceChar.java rename to src/main/java/com/github/houbb/sensitive/word/support/replace/WordReplaceChar.java index 878c4da..e392948 100644 --- a/src/main/java/com/github/houbb/sensitive/word/support/replace/SensitiveWordReplaceChar.java +++ b/src/main/java/com/github/houbb/sensitive/word/support/replace/WordReplaceChar.java @@ -2,7 +2,7 @@ package com.github.houbb.sensitive.word.support.replace; import com.github.houbb.heaven.annotation.ThreadSafe; import com.github.houbb.heaven.constant.CharConst; -import com.github.houbb.sensitive.word.api.ISensitiveWordReplace; +import com.github.houbb.sensitive.word.api.IWordReplace; import com.github.houbb.sensitive.word.api.IWordContext; import com.github.houbb.sensitive.word.api.IWordResult; @@ -12,7 +12,7 @@ import com.github.houbb.sensitive.word.api.IWordResult; * @since 0.2.0 */ @ThreadSafe -public class SensitiveWordReplaceChar implements ISensitiveWordReplace { +public class WordReplaceChar implements IWordReplace { /** * 替换的字符 @@ -20,11 +20,11 @@ public class SensitiveWordReplaceChar implements ISensitiveWordReplace { */ private final char replaceChar; - public SensitiveWordReplaceChar(char replaceChar) { + public WordReplaceChar(char replaceChar) { this.replaceChar = replaceChar; } - public SensitiveWordReplaceChar() { + public WordReplaceChar() { this(CharConst.STAR); } diff --git a/src/main/java/com/github/houbb/sensitive/word/support/replace/SensitiveWordReplaces.java b/src/main/java/com/github/houbb/sensitive/word/support/replace/WordReplaces.java similarity index 52% rename from src/main/java/com/github/houbb/sensitive/word/support/replace/SensitiveWordReplaces.java rename to src/main/java/com/github/houbb/sensitive/word/support/replace/WordReplaces.java index a2f8034..3ffc700 100644 --- a/src/main/java/com/github/houbb/sensitive/word/support/replace/SensitiveWordReplaces.java +++ b/src/main/java/com/github/houbb/sensitive/word/support/replace/WordReplaces.java @@ -1,6 +1,6 @@ package com.github.houbb.sensitive.word.support.replace; -import com.github.houbb.sensitive.word.api.ISensitiveWordReplace; +import com.github.houbb.sensitive.word.api.IWordReplace; /** * 字符替换策略工具类 @@ -8,9 +8,9 @@ import com.github.houbb.sensitive.word.api.ISensitiveWordReplace; * @author binbin.hou * @since 0.3.0 */ -public final class SensitiveWordReplaces { +public final class WordReplaces { - private SensitiveWordReplaces(){} + private WordReplaces(){} /** * 字符 @@ -18,8 +18,8 @@ public final class SensitiveWordReplaces { * @return 结果 * @since 0.3.0 */ - public static ISensitiveWordReplace chars(final char c) { - return new SensitiveWordReplaceChar(c); + public static IWordReplace chars(final char c) { + return new WordReplaceChar(c); } /** @@ -27,8 +27,8 @@ public final class SensitiveWordReplaces { * @return 结果 * @since 0.3.0 */ - public static ISensitiveWordReplace chars() { - return new SensitiveWordReplaceChar(); + public static IWordReplace chars() { + return new WordReplaceChar(); } /** @@ -36,7 +36,7 @@ public final class SensitiveWordReplaces { * @return 结果 * @since 0.7.0 */ - public static ISensitiveWordReplace defaults() { + public static IWordReplace defaults() { return chars(); } diff --git a/src/main/java/com/github/houbb/sensitive/word/support/result/WordResultHandlerWord.java b/src/main/java/com/github/houbb/sensitive/word/support/result/WordResultHandlerWord.java index 9c41ef7..6748946 100644 --- a/src/main/java/com/github/houbb/sensitive/word/support/result/WordResultHandlerWord.java +++ b/src/main/java/com/github/houbb/sensitive/word/support/result/WordResultHandlerWord.java @@ -4,7 +4,7 @@ import com.github.houbb.heaven.annotation.ThreadSafe; import com.github.houbb.sensitive.word.api.IWordContext; import com.github.houbb.sensitive.word.api.IWordResult; import com.github.houbb.sensitive.word.api.IWordResultHandler; -import com.github.houbb.sensitive.word.utils.InnerCharUtils; +import com.github.houbb.sensitive.word.utils.InnerWordCharUtils; /** * 只保留单词 @@ -31,7 +31,7 @@ public class WordResultHandlerWord implements IWordResultHandler { } // 截取 - return InnerCharUtils.getString(originalText.toCharArray(), wordResult); + return InnerWordCharUtils.getString(originalText.toCharArray(), wordResult); } } diff --git a/src/main/java/com/github/houbb/sensitive/word/utils/InnerCharUtils.java b/src/main/java/com/github/houbb/sensitive/word/utils/InnerWordCharUtils.java similarity index 97% rename from src/main/java/com/github/houbb/sensitive/word/utils/InnerCharUtils.java rename to src/main/java/com/github/houbb/sensitive/word/utils/InnerWordCharUtils.java index f6c8653..6e9374c 100644 --- a/src/main/java/com/github/houbb/sensitive/word/utils/InnerCharUtils.java +++ b/src/main/java/com/github/houbb/sensitive/word/utils/InnerWordCharUtils.java @@ -13,9 +13,9 @@ import java.util.Map; * @author Administrator * @since 0.0.4 */ -public final class InnerCharUtils { +public final class InnerWordCharUtils { - private InnerCharUtils() { + private InnerWordCharUtils() { } /** diff --git a/src/main/java/com/github/houbb/sensitive/word/utils/InnerWordDataUtils.java b/src/main/java/com/github/houbb/sensitive/word/utils/InnerWordDataUtils.java deleted file mode 100644 index dd8e18b..0000000 --- a/src/main/java/com/github/houbb/sensitive/word/utils/InnerWordDataUtils.java +++ /dev/null @@ -1,49 +0,0 @@ -package com.github.houbb.sensitive.word.utils; - -import com.github.houbb.heaven.util.util.CollectionUtil; -import com.github.houbb.sensitive.word.api.IWordContext; - -import java.util.*; - -/** - * 数据工具包 - * @since 0.3.0 - */ -public final class InnerWordDataUtils { - - private InnerWordDataUtils(){} - - /** - * 获取禁止列表中真正的禁止词汇 - * @param denyList 禁止 - * @param allowList 允许 - * @param context 上下文 - * @return 结果 - * @since 0.3.0 - */ - public static List getActualDenyList(List denyList, List allowList, - final IWordContext context) { - if(CollectionUtil.isEmpty(denyList)) { - return Collections.emptyList(); - } - if(CollectionUtil.isEmpty(allowList)) { - return denyList; - } - - List formatDenyList = InnerFormatUtils.formatWordList(denyList, context); - List formatAllowList = InnerFormatUtils.formatWordList(allowList, context); - - List resultList = new ArrayList<>(); - // O(1) - Set allowSet = new HashSet<>(formatAllowList); - - for(String deny : formatDenyList) { - if(allowSet.contains(deny)) { - continue; - } - - resultList.add(deny); - } - return resultList; - } -} diff --git a/src/main/java/com/github/houbb/sensitive/word/utils/InnerFormatUtils.java b/src/main/java/com/github/houbb/sensitive/word/utils/InnerWordFormatUtils.java similarity index 87% rename from src/main/java/com/github/houbb/sensitive/word/utils/InnerFormatUtils.java rename to src/main/java/com/github/houbb/sensitive/word/utils/InnerWordFormatUtils.java index fe96fe2..6389b77 100644 --- a/src/main/java/com/github/houbb/sensitive/word/utils/InnerFormatUtils.java +++ b/src/main/java/com/github/houbb/sensitive/word/utils/InnerWordFormatUtils.java @@ -2,7 +2,7 @@ package com.github.houbb.sensitive.word.utils; import com.github.houbb.heaven.util.lang.StringUtil; import com.github.houbb.heaven.util.util.CollectionUtil; -import com.github.houbb.sensitive.word.api.ICharFormat; +import com.github.houbb.sensitive.word.api.IWordFormat; import com.github.houbb.sensitive.word.api.IWordContext; import java.util.*; @@ -11,9 +11,9 @@ import java.util.*; * 内部格式化工具类 * @since 0.1.1 */ -public final class InnerFormatUtils { +public final class InnerWordFormatUtils { - private InnerFormatUtils(){} + private InnerWordFormatUtils(){} /** * 空字符数组 @@ -34,7 +34,7 @@ public final class InnerFormatUtils { } StringBuilder stringBuilder = new StringBuilder(); - ICharFormat charFormat = context.charFormat(); + IWordFormat charFormat = context.wordFormat(); char[] chars = original.toCharArray(); for(char c : chars) { char cf = charFormat.format(c, context); @@ -61,7 +61,7 @@ public final class InnerFormatUtils { char[] rawChars = original.toCharArray(); Map map = new HashMap<>(rawChars.length); - ICharFormat charFormat = context.charFormat(); + IWordFormat charFormat = context.wordFormat(); for(int i = 0; i < len; i++) { final char currentChar = rawChars[i]; char formatChar = charFormat.format(currentChar, context); @@ -86,7 +86,7 @@ public final class InnerFormatUtils { List resultList = new ArrayList<>(list.size()); for(String word : list) { - String formatWord = InnerFormatUtils.format(word, context); + String formatWord = InnerWordFormatUtils.format(word, context); resultList.add(formatWord); } diff --git a/src/main/java/com/github/houbb/sensitive/word/utils/InnerNumUtils.java b/src/main/java/com/github/houbb/sensitive/word/utils/InnerWordNumUtils.java similarity index 93% rename from src/main/java/com/github/houbb/sensitive/word/utils/InnerNumUtils.java rename to src/main/java/com/github/houbb/sensitive/word/utils/InnerWordNumUtils.java index 62b40c1..9cfa860 100644 --- a/src/main/java/com/github/houbb/sensitive/word/utils/InnerNumUtils.java +++ b/src/main/java/com/github/houbb/sensitive/word/utils/InnerWordNumUtils.java @@ -4,7 +4,7 @@ import com.github.houbb.heaven.util.guava.Guavas; import com.github.houbb.heaven.util.lang.ObjectUtil; import com.github.houbb.heaven.util.lang.StringUtil; import com.github.houbb.sensitive.word.api.IWordContext; -import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum; +import com.github.houbb.sensitive.word.constant.enums.WordValidModeEnum; import java.util.Map; @@ -15,9 +15,9 @@ import java.util.Map; * @author Administrator * @since 0.0.4 */ -public final class InnerNumUtils { +public final class InnerWordNumUtils { - private InnerNumUtils(){} + private InnerWordNumUtils(){} private static final String NUM_ONE = "⓪0零º₀⓿○" + "123456789" + @@ -114,13 +114,13 @@ public final class InnerNumUtils { * * @param txt 文本信息 * @param beginIndex 开始下标 - * @param validModeEnum 验证模式 + * @param wordValidModeEnum 验证模式 * @param context 执行上下文 * @return 敏感数字对应的长度 * @since 0.0.5 */ private int getSensitiveNumber(final String txt, final int beginIndex, - final ValidModeEnum validModeEnum, + final WordValidModeEnum wordValidModeEnum, final IWordContext context) { return 0; } diff --git a/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsTest.java b/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsTest.java index 72f0a03..f17dda8 100644 --- a/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsTest.java +++ b/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsTest.java @@ -2,7 +2,7 @@ package com.github.houbb.sensitive.word.bs; import com.github.houbb.sensitive.word.support.allow.WordAllows; import com.github.houbb.sensitive.word.support.deny.WordDenys; -import com.github.houbb.sensitive.word.support.replace.SensitiveWordReplaces; +import com.github.houbb.sensitive.word.support.replace.WordReplaces; import org.junit.Assert; import org.junit.Test; @@ -73,7 +73,7 @@ public class SensitiveWordBsTest { final String text = "五星红旗迎风飘扬,毛主席的画像屹立在天安门前。"; String result = SensitiveWordBs.newInstance() - .sensitiveWordReplace(SensitiveWordReplaces.chars('0')) + .wordReplace(WordReplaces.chars('0')) .init() .replace(text); Assert.assertEquals("0000迎风飘扬,000的画像屹立在000前。", result); diff --git a/src/test/java/com/github/houbb/sensitive/word/core/SensitiveWordHelperTest.java b/src/test/java/com/github/houbb/sensitive/word/core/SensitiveWordHelperTest.java index e66c145..93e8f58 100644 --- a/src/test/java/com/github/houbb/sensitive/word/core/SensitiveWordHelperTest.java +++ b/src/test/java/com/github/houbb/sensitive/word/core/SensitiveWordHelperTest.java @@ -1,8 +1,8 @@ package com.github.houbb.sensitive.word.core; -import com.github.houbb.sensitive.word.api.ISensitiveWordReplace; +import com.github.houbb.sensitive.word.api.IWordReplace; import com.github.houbb.sensitive.word.api.IWordResult; -import com.github.houbb.sensitive.word.replace.MySensitiveWordReplace; +import com.github.houbb.sensitive.word.replace.MyWordReplace; import com.github.houbb.sensitive.word.support.result.WordResultHandlers; import org.junit.Assert; import org.junit.Test; @@ -158,7 +158,7 @@ public class SensitiveWordHelperTest { public void defineReplaceTest() { final String text = "五星红旗迎风飘扬,毛主席的画像屹立在天安门前。"; - ISensitiveWordReplace replace = new MySensitiveWordReplace(); + IWordReplace replace = new MyWordReplace(); String result = SensitiveWordHelper.replace(text, replace); Assert.assertEquals("国家旗帜迎风飘扬,教员的画像屹立在***前。", result); diff --git a/src/test/java/com/github/houbb/sensitive/word/data/DictSlimTest.java b/src/test/java/com/github/houbb/sensitive/word/data/DictSlimTest.java index a6844ef..7507aa1 100644 --- a/src/test/java/com/github/houbb/sensitive/word/data/DictSlimTest.java +++ b/src/test/java/com/github/houbb/sensitive/word/data/DictSlimTest.java @@ -7,7 +7,7 @@ import com.github.houbb.heaven.util.lang.StringUtil; import com.github.houbb.heaven.util.util.CollectionUtil; import com.github.houbb.opencc4j.core.impl.ZhConvertBootstrap; import com.github.houbb.opencc4j.support.segment.impl.CharSegment; -import com.github.houbb.sensitive.word.utils.InnerNumUtils; +import com.github.houbb.sensitive.word.utils.InnerWordNumUtils; import org.junit.Ignore; import org.junit.Test; @@ -127,7 +127,7 @@ public class DictSlimTest { // 停顿词语 String trim = string.replaceAll("加|否|与|和", ""); - String mapString = InnerNumUtils.getMappingString(trim); + String mapString = InnerWordNumUtils.getMappingString(trim); boolean result = StringUtil.isDigit(mapString); if(result) { System.out.println(string); diff --git a/src/test/java/com/github/houbb/sensitive/word/replace/MySensitiveWordReplace.java b/src/test/java/com/github/houbb/sensitive/word/replace/MyWordReplace.java similarity index 77% rename from src/test/java/com/github/houbb/sensitive/word/replace/MySensitiveWordReplace.java rename to src/test/java/com/github/houbb/sensitive/word/replace/MyWordReplace.java index 374e26b..d9a0032 100644 --- a/src/test/java/com/github/houbb/sensitive/word/replace/MySensitiveWordReplace.java +++ b/src/test/java/com/github/houbb/sensitive/word/replace/MyWordReplace.java @@ -1,9 +1,9 @@ package com.github.houbb.sensitive.word.replace; -import com.github.houbb.sensitive.word.api.ISensitiveWordReplace; +import com.github.houbb.sensitive.word.api.IWordReplace; import com.github.houbb.sensitive.word.api.IWordContext; import com.github.houbb.sensitive.word.api.IWordResult; -import com.github.houbb.sensitive.word.utils.InnerCharUtils; +import com.github.houbb.sensitive.word.utils.InnerWordCharUtils; /** * 自定义敏感词替换策略 @@ -11,11 +11,11 @@ import com.github.houbb.sensitive.word.utils.InnerCharUtils; * @author binbin.hou * @since 0.2.0 */ -public class MySensitiveWordReplace implements ISensitiveWordReplace { +public class MyWordReplace implements IWordReplace { @Override public void replace(StringBuilder stringBuilder, final char[] rawChars, IWordResult wordResult, IWordContext wordContext) { - String sensitiveWord = InnerCharUtils.getString(rawChars, wordResult); + String sensitiveWord = InnerWordCharUtils.getString(rawChars, wordResult); // 自定义不同的敏感词替换策略,可以从数据库等地方读取 if("五星红旗".equals(sensitiveWord)) { stringBuilder.append("国家旗帜");