From ab6e91f7a415aa787902649b5190be78c64cf6ef Mon Sep 17 00:00:00 2001 From: "binbin.hou" Date: Tue, 6 Jun 2023 19:58:35 +0800 Subject: [PATCH] release branch 0.3.0 --- CHANGE_LOG.md | 8 + README.md | 37 ++- pom.xml | 2 +- release.bat | 4 +- .../sensitive/word/api/IWordContext.java | 49 +++ .../houbb/sensitive/word/api/IWordData.java | 20 -- .../houbb/sensitive/word/api/IWordMap.java | 2 - .../sensitive/word/bs/SensitiveWordBs.java | 310 ++++++++---------- .../word/bs/SensitiveWordContext.java | 50 +++ .../houbb/sensitive/word/bs/package-info.java | 5 - .../sensitive/word/constant/AppConst.java | 6 + .../word/core/SensitiveWordHelper.java | 8 +- .../word/support/allow/WordAllowSystem.java | 9 + .../word/support/allow/WordAllows.java | 3 +- .../check/impl/SensitiveCheckChain.java | 58 ---- .../check/impl/SensitiveCheckEmail.java | 13 +- .../check/impl/SensitiveCheckInit.java | 51 +++ .../check/impl/SensitiveCheckNone.java | 41 +++ .../support/check/impl/SensitiveCheckNum.java | 13 +- .../support/check/impl/SensitiveCheckUrl.java | 17 +- .../check/impl/SensitiveCheckWord.java | 15 +- .../support/check/impl/SensitiveChecks.java | 97 ++++++ .../word/support/data/SensitiveWordData.java | 53 --- .../word/support/deny/WordDenySystem.java | 9 + .../word/support/deny/WordDenys.java | 3 +- .../word/support/format/CharFormatChain.java | 48 --- .../word/support/format/CharFormatInit.java | 43 +++ .../word/support/format/CharFormats.java | 112 +++++++ .../support/format/IgnoreCaseCharFormat.java | 6 + .../format/IgnoreChineseStyleFormat.java | 8 +- .../format/IgnoreEnglishStyleFormat.java | 10 +- .../format/IgnoreNumStyleCharFormat.java | 10 +- .../support/format/IgnoreWidthCharFormat.java | 8 +- .../word/support/format/NoneCharFormat.java | 27 ++ .../word/support/map/SensitiveWordMap.java | 17 +- .../sensitive/word/support/map/WordMaps.java | 24 ++ .../replace/SensitiveWordReplaceChar.java | 9 + .../replace/SensitiveWordReplaces.java | 34 ++ 
.../support/result/WordResultHandlerRaw.java | 9 + .../support/result/WordResultHandlerWord.java | 9 + .../support/result/WordResultHandlers.java | 5 +- .../{CharUtils.java => InnerCharUtils.java} | 4 +- .../word/utils/InnerFormatUtils.java | 32 +- .../{NumUtils.java => InnerNumUtils.java} | 4 +- .../word/utils/InnerWordDataUtils.java | 48 +++ .../word/bs/SensitiveWordBsChineseTest.java | 2 +- .../word/bs/SensitiveWordBsEmailTest.java | 4 +- .../word/bs/SensitiveWordBsEnglishTest.java | 2 +- .../word/bs/SensitiveWordBsNumLenTest.java | 3 +- .../word/bs/SensitiveWordBsNumTest.java | 4 +- .../word/bs/SensitiveWordBsRepeatTest.java | 1 + .../word/bs/SensitiveWordBsTest.java | 18 +- .../word/bs/SensitiveWordBsUrlTest.java | 10 +- .../bs/SensitiveWordBsUserDefineTest.java | 2 +- .../sensitive/word/data/DictNumTest.java | 6 - .../sensitive/word/data/DictSlimTest.java | 5 +- 56 files changed, 952 insertions(+), 455 deletions(-) delete mode 100644 src/main/java/com/github/houbb/sensitive/word/api/IWordData.java delete mode 100644 src/main/java/com/github/houbb/sensitive/word/bs/package-info.java delete mode 100644 src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckChain.java create mode 100644 src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckInit.java create mode 100644 src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckNone.java create mode 100644 src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveChecks.java delete mode 100644 src/main/java/com/github/houbb/sensitive/word/support/data/SensitiveWordData.java delete mode 100644 src/main/java/com/github/houbb/sensitive/word/support/format/CharFormatChain.java create mode 100644 src/main/java/com/github/houbb/sensitive/word/support/format/CharFormatInit.java create mode 100644 src/main/java/com/github/houbb/sensitive/word/support/format/CharFormats.java create mode 100644 
src/main/java/com/github/houbb/sensitive/word/support/format/NoneCharFormat.java create mode 100644 src/main/java/com/github/houbb/sensitive/word/support/map/WordMaps.java create mode 100644 src/main/java/com/github/houbb/sensitive/word/support/replace/SensitiveWordReplaces.java rename src/main/java/com/github/houbb/sensitive/word/utils/{CharUtils.java => InnerCharUtils.java} (96%) rename src/main/java/com/github/houbb/sensitive/word/utils/{NumUtils.java => InnerNumUtils.java} (98%) create mode 100644 src/main/java/com/github/houbb/sensitive/word/utils/InnerWordDataUtils.java diff --git a/CHANGE_LOG.md b/CHANGE_LOG.md index 8c40181..46309f5 100644 --- a/CHANGE_LOG.md +++ b/CHANGE_LOG.md @@ -140,3 +140,11 @@ |:---|:---|:---|:---|:--| | 1 | O | 移除日志初始化的控台日志输出 | 2023-02-17 23:51:58 | | | 2 | A | 支持数字检验的长度指定 | 2022-01-17 23:51:58 | | + +# release_0.3.0 + +| 序号 | 变更类型 | 说明 | 时间 | 备注 | +|:---|:-----|:-----------------|:--------------------|:--| +| 1 | O | 移除冗余的耗时统计 | 2023-06-06 23:51:58 | | +| 2 | A | 优化代码实现方式,添加工具类方法 | 2023-06-06 23:51:58 | | + diff --git a/README.md b/README.md index 89f3a6b..1c8a0ad 100644 --- a/README.md +++ b/README.md @@ -46,9 +46,9 @@ [CHANGE_LOG.md](https://github.com/houbb/sensitive-word/blob/master/doc/CHANGE_LOG.md) -v0.2.1 变更: +v0.3.0 变更: -- 支持用户自定义数字检测的长度 +- 代码实现优化 # 快速开始 @@ -64,7 +64,7 @@ v0.2.1 变更: com.github.houbb sensitive-word - 0.2.1 + 0.3.0 ``` @@ -285,6 +285,7 @@ final String text = "ⒻⒻⒻfⓤuⓤ⒰cⓒ⒦ the bad words"; List wordList = SensitiveWordBs.newInstance() .ignoreRepeat(true) + .init() .findAll(text); Assert.assertEquals("[ⒻⒻⒻfⓤuⓤ⒰cⓒ⒦]", wordList.toString()); ``` @@ -308,12 +309,13 @@ V0.2.1 之后,支持通过 `numCheckLen(长度)` 自定义检测的长度。 final String text = "你懂得:12345678"; // 默认检测 8 位 -List wordList = SensitiveWordBs.newInstance().findAll(text); +List wordList = SensitiveWordBs.newInstance().init().findAll(text); Assert.assertEquals("[12345678]", wordList.toString()); // 指定数字的长度,避免误杀 List wordList2 = SensitiveWordBs.newInstance() 
.numCheckLen(9) + .init() .findAll(text); Assert.assertEquals("[]", wordList2.toString()); ``` @@ -353,18 +355,19 @@ Assert.assertTrue(wordBs.contains(text)); 其中各项配置的说明如下: -| 序号 | 方法 | 说明 | 默认值 | -|:----|:---|:--------------|:------| -| 1 | ignoreCase | 忽略大小写 | true | -| 2 | ignoreWidth | 忽略半角圆角 | true | -| 3 | ignoreNumStyle | 忽略数字的写法 | true | -| 4 | ignoreChineseStyle | 忽略中文的书写格式 | true | -| 5 | ignoreEnglishStyle | 忽略英文的书写格式 | true | -| 6 | ignoreRepeat | 忽略重复词 | false | -| 7 | enableNumCheck | 是否启用数字检测。 | true | -| 8 | enableEmailCheck | 是有启用邮箱检测 | true | -| 9 | enableUrlCheck | 是否启用链接检测 | true | -| 10 | numCheckLen | 数字检测,自定义指定长度。| 8 | +| 序号 | 方法 | 说明 | 默认值 | +|:---|:---------------------|:--------------|:-------| +| 1 | ignoreCase | 忽略大小写 | true | +| 2 | ignoreWidth | 忽略半角圆角 | true | +| 3 | ignoreNumStyle | 忽略数字的写法 | true | +| 4 | ignoreChineseStyle | 忽略中文的书写格式 | true | +| 5 | ignoreEnglishStyle | 忽略英文的书写格式 | true | +| 6 | ignoreRepeat | 忽略重复词 | false | +| 7 | enableNumCheck | 是否启用数字检测。 | true | +| 8 | enableEmailCheck | 是有启用邮箱检测 | true | +| 9 | enableUrlCheck | 是否启用链接检测 | true | +| 10 | numCheckLen | 数字检测,自定义指定长度。 | 8 | +| 11 | sensitiveWordReplace | 敏感词替换策略 | `*` 替换 | # 动态加载(用户自定义) @@ -612,6 +615,8 @@ public class SensitiveWordService { # 后期 road-map +- [ ] wordMap 的抽象,便于拓展 + - 同音字处理 - 形近字处理 diff --git a/pom.xml b/pom.xml index 9c934e7..58f1d6b 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ com.github.houbb sensitive-word - 0.2.1 + 0.3.0 diff --git a/release.bat b/release.bat index 25c7ada..c267b31 100644 --- a/release.bat +++ b/release.bat @@ -10,9 +10,9 @@ ECHO "============================= RELEASE START..." 
:: 版本号信息(需要手动指定) :::: 旧版本名称 -SET version=0.2.1 +SET version=0.3.0 :::: 新版本名称 -SET newVersion=0.2.2 +SET newVersion=0.4.0 :::: 组织名称 SET groupName=com.github.houbb :::: 项目名称 diff --git a/src/main/java/com/github/houbb/sensitive/word/api/IWordContext.java b/src/main/java/com/github/houbb/sensitive/word/api/IWordContext.java index fe1a233..2a0da76 100644 --- a/src/main/java/com/github/houbb/sensitive/word/api/IWordContext.java +++ b/src/main/java/com/github/houbb/sensitive/word/api/IWordContext.java @@ -1,5 +1,7 @@ package com.github.houbb.sensitive.word.api; +import com.github.houbb.sensitive.word.support.check.ISensitiveCheck; + import java.util.Map; /** @@ -173,4 +175,51 @@ public interface IWordContext { */ IWordContext sensitiveCheckNumLen(final int sensitiveCheckNumLen); + /** + * 设置检测策略 + * @param sensitiveCheck 检测策略 + * @return this + * @since 0.3.0 + */ + IWordContext sensitiveCheck(final ISensitiveCheck sensitiveCheck); + + /** + * 获取检测策略 + * @return 检测策略 + * @since 0.3.0 + */ + ISensitiveCheck sensitiveCheck(); + + /** + * 设置敏感词替换策略 + * @param sensitiveWordReplace 策略 + * @return this + * @since 0.3.0 + */ + IWordContext sensitiveWordReplace(final ISensitiveWordReplace sensitiveWordReplace); + + /** + * 敏感词替换策略 + * @return 替换策略 + * @since 0.3.0 + */ + ISensitiveWordReplace sensitiveWordReplace(); + + /** + * 设置统一的字符处理 + * + * @param charFormat 字符处理 + * @return 结果 + * @since 0.3.0 + */ + IWordContext charFormat(final ICharFormat charFormat); + + /** + * 获取格式化策略 + * + * @return 策略 + * @since 0.3.0 + */ + ICharFormat charFormat(); + } diff --git a/src/main/java/com/github/houbb/sensitive/word/api/IWordData.java b/src/main/java/com/github/houbb/sensitive/word/api/IWordData.java deleted file mode 100644 index 6413285..0000000 --- a/src/main/java/com/github/houbb/sensitive/word/api/IWordData.java +++ /dev/null @@ -1,20 +0,0 @@ -package com.github.houbb.sensitive.word.api; - -import java.util.List; - -/** - * 数据词接口 - * @author binbin.hou - * @since 0.0.1 - */ 
-@Deprecated -public interface IWordData { - - /** - * 获取对应的敏感词 - * @return 结果 - * @since 0.0.1 - */ - List getWordData(); - -} diff --git a/src/main/java/com/github/houbb/sensitive/word/api/IWordMap.java b/src/main/java/com/github/houbb/sensitive/word/api/IWordMap.java index 238c15d..31610c6 100644 --- a/src/main/java/com/github/houbb/sensitive/word/api/IWordMap.java +++ b/src/main/java/com/github/houbb/sensitive/word/api/IWordMap.java @@ -59,13 +59,11 @@ public interface IWordMap extends ISensitiveCheck { * ps: 这里可以添加优化。 * * @param target 目标字符串 - * @param replace 替换策略 * @param context 上下文 * @return 替换后结果 * @since 0.0.2 */ String replace(final String target, - final ISensitiveWordReplace replace, final IWordContext context); } diff --git a/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java b/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java index c2909af..bca90fc 100644 --- a/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java +++ b/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java @@ -1,18 +1,20 @@ package com.github.houbb.sensitive.word.bs; -import com.github.houbb.heaven.constant.CharConst; import com.github.houbb.heaven.support.handler.IHandler; import com.github.houbb.heaven.util.common.ArgUtil; import com.github.houbb.heaven.util.util.CollectionUtil; import com.github.houbb.sensitive.word.api.*; import com.github.houbb.sensitive.word.support.allow.WordAllows; +import com.github.houbb.sensitive.word.support.check.ISensitiveCheck; +import com.github.houbb.sensitive.word.support.check.impl.SensitiveChecks; import com.github.houbb.sensitive.word.support.deny.WordDenys; -import com.github.houbb.sensitive.word.support.map.SensitiveWordMap; -import com.github.houbb.sensitive.word.support.replace.SensitiveWordReplaceChar; +import com.github.houbb.sensitive.word.support.format.CharFormats; +import com.github.houbb.sensitive.word.support.map.WordMaps; +import 
com.github.houbb.sensitive.word.support.replace.SensitiveWordReplaces; import com.github.houbb.sensitive.word.support.result.WordResultHandlers; -import com.github.houbb.sensitive.word.utils.InnerFormatUtils; +import com.github.houbb.sensitive.word.utils.InnerWordDataUtils; -import java.util.*; +import java.util.List; /** * 敏感词引导类 @@ -30,19 +32,63 @@ public class SensitiveWordBs { private SensitiveWordBs() { } + + //------------------------------------------------------------- 基本属性 START + // 格式统一化 + /** + * 是否忽略大小写 + */ + private boolean ignoreCase = true; + /** + * 是否忽略全角、半角 + */ + private boolean ignoreWidth = true; + /** + * 是否忽略数字样式 + */ + private boolean ignoreNumStyle = true; + /** + * 是否忽略中文样式 + */ + private boolean ignoreChineseStyle = true; + /** + * 是否忽略英文样式 + */ + private boolean ignoreEnglishStyle = true; + /** + * 是否忽略重复 + */ + private boolean ignoreRepeat = false; + + // 开启校验 + /** + * 启用数字检测 + */ + private boolean sensitiveCheckNum = true; + /** + * 启用邮箱检测 + */ + private boolean sensitiveCheckEmail = true; + /** + * 启用 URL 检测 + */ + private boolean sensitiveCheckUrl = true; + + // 额外配置 + /** + * 检测数字时的长度 + */ + private int sensitiveCheckNumLen = 8; + + //------------------------------------------------------------- 基本属性 END /** * 敏感词 map * + * TODO: 暂时定义为 final,后续放开抽象。 + * * @since 0.0.1 */ - private IWordMap sensitiveWordMap; - - /** - * 默认的执行上下文 - * - * @since 0.0.4 - */ - private final IWordContext context = buildDefaultContext(); + private final IWordMap wordMap = WordMaps.defaults(); /** * 禁止的单词 @@ -57,76 +103,21 @@ public class SensitiveWordBs { private IWordAllow wordAllow = WordAllows.system(); /** - * DCL 初始化 wordMap 信息 - * - * 注意:map 的构建是一个比较耗时的动作 - * @since 0.0.4 + * 替换策略 + * @since 0.3.0 */ - private synchronized void initWordMap() { - // 加载配置信息 - List denyList = wordDeny.deny(); - List allowList = wordAllow.allow(); - List results = getActualDenyList(denyList, allowList); - - // 初始化 DFA 信息 - if(sensitiveWordMap == null) { - 
sensitiveWordMap = new SensitiveWordMap(); - } - // 便于可以多次初始化 - sensitiveWordMap.initWordMap(results); - } + private ISensitiveWordReplace sensitiveWordReplace = SensitiveWordReplaces.chars(); /** - * 获取禁止列表中真正的禁止词汇 - * @param denyList 禁止 - * @param allowList 允许 - * @return 结果 - * @since 0.1.1 + * 上下文 + * @since 0.3.0 */ - List getActualDenyList(List denyList, - List allowList) { - if(CollectionUtil.isEmpty(denyList)) { - return Collections.emptyList(); - } - if(CollectionUtil.isEmpty(allowList)) { - return denyList; - } + private IWordContext context = SensitiveWordContext.newInstance(); - List formatDenyList = this.formatWordList(denyList); - List formatAllowList = this.formatWordList(allowList); - - List resultList = new ArrayList<>(); - // O(1) - Set allowSet = new HashSet<>(formatAllowList); - - for(String deny : formatDenyList) { - if(allowSet.contains(deny)) { - continue; - } - - resultList.add(deny); - } - return resultList; - } - - /** - * 数据格式化处理 - * @param list 列表 - * @return 结果 - * @since 0.1.1 - */ - private List formatWordList(List list) { - if(CollectionUtil.isEmpty(list)) { - return list; - } - - List resultList = new ArrayList<>(list.size()); - for(String word : list) { - String formatWord = InnerFormatUtils.format(word, this.context); - resultList.add(formatWord); - } - - return resultList; + public SensitiveWordBs sensitiveWordReplace(ISensitiveWordReplace sensitiveWordReplace) { + ArgUtil.notNull(sensitiveWordReplace, "sensitiveWordReplace"); + this.sensitiveWordReplace = sensitiveWordReplace; + return this; } /** @@ -149,11 +140,68 @@ public class SensitiveWordBs { * @return this */ public SensitiveWordBs init() { + // 初始化 context + this.initContext(); + + // 替换策略 + final ICharFormat charFormat = CharFormats.initCharFormat(context); + context.charFormat(charFormat); + + // 3. 初始化对应的 sensitiveCheck + final ISensitiveCheck sensitiveCheck = SensitiveChecks.initSensitiveCheck(context); + context.sensitiveCheck(sensitiveCheck); + + //2. 
初始化 word this.initWordMap(); return this; } + /** + * 构建默认的上下文 + * + * @return 结果 + * @since 0.0.4 + */ + private IWordContext initContext() { + this.context = SensitiveWordContext.newInstance(); + + // 格式统一化 + context.ignoreCase(ignoreCase); + context.ignoreWidth(ignoreWidth); + context.ignoreNumStyle(ignoreNumStyle); + context.ignoreChineseStyle(ignoreChineseStyle); + context.ignoreEnglishStyle(ignoreEnglishStyle); + context.ignoreRepeat(ignoreRepeat); + + // 开启校验 + context.sensitiveCheckNum(sensitiveCheckNum); + context.sensitiveCheckEmail(sensitiveCheckEmail); + context.sensitiveCheckUrl(sensitiveCheckUrl); + + // 额外配置 + context.sensitiveCheckNumLen(sensitiveCheckNumLen); + context.sensitiveWordReplace(sensitiveWordReplace); + + return context; + } + + /** + * DCL 初始化 wordMap 信息 + * + * 注意:map 的构建是一个比较耗时的动作 + * @since 0.0.4 + */ + private synchronized void initWordMap() { + // 加载配置信息 + List denyList = wordDeny.deny(); + List allowList = wordAllow.allow(); + List results = InnerWordDataUtils.getActualDenyList(denyList, allowList, context); + + // 便于可以多次初始化 + wordMap.initWordMap(results); + } + /** * 设置禁止的实现 * @param wordDeny 禁止的实现 @@ -186,7 +234,7 @@ public class SensitiveWordBs { * @return this */ public SensitiveWordBs enableNumCheck(boolean enableNumCheck) { - this.context.sensitiveCheckNum(enableNumCheck); + this.sensitiveCheckNum = enableNumCheck; return this; } @@ -197,7 +245,7 @@ public class SensitiveWordBs { * @since 0.2.1 */ public SensitiveWordBs numCheckLen(int numCheckLen) { - this.context.sensitiveCheckNumLen(numCheckLen); + this.sensitiveCheckNumLen = numCheckLen; return this; } @@ -209,7 +257,7 @@ public class SensitiveWordBs { * @return this */ public SensitiveWordBs enableEmailCheck(boolean enableEmailCheck) { - this.context.sensitiveCheckEmail(enableEmailCheck); + this.sensitiveCheckEmail = enableEmailCheck; return this; } @@ -221,7 +269,7 @@ public class SensitiveWordBs { * @return this */ public SensitiveWordBs enableUrlCheck(boolean 
enableUrlCheck) { - this.context.sensitiveCheckUrl(enableUrlCheck); + this.sensitiveCheckUrl = enableUrlCheck; return this; } @@ -232,7 +280,7 @@ public class SensitiveWordBs { * @since 0.0.14 */ public SensitiveWordBs ignoreCase(boolean ignoreCase) { - this.context.ignoreCase(ignoreCase); + this.ignoreCase = ignoreCase; return this; } @@ -243,7 +291,7 @@ public class SensitiveWordBs { * @since 0.0.14 */ public SensitiveWordBs ignoreWidth(boolean ignoreWidth) { - this.context.ignoreWidth(ignoreWidth); + this.ignoreWidth = ignoreWidth; return this; } @@ -254,7 +302,7 @@ public class SensitiveWordBs { * @since 0.0.14 */ public SensitiveWordBs ignoreNumStyle(boolean ignoreNumStyle) { - this.context.ignoreNumStyle(ignoreNumStyle); + this.ignoreNumStyle = ignoreNumStyle; return this; } @@ -265,7 +313,7 @@ public class SensitiveWordBs { * @since 0.0.14 */ public SensitiveWordBs ignoreChineseStyle(boolean ignoreChineseStyle) { - this.context.ignoreChineseStyle(ignoreChineseStyle); + this.ignoreChineseStyle = ignoreChineseStyle; return this; } @@ -276,7 +324,7 @@ public class SensitiveWordBs { * @since 0.0.14 */ public SensitiveWordBs ignoreEnglishStyle(boolean ignoreEnglishStyle) { - this.context.ignoreEnglishStyle(ignoreEnglishStyle); + this.ignoreEnglishStyle = ignoreEnglishStyle; return this; } @@ -287,37 +335,11 @@ public class SensitiveWordBs { * @since 0.0.14 */ public SensitiveWordBs ignoreRepeat(boolean ignoreRepeat) { - this.context.ignoreRepeat(ignoreRepeat); + this.ignoreRepeat = ignoreRepeat; return this; } - /** - * 构建默认的上下文 - * - * @return 结果 - * @since 0.0.4 - */ - private IWordContext buildDefaultContext() { - IWordContext wordContext = SensitiveWordContext.newInstance(); - // 格式统一化 - wordContext.ignoreCase(true); - wordContext.ignoreWidth(true); - wordContext.ignoreNumStyle(true); - wordContext.ignoreChineseStyle(true); - wordContext.ignoreEnglishStyle(true); - wordContext.ignoreRepeat(false); - - // 开启校验 - wordContext.sensitiveCheckNum(true); - 
wordContext.sensitiveCheckEmail(true); - wordContext.sensitiveCheckUrl(true); - - // 额外配置 - wordContext.sensitiveCheckNumLen(8); - - return wordContext; - } - + //------------------------------------------------------------------------------------ 公开方法 START /** * 是否包含敏感词 * @@ -326,9 +348,7 @@ public class SensitiveWordBs { * @since 0.0.1 */ public boolean contains(final String target) { - statusCheck(); - - return sensitiveWordMap.contains(target, context); + return wordMap.contains(target, context); } /** @@ -369,9 +389,8 @@ public class SensitiveWordBs { */ public List findAll(final String target, final IWordResultHandler handler) { ArgUtil.notNull(handler, "handler"); - statusCheck(); - List wordResults = sensitiveWordMap.findAll(target, context); + List wordResults = wordMap.findAll(target, context); return CollectionUtil.toList(wordResults, new IHandler() { @Override public R handle(IWordResult wordResult) { @@ -392,67 +411,22 @@ public class SensitiveWordBs { */ public R findFirst(final String target, final IWordResultHandler handler) { ArgUtil.notNull(handler, "handler"); - statusCheck(); - IWordResult wordResult = sensitiveWordMap.findFirst(target, context); + IWordResult wordResult = wordMap.findFirst(target, context); return handler.handle(wordResult); } - /** * 替换所有内容 * * @param target 目标字符串 - * @param replaceChar 替换为的 char - * @return 替换后结果 - * @since 0.0.2 - */ - public String replace(final String target, final char replaceChar) { - ISensitiveWordReplace replace = new SensitiveWordReplaceChar(replaceChar); - - return replace(target, replace); - } - - /** - * 替换所有内容 - * - * @param target 目标字符串 - * @param replace 替换策略 * @return 替换后结果 * @since 0.2.0 */ - public String replace(final String target, final ISensitiveWordReplace replace) { - statusCheck(); - - return sensitiveWordMap.replace(target, replace, context); - } - - /** - * 替换所有内容 - * 1. 
默认使用空格替换,避免星号改变 md 的格式。 - * - * @param target 目标字符串 - * @return 替换后结果 - * @since 0.0.2 - */ public String replace(final String target) { - return this.replace(target, CharConst.STAR); + return wordMap.replace(target, context); } - - /** - * 状态校验 - * @since 0.0.13 - */ - private void statusCheck(){ - //DLC - if(sensitiveWordMap == null) { - synchronized (this) { - if(sensitiveWordMap == null) { - this.init(); - } - } - } - } + //------------------------------------------------------------------------------------ 公开方法 END } diff --git a/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordContext.java b/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordContext.java index 8b2e4c1..18ff5e4 100644 --- a/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordContext.java +++ b/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordContext.java @@ -1,6 +1,9 @@ package com.github.houbb.sensitive.word.bs; +import com.github.houbb.sensitive.word.api.ICharFormat; +import com.github.houbb.sensitive.word.api.ISensitiveWordReplace; import com.github.houbb.sensitive.word.api.IWordContext; +import com.github.houbb.sensitive.word.support.check.ISensitiveCheck; import java.util.Map; @@ -77,6 +80,44 @@ public class SensitiveWordContext implements IWordContext { */ private int sensitiveCheckNumLen; + /** + * 检测策略 + * @since 0.3.0 + */ + private ISensitiveCheck sensitiveCheck; + + /** + * 替换策略 + * @since 0.3.0 + */ + private ISensitiveWordReplace sensitiveWordReplace; + + /** + * 格式化 + * @since 0.3.0 + */ + private ICharFormat charFormat; + + @Override + public ISensitiveWordReplace sensitiveWordReplace() { + return sensitiveWordReplace; + } + + public SensitiveWordContext sensitiveWordReplace(ISensitiveWordReplace sensitiveWordReplace) { + this.sensitiveWordReplace = sensitiveWordReplace; + return this; + } + + @Override + public ISensitiveCheck sensitiveCheck() { + return sensitiveCheck; + } + + public SensitiveWordContext 
sensitiveCheck(ISensitiveCheck sensitiveCheck) { + this.sensitiveCheck = sensitiveCheck; + return this; + } + /** * 私有化构造器 * @since 0.0.4 @@ -214,4 +255,13 @@ public class SensitiveWordContext implements IWordContext { return this; } + @Override + public ICharFormat charFormat() { + return charFormat; + } + + public SensitiveWordContext charFormat(ICharFormat charFormat) { + this.charFormat = charFormat; + return this; + } } diff --git a/src/main/java/com/github/houbb/sensitive/word/bs/package-info.java b/src/main/java/com/github/houbb/sensitive/word/bs/package-info.java deleted file mode 100644 index e6f6053..0000000 --- a/src/main/java/com/github/houbb/sensitive/word/bs/package-info.java +++ /dev/null @@ -1,5 +0,0 @@ -/** - * 引导类定义 - * @since 0.0.1 - */ -package com.github.houbb.sensitive.word.bs; \ No newline at end of file diff --git a/src/main/java/com/github/houbb/sensitive/word/constant/AppConst.java b/src/main/java/com/github/houbb/sensitive/word/constant/AppConst.java index 1b239bb..0950534 100644 --- a/src/main/java/com/github/houbb/sensitive/word/constant/AppConst.java +++ b/src/main/java/com/github/houbb/sensitive/word/constant/AppConst.java @@ -43,4 +43,10 @@ public final class AppConst { */ public static final String SENSITIVE_WORD_ALLOW_PATH = "/sensitive_word_allow.txt"; + /** + * 最长的网址长度 + * @since 0.3.0 + */ + public static final int MAX_WEB_SITE_LEN = 70; + } diff --git a/src/main/java/com/github/houbb/sensitive/word/core/SensitiveWordHelper.java b/src/main/java/com/github/houbb/sensitive/word/core/SensitiveWordHelper.java index 2fe076b..bad461a 100644 --- a/src/main/java/com/github/houbb/sensitive/word/core/SensitiveWordHelper.java +++ b/src/main/java/com/github/houbb/sensitive/word/core/SensitiveWordHelper.java @@ -3,6 +3,7 @@ package com.github.houbb.sensitive.word.core; import com.github.houbb.sensitive.word.api.ISensitiveWordReplace; import com.github.houbb.sensitive.word.api.IWordResultHandler; import 
com.github.houbb.sensitive.word.bs.SensitiveWordBs; +import com.github.houbb.sensitive.word.support.replace.SensitiveWordReplaces; import java.util.List; @@ -69,7 +70,8 @@ public final class SensitiveWordHelper { * @since 0.2.0 */ public static String replace(final String target, final ISensitiveWordReplace replace) { - return WORD_BS.replace(target, replace); + SensitiveWordBs sensitiveWordBs = SensitiveWordBs.newInstance().sensitiveWordReplace(replace).init(); + return sensitiveWordBs.replace(target); } /** @@ -81,7 +83,9 @@ public final class SensitiveWordHelper { * @since 0.0.13 */ public static String replace(final String target, final char replaceChar) { - return WORD_BS.replace(target, replaceChar); + final ISensitiveWordReplace replace = SensitiveWordReplaces.chars(replaceChar); + + return replace(target, replace); } /** diff --git a/src/main/java/com/github/houbb/sensitive/word/support/allow/WordAllowSystem.java b/src/main/java/com/github/houbb/sensitive/word/support/allow/WordAllowSystem.java index 284da5d..c596776 100644 --- a/src/main/java/com/github/houbb/sensitive/word/support/allow/WordAllowSystem.java +++ b/src/main/java/com/github/houbb/sensitive/word/support/allow/WordAllowSystem.java @@ -15,6 +15,15 @@ import java.util.List; @ThreadSafe public class WordAllowSystem implements IWordAllow { + /** + * @since 0.3.0 + */ + private static final WordAllowSystem INSTANCE = new WordAllowSystem(); + + public static WordAllowSystem getInstance() { + return INSTANCE; + } + @Override public List allow() { return StreamUtil.readAllLines("/sensitive_word_allow.txt"); diff --git a/src/main/java/com/github/houbb/sensitive/word/support/allow/WordAllows.java b/src/main/java/com/github/houbb/sensitive/word/support/allow/WordAllows.java index f5ce28f..48f6cd1 100644 --- a/src/main/java/com/github/houbb/sensitive/word/support/allow/WordAllows.java +++ b/src/main/java/com/github/houbb/sensitive/word/support/allow/WordAllows.java @@ -1,6 +1,5 @@ package 
com.github.houbb.sensitive.word.support.allow; -import com.github.houbb.heaven.support.instance.impl.Instances; import com.github.houbb.heaven.support.pipeline.Pipeline; import com.github.houbb.heaven.util.util.ArrayUtil; import com.github.houbb.sensitive.word.api.IWordAllow; @@ -43,7 +42,7 @@ public final class WordAllows { * @since 0.0.13 */ public static IWordAllow system() { - return Instances.singleton(WordAllowSystem.class); + return WordAllowSystem.getInstance(); } } diff --git a/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckChain.java b/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckChain.java deleted file mode 100644 index 2d099fa..0000000 --- a/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckChain.java +++ /dev/null @@ -1,58 +0,0 @@ -package com.github.houbb.sensitive.word.support.check.impl; - -import com.github.houbb.heaven.annotation.ThreadSafe; -import com.github.houbb.heaven.support.instance.impl.Instances; -import com.github.houbb.heaven.util.guava.Guavas; -import com.github.houbb.sensitive.word.api.IWordContext; -import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum; -import com.github.houbb.sensitive.word.support.check.ISensitiveCheck; -import com.github.houbb.sensitive.word.support.check.SensitiveCheckResult; - -import java.util.List; - -/** - * 敏感词检测责任链模式 - * - * 这里可以提供一个公共的父类。 - * - * - * DFA 算法的优化可以参考论文: - * 【DFA 算法】各种论文。 - * - * @author binbin.hou - * @since 0.0.5 - */ -@ThreadSafe -public class SensitiveCheckChain implements ISensitiveCheck { - - @Override - public SensitiveCheckResult sensitiveCheck(String txt, int beginIndex, ValidModeEnum validModeEnum, IWordContext context) { - // 初始化责任链 - List sensitiveChecks = Guavas.newArrayList(); - // 默认添加敏感词校验 - sensitiveChecks.add(Instances.singleton(SensitiveCheckWord.class)); - if(context.sensitiveCheckNum()) { - sensitiveChecks.add(Instances.singleton(SensitiveCheckNum.class)); - } - 
if(context.sensitiveCheckEmail()) { - sensitiveChecks.add(Instances.singleton(SensitiveCheckEmail.class)); - } - if(context.sensitiveCheckUrl()) { - sensitiveChecks.add(Instances.singleton(SensitiveCheckUrl.class)); - } - - // 循环调用 - for(ISensitiveCheck sensitiveCheck : sensitiveChecks) { - SensitiveCheckResult result = sensitiveCheck.sensitiveCheck(txt, beginIndex, validModeEnum, context); - - if(result.index() > 0) { - return result; - } - } - - // 这里直接进行正则表达式相关的调用。 - // 默认返回 0 - return SensitiveCheckResult.of(0, SensitiveCheckChain.class); - } - -} diff --git a/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckEmail.java b/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckEmail.java index 3718b5c..1429d03 100644 --- a/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckEmail.java +++ b/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckEmail.java @@ -1,14 +1,12 @@ package com.github.houbb.sensitive.word.support.check.impl; import com.github.houbb.heaven.annotation.ThreadSafe; -import com.github.houbb.heaven.support.instance.impl.Instances; import com.github.houbb.heaven.util.lang.CharUtil; import com.github.houbb.heaven.util.util.regex.RegexUtil; import com.github.houbb.sensitive.word.api.IWordContext; import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum; import com.github.houbb.sensitive.word.support.check.ISensitiveCheck; import com.github.houbb.sensitive.word.support.check.SensitiveCheckResult; -import com.github.houbb.sensitive.word.support.format.CharFormatChain; /** * email 正则表达式检测实现。 @@ -28,6 +26,15 @@ import com.github.houbb.sensitive.word.support.format.CharFormatChain; @ThreadSafe public class SensitiveCheckEmail implements ISensitiveCheck { + /** + * @since 0.3.0 + */ + private static final ISensitiveCheck INSTANCE = new SensitiveCheckEmail(); + + public static ISensitiveCheck getInstance() { + return INSTANCE; + } + 
@Override public SensitiveCheckResult sensitiveCheck(String txt, int beginIndex, ValidModeEnum validModeEnum, IWordContext context) { // 记录敏感词的长度 @@ -40,7 +47,7 @@ public class SensitiveCheckEmail implements ISensitiveCheck { // 后期如果有想法,对 DFA 进一步深入学习后,将进行优化。 for(int i = beginIndex; i < txt.length(); i++) { char currentChar = txt.charAt(i); - char mappingChar = Instances.singleton(CharFormatChain.class) + char mappingChar = context.charFormat() .format(currentChar, context); if(CharUtil.isEmilChar(mappingChar)) { diff --git a/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckInit.java b/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckInit.java new file mode 100644 index 0000000..4572c12 --- /dev/null +++ b/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckInit.java @@ -0,0 +1,51 @@ +package com.github.houbb.sensitive.word.support.check.impl; + +import com.github.houbb.heaven.support.pipeline.Pipeline; +import com.github.houbb.heaven.support.pipeline.impl.DefaultPipeline; +import com.github.houbb.sensitive.word.api.IWordContext; +import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum; +import com.github.houbb.sensitive.word.support.check.ISensitiveCheck; +import com.github.houbb.sensitive.word.support.check.SensitiveCheckResult; + +import java.util.List; + +/** + * 检测初始化类 + * @since 0.3.0 + */ +public abstract class SensitiveCheckInit implements ISensitiveCheck { + + /** + * 初始化列表 + * + * @param pipeline 当前列表泳道 + * @since 0.0.13 + */ + protected abstract void init(final Pipeline pipeline); + + + @Override + public SensitiveCheckResult sensitiveCheck(String txt, + int beginIndex, + ValidModeEnum validModeEnum, + IWordContext context) { + + Pipeline pipeline = new DefaultPipeline<>(); + this.init(pipeline); + List sensitiveChecks = pipeline.list(); + + // 循环调用 + for(ISensitiveCheck sensitiveCheck : sensitiveChecks) { + SensitiveCheckResult result = 
sensitiveCheck.sensitiveCheck(txt, beginIndex, validModeEnum, context); + + if(result.index() > 0) { + return result; + } + } + + // 这里直接进行正则表达式相关的调用。 + // 默认返回 0 + return SensitiveCheckNone.getNoneResult(); + } + +} diff --git a/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckNone.java b/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckNone.java new file mode 100644 index 0000000..3376d7b --- /dev/null +++ b/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckNone.java @@ -0,0 +1,41 @@ +package com.github.houbb.sensitive.word.support.check.impl; + +import com.github.houbb.heaven.annotation.ThreadSafe; +import com.github.houbb.sensitive.word.api.IWordContext; +import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum; +import com.github.houbb.sensitive.word.support.check.ISensitiveCheck; +import com.github.houbb.sensitive.word.support.check.SensitiveCheckResult; + +/** + * 未匹配 + * + * @author binbin.hou + * @since 0.3.0 + */ +@ThreadSafe +public class SensitiveCheckNone implements ISensitiveCheck { + + /** + * @since 0.3.0 + */ + private static final ISensitiveCheck INSTANCE = new SensitiveCheckNone(); + + public static ISensitiveCheck getInstance() { + return INSTANCE; + } + + /** + * 只有一个未匹配 + */ + private static final SensitiveCheckResult NONE_RESULT = SensitiveCheckResult.of(0, SensitiveCheckNone.class); + + @Override + public SensitiveCheckResult sensitiveCheck(String txt, int beginIndex, ValidModeEnum validModeEnum, IWordContext context) { + return NONE_RESULT; + } + + public static SensitiveCheckResult getNoneResult() { + return NONE_RESULT; + } + +} diff --git a/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckNum.java b/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckNum.java index 7facc1f..2824e6a 100644 --- a/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckNum.java 
+++ b/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckNum.java @@ -1,12 +1,10 @@ package com.github.houbb.sensitive.word.support.check.impl; import com.github.houbb.heaven.annotation.ThreadSafe; -import com.github.houbb.heaven.support.instance.impl.Instances; import com.github.houbb.sensitive.word.api.IWordContext; import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum; import com.github.houbb.sensitive.word.support.check.ISensitiveCheck; import com.github.houbb.sensitive.word.support.check.SensitiveCheckResult; -import com.github.houbb.sensitive.word.support.format.CharFormatChain; /** * 敏感词监测实现 @@ -18,6 +16,15 @@ import com.github.houbb.sensitive.word.support.format.CharFormatChain; @ThreadSafe public class SensitiveCheckNum implements ISensitiveCheck { + /** + * @since 0.3.0 + */ + private static final ISensitiveCheck INSTANCE = new SensitiveCheckNum(); + + public static ISensitiveCheck getInstance() { + return INSTANCE; + } + @Override public SensitiveCheckResult sensitiveCheck(String txt, int beginIndex, ValidModeEnum validModeEnum, IWordContext context) { // 记录敏感词的长度 @@ -26,7 +33,7 @@ public class SensitiveCheckNum implements ISensitiveCheck { for (int i = beginIndex; i < txt.length(); i++) { char c = txt.charAt(i); - char charKey = Instances.singleton(CharFormatChain.class).format(c, context); + char charKey = context.charFormat().format(c, context); // 如果是数字 // 满足进入的条件 diff --git a/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckUrl.java b/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckUrl.java index d5760fb..971b6e9 100644 --- a/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckUrl.java +++ b/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckUrl.java @@ -1,15 +1,13 @@ package com.github.houbb.sensitive.word.support.check.impl; -import com.github.houbb.heaven.annotation.CommonEager; import 
com.github.houbb.heaven.annotation.ThreadSafe; -import com.github.houbb.heaven.support.instance.impl.Instances; import com.github.houbb.heaven.util.lang.CharUtil; import com.github.houbb.heaven.util.util.regex.RegexUtil; import com.github.houbb.sensitive.word.api.IWordContext; +import com.github.houbb.sensitive.word.constant.AppConst; import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum; import com.github.houbb.sensitive.word.support.check.ISensitiveCheck; import com.github.houbb.sensitive.word.support.check.SensitiveCheckResult; -import com.github.houbb.sensitive.word.support.format.CharFormatChain; /** * URL 正则表达式检测实现。 @@ -27,10 +25,13 @@ import com.github.houbb.sensitive.word.support.format.CharFormatChain; public class SensitiveCheckUrl implements ISensitiveCheck { /** - * 最长的网址长度 - * @since 0.0.12 + * @since 0.3.0 */ - private static final int MAX_WEB_SITE_LEN = 70; + private static final ISensitiveCheck INSTANCE = new SensitiveCheckUrl(); + + public static ISensitiveCheck getInstance() { + return INSTANCE; + } @Override public SensitiveCheckResult sensitiveCheck(String txt, int beginIndex, ValidModeEnum validModeEnum, IWordContext context) { @@ -44,11 +45,11 @@ public class SensitiveCheckUrl implements ISensitiveCheck { // 后期如果有想法,对 DFA 进一步深入学习后,将进行优化。 for(int i = beginIndex; i < txt.length(); i++) { char currentChar = txt.charAt(i); - char mappingChar = Instances.singleton(CharFormatChain.class) + char mappingChar = context.charFormat() .format(currentChar, context); if(CharUtil.isWebSiteChar(mappingChar) - && lengthCount <= MAX_WEB_SITE_LEN) { + && lengthCount <= AppConst.MAX_WEB_SITE_LEN) { lengthCount++; stringBuilder.append(currentChar); diff --git a/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckWord.java b/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckWord.java index 9b6a364..8b91345 100644 --- 
a/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckWord.java +++ b/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckWord.java @@ -1,14 +1,12 @@ package com.github.houbb.sensitive.word.support.check.impl; import com.github.houbb.heaven.annotation.ThreadSafe; -import com.github.houbb.heaven.support.instance.impl.Instances; import com.github.houbb.heaven.util.lang.ObjectUtil; import com.github.houbb.sensitive.word.api.IWordContext; import com.github.houbb.sensitive.word.constant.AppConst; import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum; import com.github.houbb.sensitive.word.support.check.ISensitiveCheck; import com.github.houbb.sensitive.word.support.check.SensitiveCheckResult; -import com.github.houbb.sensitive.word.support.format.CharFormatChain; import java.util.Map; @@ -20,6 +18,15 @@ import java.util.Map; @ThreadSafe public class SensitiveCheckWord implements ISensitiveCheck { + /** + * @since 0.3.0 + */ + private static final ISensitiveCheck INSTANCE = new SensitiveCheckWord(); + + public static ISensitiveCheck getInstance() { + return INSTANCE; + } + @Override public SensitiveCheckResult sensitiveCheck(String txt, int beginIndex, ValidModeEnum validModeEnum, IWordContext context) { Map nowMap = context.sensitiveWordMap(); @@ -90,7 +97,7 @@ public class SensitiveCheckWord implements ISensitiveCheck { final String txt, final int index) { char c = txt.charAt(index); - char mappingChar = Instances.singleton(CharFormatChain.class).format(c, context); + char mappingChar = context.charFormat().format(c, context); // 这里做一次重复词的处理 //TODO: 这里可以优化,是否获取一次。 @@ -99,7 +106,7 @@ public class SensitiveCheckWord implements ISensitiveCheck { if(context.ignoreRepeat() && index > 0) { char preChar = txt.charAt(index-1); - char preMappingChar = Instances.singleton(CharFormatChain.class) + char preMappingChar = context.charFormat() .format(preChar, context); // 直接赋值为上一个 map diff --git 
a/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveChecks.java b/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveChecks.java new file mode 100644 index 0000000..71614e7 --- /dev/null +++ b/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveChecks.java @@ -0,0 +1,97 @@ +package com.github.houbb.sensitive.word.support.check.impl; + +import com.github.houbb.heaven.support.pipeline.Pipeline; +import com.github.houbb.heaven.util.util.ArrayUtil; +import com.github.houbb.heaven.util.util.CollectionUtil; +import com.github.houbb.sensitive.word.api.IWordContext; +import com.github.houbb.sensitive.word.support.check.ISensitiveCheck; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; + +/** + * 敏感词检测工具 + * @since 0.3.0 + */ +public final class SensitiveChecks { + + private SensitiveChecks(){} + + /** + * 初始化敏感检测策略 + * @param context 上下文 + * + * @return 实现 + * @since 0.3.0 + */ + public static ISensitiveCheck initSensitiveCheck(final IWordContext context) { + List sensitiveCheckList = new ArrayList<>(); + + // 默认添加敏感词校验 + sensitiveCheckList.add(SensitiveChecks.word()); + + if(context.sensitiveCheckNum()) { + sensitiveCheckList.add(SensitiveChecks.num()); + } + if(context.sensitiveCheckEmail()) { + sensitiveCheckList.add(SensitiveChecks.email()); + } + if(context.sensitiveCheckUrl()) { + sensitiveCheckList.add(SensitiveChecks.url()); + } + + return SensitiveChecks.chains(sensitiveCheckList); + } + + public static ISensitiveCheck chains(final ISensitiveCheck... 
sensitiveChecks) { + if (ArrayUtil.isEmpty(sensitiveChecks)){ + return none(); + } + + return new SensitiveCheckInit() { + @Override + protected void init(Pipeline pipeline) { + for(ISensitiveCheck check : sensitiveChecks) { + pipeline.addLast(check); + } + } + }; + } + + public static ISensitiveCheck chains(final Collection sensitiveChecks) { + if (CollectionUtil.isEmpty(sensitiveChecks)){ + return none(); + } + + return new SensitiveCheckInit() { + @Override + protected void init(Pipeline pipeline) { + for(ISensitiveCheck check : sensitiveChecks) { + pipeline.addLast(check); + } + } + }; + } + + public static ISensitiveCheck email() { + return SensitiveCheckEmail.getInstance(); + } + + public static ISensitiveCheck num() { + return SensitiveCheckNum.getInstance(); + } + + public static ISensitiveCheck url() { + return SensitiveCheckUrl.getInstance(); + } + + public static ISensitiveCheck word() { + return SensitiveCheckWord.getInstance(); + } + + public static ISensitiveCheck none() { + return SensitiveCheckNone.getInstance(); + } + +} diff --git a/src/main/java/com/github/houbb/sensitive/word/support/data/SensitiveWordData.java b/src/main/java/com/github/houbb/sensitive/word/support/data/SensitiveWordData.java deleted file mode 100644 index f0159f0..0000000 --- a/src/main/java/com/github/houbb/sensitive/word/support/data/SensitiveWordData.java +++ /dev/null @@ -1,53 +0,0 @@ -package com.github.houbb.sensitive.word.support.data; - -import com.github.houbb.heaven.annotation.ThreadSafe; -import com.github.houbb.heaven.util.guava.Guavas; -import com.github.houbb.heaven.util.io.StreamUtil; -import com.github.houbb.heaven.util.util.CollectionUtil; -import com.github.houbb.sensitive.word.api.IWordData; -import com.github.houbb.sensitive.word.constant.AppConst; - -import java.util.List; - -/** - * 数据加载使用单例的模式,只需要加载一次即可。 - * - * @author binbin.hou - * @since 0.0.1 - */ -@ThreadSafe -public class SensitiveWordData implements IWordData { - - /** - * 默认的内置行 - * - * @since 
0.0.1 - */ - private static List defaultLines; - - static { - synchronized (SensitiveWordData.class) { - long start = System.currentTimeMillis(); - defaultLines = Guavas.newArrayList(AppConst.DICT_SIZE+AppConst.DICT_EN_SIZE); - defaultLines = StreamUtil.readAllLines("/dict.txt"); - defaultLines.addAll(StreamUtil.readAllLines("/dict_en.txt")); - - // 用户自定义 - List denyList = StreamUtil.readAllLines("/sensitive_word_deny.txt"); - defaultLines.addAll(denyList); - - // 移除白名单词语 - List allowList = StreamUtil.readAllLines("/sensitive_word_allow.txt"); - defaultLines = CollectionUtil.difference(defaultLines, allowList); - - long end = System.currentTimeMillis(); - } - } - - - @Override - public List getWordData() { - return defaultLines; - } - -} diff --git a/src/main/java/com/github/houbb/sensitive/word/support/deny/WordDenySystem.java b/src/main/java/com/github/houbb/sensitive/word/support/deny/WordDenySystem.java index f7282db..0d117b6 100644 --- a/src/main/java/com/github/houbb/sensitive/word/support/deny/WordDenySystem.java +++ b/src/main/java/com/github/houbb/sensitive/word/support/deny/WordDenySystem.java @@ -16,6 +16,15 @@ import java.util.List; @ThreadSafe public class WordDenySystem implements IWordDeny { + /** + * @since 0.3.0 + */ + private static final IWordDeny INSTANCE = new WordDenySystem(); + + public static IWordDeny getInstance() { + return INSTANCE; + } + @Override public List deny() { List results = StreamUtil.readAllLines("/dict.txt"); diff --git a/src/main/java/com/github/houbb/sensitive/word/support/deny/WordDenys.java b/src/main/java/com/github/houbb/sensitive/word/support/deny/WordDenys.java index c15452e..e80be0e 100644 --- a/src/main/java/com/github/houbb/sensitive/word/support/deny/WordDenys.java +++ b/src/main/java/com/github/houbb/sensitive/word/support/deny/WordDenys.java @@ -1,6 +1,5 @@ package com.github.houbb.sensitive.word.support.deny; -import com.github.houbb.heaven.support.instance.impl.Instances; import 
com.github.houbb.heaven.support.pipeline.Pipeline; import com.github.houbb.heaven.util.util.ArrayUtil; import com.github.houbb.sensitive.word.api.IWordDeny; @@ -43,7 +42,7 @@ public final class WordDenys { * @since 0.0.13 */ public static IWordDeny system() { - return Instances.singleton(WordDenySystem.class); + return WordDenySystem.getInstance(); } } diff --git a/src/main/java/com/github/houbb/sensitive/word/support/format/CharFormatChain.java b/src/main/java/com/github/houbb/sensitive/word/support/format/CharFormatChain.java deleted file mode 100644 index 0b72d8f..0000000 --- a/src/main/java/com/github/houbb/sensitive/word/support/format/CharFormatChain.java +++ /dev/null @@ -1,48 +0,0 @@ -package com.github.houbb.sensitive.word.support.format; - -import com.github.houbb.heaven.annotation.ThreadSafe; -import com.github.houbb.heaven.support.instance.impl.Instances; -import com.github.houbb.heaven.util.guava.Guavas; -import com.github.houbb.sensitive.word.api.IWordContext; -import com.github.houbb.sensitive.word.api.ICharFormat; - -import java.util.List; - -/** - * 格式化责任链 - * @author binbin.hou - * @since 0.0.5 - */ -@ThreadSafe -public class CharFormatChain implements ICharFormat { - - @Override - public char format(char original, IWordContext context) { - char result = original; - - List charFormats = Guavas.newArrayList(); - if(context.ignoreEnglishStyle()) { - charFormats.add(Instances.singleton(IgnoreEnglishStyleFormat.class)); - } - if(context.ignoreCase()) { - charFormats.add(Instances.singleton(IgnoreCaseCharFormat.class)); - } - if(context.ignoreWidth()) { - charFormats.add(Instances.singleton(IgnoreWidthCharFormat.class)); - } - if(context.ignoreNumStyle()) { - charFormats.add(Instances.singleton(IgnoreNumStyleCharFormat.class)); - } - if(context.ignoreChineseStyle()) { - charFormats.add(Instances.singleton(IgnoreChineseStyleFormat.class)); - } - - // 循环执行 - for(ICharFormat charFormat : charFormats) { - result = charFormat.format(result, context); - } - 
- return result; - } - -} diff --git a/src/main/java/com/github/houbb/sensitive/word/support/format/CharFormatInit.java b/src/main/java/com/github/houbb/sensitive/word/support/format/CharFormatInit.java new file mode 100644 index 0000000..31e2f0e --- /dev/null +++ b/src/main/java/com/github/houbb/sensitive/word/support/format/CharFormatInit.java @@ -0,0 +1,43 @@ +package com.github.houbb.sensitive.word.support.format; + +import com.github.houbb.heaven.annotation.ThreadSafe; +import com.github.houbb.heaven.support.pipeline.Pipeline; +import com.github.houbb.heaven.support.pipeline.impl.DefaultPipeline; +import com.github.houbb.sensitive.word.api.ICharFormat; +import com.github.houbb.sensitive.word.api.IWordContext; + +import java.util.List; + +/** + * 格式化责任链 + * @author binbin.hou + * @since 0.0.5 + */ +@ThreadSafe +public abstract class CharFormatInit implements ICharFormat { + + /** + * 初始化列表 + * + * @param pipeline 当前列表泳道 + * @since 0.0.13 + */ + protected abstract void init(final Pipeline pipeline); + + @Override + public char format(char original, IWordContext context) { + Pipeline pipeline = new DefaultPipeline<>(); + init(pipeline); + + char result = original; + + // 循环执行 + List charFormats = pipeline.list(); + for(ICharFormat charFormat : charFormats) { + result = charFormat.format(result, context); + } + + return result; + } + +} diff --git a/src/main/java/com/github/houbb/sensitive/word/support/format/CharFormats.java b/src/main/java/com/github/houbb/sensitive/word/support/format/CharFormats.java new file mode 100644 index 0000000..e4dbccf --- /dev/null +++ b/src/main/java/com/github/houbb/sensitive/word/support/format/CharFormats.java @@ -0,0 +1,112 @@ +package com.github.houbb.sensitive.word.support.format; + +import com.github.houbb.heaven.support.pipeline.Pipeline; +import com.github.houbb.heaven.util.guava.Guavas; +import com.github.houbb.heaven.util.util.ArrayUtil; +import com.github.houbb.heaven.util.util.CollectionUtil; +import 
com.github.houbb.sensitive.word.api.ICharFormat; +import com.github.houbb.sensitive.word.api.IWordContext; + +import java.util.Collection; +import java.util.List; + +/** + * 格式化工具类 + * @author binbin.hou + * @since 0.3.0 + */ +public final class CharFormats { + + private CharFormats(){} + + /** + * 初始化格式化 + * @param context 上下文 + * @return 结果 + * @since 0.3.0 + */ + public static ICharFormat initCharFormat(final IWordContext context) { + List charFormats = Guavas.newArrayList(); + if(context.ignoreEnglishStyle()) { + charFormats.add(ignoreEnglishStyle()); + } + if(context.ignoreCase()) { + charFormats.add(ignoreCase()); + } + if(context.ignoreWidth()) { + charFormats.add(ignoreWidth()); + } + if(context.ignoreNumStyle()) { + charFormats.add(ignoreNumStyle()); + } + if(context.ignoreChineseStyle()) { + charFormats.add(ignoreChineseStyle()); + } + + return chains(charFormats); + } + + /** + * 链式 + * @param charFormats 列表 + * @return 结果 + */ + public static ICharFormat chains(final ICharFormat ... 
charFormats) { + if(ArrayUtil.isEmpty(charFormats)) { + return none(); + } + + return new CharFormatInit() { + @Override + protected void init(Pipeline pipeline) { + for(ICharFormat charFormat : charFormats) { + pipeline.addLast(charFormat); + } + } + }; + } + + /** + * 链式 + * @param charFormats 列表 + * @return 结果 + */ + public static ICharFormat chains(final Collection charFormats) { + if(CollectionUtil.isEmpty(charFormats)) { + return none(); + } + + return new CharFormatInit() { + @Override + protected void init(Pipeline pipeline) { + for(ICharFormat charFormat : charFormats) { + pipeline.addLast(charFormat); + } + } + }; + } + + public static ICharFormat none() { + return NoneCharFormat.getInstance(); + } + public static ICharFormat ignoreCase() { + return IgnoreCaseCharFormat.getInstance(); + } + + public static ICharFormat ignoreEnglishStyle() { + return IgnoreEnglishStyleFormat.getInstance(); + } + + public static ICharFormat ignoreChineseStyle() { + return IgnoreChineseStyleFormat.getInstance(); + } + + public static ICharFormat ignoreNumStyle() { + return IgnoreNumStyleCharFormat.getInstance(); + } + + public static ICharFormat ignoreWidth() { + return IgnoreWidthCharFormat.getInstance(); + } + +} diff --git a/src/main/java/com/github/houbb/sensitive/word/support/format/IgnoreCaseCharFormat.java b/src/main/java/com/github/houbb/sensitive/word/support/format/IgnoreCaseCharFormat.java index 9c43d22..bcc69ad 100644 --- a/src/main/java/com/github/houbb/sensitive/word/support/format/IgnoreCaseCharFormat.java +++ b/src/main/java/com/github/houbb/sensitive/word/support/format/IgnoreCaseCharFormat.java @@ -12,6 +12,12 @@ import com.github.houbb.sensitive.word.api.IWordContext; @ThreadSafe public class IgnoreCaseCharFormat implements ICharFormat { + private static final ICharFormat INSTANCE = new IgnoreCaseCharFormat(); + + public static ICharFormat getInstance() { + return INSTANCE; + } + @Override public char format(char original, IWordContext context) { return 
Character.toLowerCase(original); diff --git a/src/main/java/com/github/houbb/sensitive/word/support/format/IgnoreChineseStyleFormat.java b/src/main/java/com/github/houbb/sensitive/word/support/format/IgnoreChineseStyleFormat.java index 60efcba..fa9e375 100644 --- a/src/main/java/com/github/houbb/sensitive/word/support/format/IgnoreChineseStyleFormat.java +++ b/src/main/java/com/github/houbb/sensitive/word/support/format/IgnoreChineseStyleFormat.java @@ -7,13 +7,19 @@ import com.github.houbb.sensitive.word.api.ICharFormat; import com.github.houbb.sensitive.word.api.IWordContext; /** - * 忽略大小写 + * 忽略中文样式 * @author binbin.hou * @since 0.0.5 */ @ThreadSafe public class IgnoreChineseStyleFormat implements ICharFormat { + private static final ICharFormat INSTANCE = new IgnoreChineseStyleFormat(); + + public static ICharFormat getInstance() { + return INSTANCE; + } + @Override public char format(char original, IWordContext context) { String string = String.valueOf(original); diff --git a/src/main/java/com/github/houbb/sensitive/word/support/format/IgnoreEnglishStyleFormat.java b/src/main/java/com/github/houbb/sensitive/word/support/format/IgnoreEnglishStyleFormat.java index e132254..c8b3148 100644 --- a/src/main/java/com/github/houbb/sensitive/word/support/format/IgnoreEnglishStyleFormat.java +++ b/src/main/java/com/github/houbb/sensitive/word/support/format/IgnoreEnglishStyleFormat.java @@ -3,7 +3,7 @@ package com.github.houbb.sensitive.word.support.format; import com.github.houbb.heaven.annotation.ThreadSafe; import com.github.houbb.sensitive.word.api.ICharFormat; import com.github.houbb.sensitive.word.api.IWordContext; -import com.github.houbb.sensitive.word.utils.CharUtils; +import com.github.houbb.sensitive.word.utils.InnerCharUtils; /** * 忽略英文的各种格式 @@ -13,9 +13,15 @@ import com.github.houbb.sensitive.word.utils.CharUtils; @ThreadSafe public class IgnoreEnglishStyleFormat implements ICharFormat { + private static final ICharFormat INSTANCE = new 
IgnoreEnglishStyleFormat(); + + public static ICharFormat getInstance() { + return INSTANCE; + } + @Override public char format(char original, IWordContext context) { - return CharUtils.getMappingChar(original); + return InnerCharUtils.getMappingChar(original); } } diff --git a/src/main/java/com/github/houbb/sensitive/word/support/format/IgnoreNumStyleCharFormat.java b/src/main/java/com/github/houbb/sensitive/word/support/format/IgnoreNumStyleCharFormat.java index 2923626..4fd8511 100644 --- a/src/main/java/com/github/houbb/sensitive/word/support/format/IgnoreNumStyleCharFormat.java +++ b/src/main/java/com/github/houbb/sensitive/word/support/format/IgnoreNumStyleCharFormat.java @@ -3,7 +3,7 @@ package com.github.houbb.sensitive.word.support.format; import com.github.houbb.heaven.annotation.ThreadSafe; import com.github.houbb.sensitive.word.api.IWordContext; import com.github.houbb.sensitive.word.api.ICharFormat; -import com.github.houbb.sensitive.word.utils.NumUtils; +import com.github.houbb.sensitive.word.utils.InnerNumUtils; /** * 忽略数字的样式 @@ -13,9 +13,15 @@ import com.github.houbb.sensitive.word.utils.NumUtils; @ThreadSafe public class IgnoreNumStyleCharFormat implements ICharFormat { + private static final ICharFormat INSTANCE = new IgnoreNumStyleCharFormat(); + + public static ICharFormat getInstance() { + return INSTANCE; + } + @Override public char format(char original, IWordContext context) { - return NumUtils.getMappingChar(original); + return InnerNumUtils.getMappingChar(original); } } diff --git a/src/main/java/com/github/houbb/sensitive/word/support/format/IgnoreWidthCharFormat.java b/src/main/java/com/github/houbb/sensitive/word/support/format/IgnoreWidthCharFormat.java index 64f8f38..e34cab3 100644 --- a/src/main/java/com/github/houbb/sensitive/word/support/format/IgnoreWidthCharFormat.java +++ b/src/main/java/com/github/houbb/sensitive/word/support/format/IgnoreWidthCharFormat.java @@ -6,13 +6,19 @@ import 
com.github.houbb.sensitive.word.api.IWordContext; import com.github.houbb.sensitive.word.api.ICharFormat; /** - * 格式化责任链 + * 格式化字宽度 * @author binbin.hou * @since 0.0.5 */ @ThreadSafe public class IgnoreWidthCharFormat implements ICharFormat { + private static final ICharFormat INSTANCE = new IgnoreWidthCharFormat(); + + public static ICharFormat getInstance() { + return INSTANCE; + } + @Override public char format(char original, IWordContext context) { return CharUtil.toHalfWidth(original); diff --git a/src/main/java/com/github/houbb/sensitive/word/support/format/NoneCharFormat.java b/src/main/java/com/github/houbb/sensitive/word/support/format/NoneCharFormat.java new file mode 100644 index 0000000..c1c092d --- /dev/null +++ b/src/main/java/com/github/houbb/sensitive/word/support/format/NoneCharFormat.java @@ -0,0 +1,27 @@ +package com.github.houbb.sensitive.word.support.format; + +import com.github.houbb.heaven.annotation.ThreadSafe; +import com.github.houbb.sensitive.word.api.ICharFormat; +import com.github.houbb.sensitive.word.api.IWordContext; + +/** + * 无处理 + * + * @author binbin.hou + * @since 0.3.0 + */ +@ThreadSafe +public class NoneCharFormat implements ICharFormat { + + private static final ICharFormat INSTANCE = new NoneCharFormat(); + + public static ICharFormat getInstance() { + return INSTANCE; + } + + @Override + public char format(char original, IWordContext context) { + return original; + } + +} diff --git a/src/main/java/com/github/houbb/sensitive/word/support/map/SensitiveWordMap.java b/src/main/java/com/github/houbb/sensitive/word/support/map/SensitiveWordMap.java index dc7b28f..8b96a52 100644 --- a/src/main/java/com/github/houbb/sensitive/word/support/map/SensitiveWordMap.java +++ b/src/main/java/com/github/houbb/sensitive/word/support/map/SensitiveWordMap.java @@ -1,19 +1,15 @@ package com.github.houbb.sensitive.word.support.map; import com.github.houbb.heaven.annotation.ThreadSafe; -import
com.github.houbb.heaven.support.instance.impl.Instances; import com.github.houbb.heaven.util.guava.Guavas; import com.github.houbb.heaven.util.io.FileUtil; -import com.github.houbb.heaven.util.lang.CharUtil; import com.github.houbb.heaven.util.lang.ObjectUtil; import com.github.houbb.heaven.util.lang.StringUtil; import com.github.houbb.heaven.util.util.CollectionUtil; -import com.github.houbb.heaven.util.util.MapUtil; import com.github.houbb.sensitive.word.api.*; import com.github.houbb.sensitive.word.constant.AppConst; import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum; import com.github.houbb.sensitive.word.support.check.SensitiveCheckResult; -import com.github.houbb.sensitive.word.support.check.impl.SensitiveCheckChain; import com.github.houbb.sensitive.word.support.check.impl.SensitiveCheckUrl; import com.github.houbb.sensitive.word.support.replace.SensitiveWordReplaceContext; import com.github.houbb.sensitive.word.support.result.WordResult; @@ -52,7 +48,6 @@ public class SensitiveWordMap implements IWordMap { @Override @SuppressWarnings("unchecked") public synchronized void initWordMap(Collection collection) { - long startTime = System.currentTimeMillis(); // 避免扩容带来的消耗 Map newInnerWordMap = new HashMap(collection.size()); @@ -99,8 +94,6 @@ public class SensitiveWordMap implements IWordMap { // 最后更新为新的 map,保证更新过程中旧的数据可用 this.innerWordMap = newInnerWordMap; - - long endTime = System.currentTimeMillis(); } /** @@ -154,12 +147,12 @@ public class SensitiveWordMap implements IWordMap { } @Override - public String replace(String target, final ISensitiveWordReplace replace, final IWordContext context) { + public String replace(String target, final IWordContext context) { if(StringUtil.isEmpty(target)) { return target; } - return this.replaceSensitiveWord(target, replace, context); + return this.replaceSensitiveWord(target, context); } /** @@ -211,13 +204,11 @@ public class SensitiveWordMap implements IWordMap { /** * 直接替换敏感词,返回替换后的结果 * @param target 
文本信息 - * @param replace 替换策略 * @param context 上下文 * @return 脱敏后的字符串 * @since 0.0.2 */ private String replaceSensitiveWord(final String target, - final ISensitiveWordReplace replace, final IWordContext context) { if(StringUtil.isEmpty(target)) { return target; @@ -245,7 +236,7 @@ public class SensitiveWordMap implements IWordMap { ISensitiveWordReplaceContext replaceContext = SensitiveWordReplaceContext.newInstance() .sensitiveWord(string) .wordLength(wordLength); - String replaceStr = replace.replace(replaceContext); + String replaceStr = context.sensitiveWordReplace().replace(replaceContext); resultBuilder.append(replaceStr); } @@ -267,7 +258,7 @@ public class SensitiveWordMap implements IWordMap { context.sensitiveWordMap(innerWordMap); // 责任链模式调用 - return Instances.singleton(SensitiveCheckChain.class) + return context.sensitiveCheck() .sensitiveCheck(txt, beginIndex, validModeEnum, context); } diff --git a/src/main/java/com/github/houbb/sensitive/word/support/map/WordMaps.java b/src/main/java/com/github/houbb/sensitive/word/support/map/WordMaps.java new file mode 100644 index 0000000..48a8181 --- /dev/null +++ b/src/main/java/com/github/houbb/sensitive/word/support/map/WordMaps.java @@ -0,0 +1,24 @@ +package com.github.houbb.sensitive.word.support.map; + +import com.github.houbb.sensitive.word.api.IWordMap; + +/** + * 敏感词 map + * + * @author binbin.hou + * @since 0.3.0 + */ +public final class WordMaps { + + private WordMaps(){} + + /** + * 默认策略 + * @return 策略 + * @since 0.3.0 + */ + public static IWordMap defaults() { + return new SensitiveWordMap(); + } + +} diff --git a/src/main/java/com/github/houbb/sensitive/word/support/replace/SensitiveWordReplaceChar.java b/src/main/java/com/github/houbb/sensitive/word/support/replace/SensitiveWordReplaceChar.java index 4893eee..02de3ec 100644 --- a/src/main/java/com/github/houbb/sensitive/word/support/replace/SensitiveWordReplaceChar.java +++ 
b/src/main/java/com/github/houbb/sensitive/word/support/replace/SensitiveWordReplaceChar.java @@ -1,6 +1,7 @@ package com.github.houbb.sensitive.word.support.replace; import com.github.houbb.heaven.annotation.ThreadSafe; +import com.github.houbb.heaven.constant.CharConst; import com.github.houbb.heaven.util.lang.CharUtil; import com.github.houbb.sensitive.word.api.ISensitiveWordReplace; import com.github.houbb.sensitive.word.api.ISensitiveWordReplaceContext; @@ -13,12 +14,20 @@ import com.github.houbb.sensitive.word.api.ISensitiveWordReplaceContext; @ThreadSafe public class SensitiveWordReplaceChar implements ISensitiveWordReplace { + /** + * 替换的字符 + * @since 0.3.0 + */ private final char replaceChar; public SensitiveWordReplaceChar(char replaceChar) { this.replaceChar = replaceChar; } + public SensitiveWordReplaceChar() { + this(CharConst.STAR); + } + @Override public String replace(ISensitiveWordReplaceContext context) { int wordLength = context.wordLength(); diff --git a/src/main/java/com/github/houbb/sensitive/word/support/replace/SensitiveWordReplaces.java b/src/main/java/com/github/houbb/sensitive/word/support/replace/SensitiveWordReplaces.java new file mode 100644 index 0000000..552547e --- /dev/null +++ b/src/main/java/com/github/houbb/sensitive/word/support/replace/SensitiveWordReplaces.java @@ -0,0 +1,34 @@ +package com.github.houbb.sensitive.word.support.replace; + +import com.github.houbb.sensitive.word.api.ISensitiveWordReplace; + +/** + * 字符替换策略工具类 + * + * @author binbin.hou + * @since 0.3.0 + */ +public final class SensitiveWordReplaces { + + private SensitiveWordReplaces(){} + + /** + * 字符 + * @param c 字符 + * @return 结果 + * @since 0.3.0 + */ + public static ISensitiveWordReplace chars(final char c) { + return new SensitiveWordReplaceChar(c); + } + + /** + * 字符,默认为 * + * @return 结果 + * @since 0.3.0 + */ + public static ISensitiveWordReplace chars() { + return new SensitiveWordReplaceChar(); + } + +} diff --git 
a/src/main/java/com/github/houbb/sensitive/word/support/result/WordResultHandlerRaw.java b/src/main/java/com/github/houbb/sensitive/word/support/result/WordResultHandlerRaw.java index a34b5c2..67f110f 100644 --- a/src/main/java/com/github/houbb/sensitive/word/support/result/WordResultHandlerRaw.java +++ b/src/main/java/com/github/houbb/sensitive/word/support/result/WordResultHandlerRaw.java @@ -12,6 +12,15 @@ import com.github.houbb.sensitive.word.api.IWordResultHandler; @ThreadSafe public class WordResultHandlerRaw implements IWordResultHandler { + /** + * @since 0.3.0 + */ + private static final WordResultHandlerRaw INSTANCE = new WordResultHandlerRaw(); + + public static WordResultHandlerRaw getInstance() { + return INSTANCE; + } + @Override public IWordResult handle(IWordResult wordResult) { return wordResult; diff --git a/src/main/java/com/github/houbb/sensitive/word/support/result/WordResultHandlerWord.java b/src/main/java/com/github/houbb/sensitive/word/support/result/WordResultHandlerWord.java index 9853c5a..7048759 100644 --- a/src/main/java/com/github/houbb/sensitive/word/support/result/WordResultHandlerWord.java +++ b/src/main/java/com/github/houbb/sensitive/word/support/result/WordResultHandlerWord.java @@ -13,6 +13,15 @@ import com.github.houbb.sensitive.word.api.IWordResultHandler; @ThreadSafe public class WordResultHandlerWord implements IWordResultHandler { + /** + * @since 0.3.0 + */ + private static final WordResultHandlerWord INSTANCE = new WordResultHandlerWord(); + + public static WordResultHandlerWord getInstance() { + return INSTANCE; + } + @Override public String handle(IWordResult wordResult) { if(wordResult == null) { diff --git a/src/main/java/com/github/houbb/sensitive/word/support/result/WordResultHandlers.java b/src/main/java/com/github/houbb/sensitive/word/support/result/WordResultHandlers.java index 599cb87..b77e6b7 100644 --- a/src/main/java/com/github/houbb/sensitive/word/support/result/WordResultHandlers.java +++ 
b/src/main/java/com/github/houbb/sensitive/word/support/result/WordResultHandlers.java @@ -1,6 +1,5 @@ package com.github.houbb.sensitive.word.support.result; -import com.github.houbb.heaven.support.instance.impl.Instances; import com.github.houbb.sensitive.word.api.IWordResult; import com.github.houbb.sensitive.word.api.IWordResultHandler; @@ -19,7 +18,7 @@ public final class WordResultHandlers { * @since 0.1.0 */ public static IWordResultHandler raw() { - return Instances.singleton(WordResultHandlerRaw.class); + return WordResultHandlerRaw.getInstance(); } /** @@ -28,7 +27,7 @@ public final class WordResultHandlers { * @since 0.1.0 */ public static IWordResultHandler word() { - return Instances.singleton(WordResultHandlerWord.class); + return WordResultHandlerWord.getInstance(); } } diff --git a/src/main/java/com/github/houbb/sensitive/word/utils/CharUtils.java b/src/main/java/com/github/houbb/sensitive/word/utils/InnerCharUtils.java similarity index 96% rename from src/main/java/com/github/houbb/sensitive/word/utils/CharUtils.java rename to src/main/java/com/github/houbb/sensitive/word/utils/InnerCharUtils.java index c3fcc3a..f4b9e8b 100644 --- a/src/main/java/com/github/houbb/sensitive/word/utils/CharUtils.java +++ b/src/main/java/com/github/houbb/sensitive/word/utils/InnerCharUtils.java @@ -12,9 +12,9 @@ import java.util.Map; * @author Administrator * @since 0.0.4 */ -public final class CharUtils { +public final class InnerCharUtils { - private CharUtils() { + private InnerCharUtils() { } /** diff --git a/src/main/java/com/github/houbb/sensitive/word/utils/InnerFormatUtils.java b/src/main/java/com/github/houbb/sensitive/word/utils/InnerFormatUtils.java index 295b401..d289e5a 100644 --- a/src/main/java/com/github/houbb/sensitive/word/utils/InnerFormatUtils.java +++ b/src/main/java/com/github/houbb/sensitive/word/utils/InnerFormatUtils.java @@ -1,10 +1,12 @@ package com.github.houbb.sensitive.word.utils; -import 
com.github.houbb.heaven.support.instance.impl.Instances; import com.github.houbb.heaven.util.lang.StringUtil; +import com.github.houbb.heaven.util.util.CollectionUtil; import com.github.houbb.sensitive.word.api.ICharFormat; import com.github.houbb.sensitive.word.api.IWordContext; -import com.github.houbb.sensitive.word.support.format.CharFormatChain; + +import java.util.ArrayList; +import java.util.List; /** * 内部格式化工具类 @@ -21,13 +23,13 @@ public final class InnerFormatUtils { * @return 结果 * @since 0.1.1 */ - public static String format(String original, IWordContext context) { + public static String format(final String original, final IWordContext context) { if(StringUtil.isEmpty(original)) { return original; } StringBuilder stringBuilder = new StringBuilder(); - ICharFormat charFormat = Instances.singleton(CharFormatChain.class); + ICharFormat charFormat = context.charFormat(); char[] chars = original.toCharArray(); for(char c : chars) { char cf = charFormat.format(c, context); @@ -37,4 +39,26 @@ public final class InnerFormatUtils { return stringBuilder.toString(); } + /** + * 格式化列表 + * @param list 列表 + * @param context 上下文 + * @return 结果 + * @since 0.3.0 + */ + public static List formatWordList(List list, + final IWordContext context) { + if(CollectionUtil.isEmpty(list)) { + return list; + } + + List resultList = new ArrayList<>(list.size()); + for(String word : list) { + String formatWord = InnerFormatUtils.format(word, context); + resultList.add(formatWord); + } + + return resultList; + } + } diff --git a/src/main/java/com/github/houbb/sensitive/word/utils/NumUtils.java b/src/main/java/com/github/houbb/sensitive/word/utils/InnerNumUtils.java similarity index 98% rename from src/main/java/com/github/houbb/sensitive/word/utils/NumUtils.java rename to src/main/java/com/github/houbb/sensitive/word/utils/InnerNumUtils.java index 486bbe5..62b40c1 100644 --- a/src/main/java/com/github/houbb/sensitive/word/utils/NumUtils.java +++
b/src/main/java/com/github/houbb/sensitive/word/utils/InnerNumUtils.java @@ -15,9 +15,9 @@ import java.util.Map; * @author Administrator * @since 0.0.4 */ -public final class NumUtils { +public final class InnerNumUtils { - private NumUtils(){} + private InnerNumUtils(){} private static final String NUM_ONE = "⓪0零º₀⓿○" + "123456789" + diff --git a/src/main/java/com/github/houbb/sensitive/word/utils/InnerWordDataUtils.java b/src/main/java/com/github/houbb/sensitive/word/utils/InnerWordDataUtils.java new file mode 100644 index 0000000..3df64f6 --- /dev/null +++ b/src/main/java/com/github/houbb/sensitive/word/utils/InnerWordDataUtils.java @@ -0,0 +1,48 @@ +package com.github.houbb.sensitive.word.utils; + +import com.github.houbb.heaven.util.util.CollectionUtil; +import com.github.houbb.sensitive.word.api.IWordContext; + +import java.util.*; + +/** + * 数据工具包 + * @since 0.3.0 + */ +public final class InnerWordDataUtils { + + private InnerWordDataUtils(){} + + /** + * 获取禁止列表中真正的禁止词汇 + * @param denyList 禁止 + * @param allowList 允许 + * @return 结果 + * @since 0.3.0 + */ + public static List getActualDenyList(List denyList, List allowList, + final IWordContext context) { + if(CollectionUtil.isEmpty(denyList)) { + return Collections.emptyList(); + } + if(CollectionUtil.isEmpty(allowList)) { + return denyList; + } + + List formatDenyList = InnerFormatUtils.formatWordList(denyList, context); + List formatAllowList = InnerFormatUtils.formatWordList(allowList, context); + + List resultList = new ArrayList<>(); + // O(1) + Set allowSet = new HashSet<>(formatAllowList); + + for(String deny : formatDenyList) { + if(allowSet.contains(deny)) { + continue; + } + + resultList.add(deny); + } + return resultList; + } +} diff --git a/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsChineseTest.java b/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsChineseTest.java index 5fa0485..b157b04 100644 --- 
a/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsChineseTest.java +++ b/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsChineseTest.java @@ -22,7 +22,7 @@ public class SensitiveWordBsChineseTest { public void ignoreChineseStyleTest() { final String text = "我爱我的祖国和五星紅旗。"; - List wordList = SensitiveWordBs.newInstance().findAll(text); + List wordList = SensitiveWordBs.newInstance().init().findAll(text); Assert.assertEquals("[祖国, 五星紅旗]", wordList.toString()); } diff --git a/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsEmailTest.java b/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsEmailTest.java index c930774..b800fc1 100644 --- a/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsEmailTest.java +++ b/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsEmailTest.java @@ -22,7 +22,7 @@ public class SensitiveWordBsEmailTest { public void emailEnglishTest() { final String text = "楼主好人,邮箱 sensitiveword@xx.com"; - List wordList = SensitiveWordBs.newInstance().findAll(text); + List wordList = SensitiveWordBs.newInstance().init().findAll(text); Assert.assertEquals("[邮箱, sensitiveword@xx.com]", wordList.toString()); } @@ -34,7 +34,7 @@ public class SensitiveWordBsEmailTest { public void emailNumberTest() { final String text = "楼主好人,邮箱 123456789@xx.com"; - List wordList = SensitiveWordBs.newInstance().findAll(text); + List wordList = SensitiveWordBs.newInstance().init().findAll(text); Assert.assertEquals("[邮箱, 123456789, xx.com]", wordList.toString()); } diff --git a/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsEnglishTest.java b/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsEnglishTest.java index 78ccb9a..b9c59e4 100644 --- a/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsEnglishTest.java +++ b/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsEnglishTest.java @@ -22,7 +22,7 @@ public class SensitiveWordBsEnglishTest 
{ public void ignoreEnglishStyleTest() { final String text = "Ⓕⓤc⒦ the bad words"; - List wordList = SensitiveWordBs.newInstance().findAll(text); + List wordList = SensitiveWordBs.newInstance().init().findAll(text); Assert.assertEquals("[Ⓕⓤc⒦]", wordList.toString()); } diff --git a/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsNumLenTest.java b/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsNumLenTest.java index 8383834..738591b 100644 --- a/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsNumLenTest.java +++ b/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsNumLenTest.java @@ -23,12 +23,13 @@ public class SensitiveWordBsNumLenTest { final String text = "你懂得:12345678"; // 默认检测 8 位 - List wordList = SensitiveWordBs.newInstance().findAll(text); + List wordList = SensitiveWordBs.newInstance().init().findAll(text); Assert.assertEquals("[12345678]", wordList.toString()); // 指定数字的长度,避免误杀 List wordList2 = SensitiveWordBs.newInstance() .numCheckLen(9) + .init() .findAll(text); Assert.assertEquals("[]", wordList2.toString()); } diff --git a/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsNumTest.java b/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsNumTest.java index c269b71..e7a9e0e 100644 --- a/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsNumTest.java +++ b/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsNumTest.java @@ -22,7 +22,7 @@ public class SensitiveWordBsNumTest { public void findAllTest() { final String text = "这个是我的微信:9989123456"; - List wordList = SensitiveWordBs.newInstance().findAll(text); + List wordList = SensitiveWordBs.newInstance().init().findAll(text); Assert.assertEquals("[微信, 9989123456]", wordList.toString()); } @@ -34,7 +34,7 @@ public class SensitiveWordBsNumTest { public void ignoreNumStyleTest() { final String text = "这个是我的微信:9⓿二肆⁹₈③⑸⒋➃㈤㊄"; - List wordList = SensitiveWordBs.newInstance().findAll(text); + List 
wordList = SensitiveWordBs.newInstance().init().findAll(text); Assert.assertEquals("[微信, 9⓿二肆⁹₈③⑸⒋➃㈤㊄]", wordList.toString()); } diff --git a/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsRepeatTest.java b/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsRepeatTest.java index 5621f4f..82ac54f 100644 --- a/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsRepeatTest.java +++ b/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsRepeatTest.java @@ -24,6 +24,7 @@ public class SensitiveWordBsRepeatTest { List wordList = SensitiveWordBs.newInstance() .ignoreRepeat(true) + .init() .findAll(text); Assert.assertEquals("[ⒻⒻⒻfⓤuⓤ⒰cⓒ⒦]", wordList.toString()); } diff --git a/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsTest.java b/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsTest.java index 7067d5c..979e25d 100644 --- a/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsTest.java +++ b/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsTest.java @@ -2,6 +2,7 @@ package com.github.houbb.sensitive.word.bs; import com.github.houbb.sensitive.word.support.allow.WordAllows; import com.github.houbb.sensitive.word.support.deny.WordDenys; +import com.github.houbb.sensitive.word.support.replace.SensitiveWordReplaces; import org.junit.Assert; import org.junit.Test; @@ -24,7 +25,7 @@ public class SensitiveWordBsTest { public void containsTest() { final String text = "五星红旗迎风飘扬,毛主席的画像屹立在天安门前。"; - Assert.assertTrue(SensitiveWordBs.newInstance().contains(text)); + Assert.assertTrue(SensitiveWordBs.newInstance().init().contains(text)); } /** @@ -35,7 +36,7 @@ public class SensitiveWordBsTest { public void findAllTest() { final String text = "五星红旗迎风飘扬,毛主席的画像屹立在天安门前。"; - List wordList = SensitiveWordBs.newInstance().findAll(text); + List wordList = SensitiveWordBs.newInstance().init().findAll(text); Assert.assertEquals("[五星红旗, 毛主席, 天安门]", wordList.toString()); } @@ -47,7 
+48,7 @@ public class SensitiveWordBsTest { public void findFirstTest() { final String text = "五星红旗迎风飘扬,毛主席的画像屹立在天安门前。"; - String word = SensitiveWordBs.newInstance().findFirst(text); + String word = SensitiveWordBs.newInstance().init().findFirst(text); Assert.assertEquals("五星红旗", word); } @@ -59,7 +60,7 @@ public class SensitiveWordBsTest { public void replaceTest() { final String text = "五星红旗迎风飘扬,毛主席的画像屹立在天安门前。"; - String result = SensitiveWordBs.newInstance().replace(text); + String result = SensitiveWordBs.newInstance().init().replace(text); Assert.assertEquals("****迎风飘扬,***的画像屹立在***前。", result); } @@ -71,7 +72,10 @@ public class SensitiveWordBsTest { public void replaceCharTest() { final String text = "五星红旗迎风飘扬,毛主席的画像屹立在天安门前。"; - String result = SensitiveWordBs.newInstance().replace(text, '0'); + String result = SensitiveWordBs.newInstance() + .sensitiveWordReplace(SensitiveWordReplaces.chars('0')) + .init() + .replace(text); Assert.assertEquals("0000迎风飘扬,000的画像屹立在000前。", result); } @@ -83,7 +87,7 @@ public class SensitiveWordBsTest { public void ignoreCaseTest() { final String text = "fuCK the bad words."; - String word = SensitiveWordBs.newInstance().findFirst(text); + String word = SensitiveWordBs.newInstance().init().findFirst(text); Assert.assertEquals("fuCK", word); } @@ -95,7 +99,7 @@ public class SensitiveWordBsTest { public void ignoreWidthTest() { final String text = "fuck the bad words."; - String word = SensitiveWordBs.newInstance().findFirst(text); + String word = SensitiveWordBs.newInstance().init().findFirst(text); Assert.assertEquals("fuck", word); } diff --git a/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsUrlTest.java b/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsUrlTest.java index e13a1c7..8f67774 100644 --- a/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsUrlTest.java +++ b/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsUrlTest.java @@ -22,11 +22,13 @@ public class 
SensitiveWordBsUrlTest { public void commonUrlTest() { final String text = "点击链接 www.baidu.com查看答案"; - List wordList = SensitiveWordBs.newInstance().findAll(text); + List wordList = SensitiveWordBs.newInstance().init().findAll(text); Assert.assertEquals("[链接, www.baidu.com]", wordList.toString()); Assert.assertEquals("点击** *************查看答案", SensitiveWordBs - .newInstance().replace(text)); + .newInstance() + .init() + .replace(text)); } /** @@ -41,10 +43,10 @@ public class SensitiveWordBsUrlTest { public void imageUrlTest() { final String text = "双击查看大图 www.big-image.png查看"; - List wordList = SensitiveWordBs.newInstance().findAll(text); + List wordList = SensitiveWordBs.newInstance().init().findAll(text); Assert.assertEquals("[www.big-image.png]", wordList.toString()); - Assert.assertEquals(text, SensitiveWordBs.newInstance().replace(text)); + Assert.assertEquals(text, SensitiveWordBs.newInstance().init().replace(text)); } } diff --git a/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsUserDefineTest.java b/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsUserDefineTest.java index 4978c34..b5c94ee 100644 --- a/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsUserDefineTest.java +++ b/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsUserDefineTest.java @@ -22,7 +22,7 @@ public class SensitiveWordBsUserDefineTest { public void allowAndDenyTest() { final String text = "gender 我们认为应该通过,自定义敏感词我们认为应该拒绝。"; - List wordList = SensitiveWordBs.newInstance().findAll(text); + List wordList = SensitiveWordBs.newInstance().init().findAll(text); Assert.assertEquals("[自定义敏感词]", wordList.toString()); } diff --git a/src/test/java/com/github/houbb/sensitive/word/data/DictNumTest.java b/src/test/java/com/github/houbb/sensitive/word/data/DictNumTest.java index a4dbb7a..07e56e8 100644 --- a/src/test/java/com/github/houbb/sensitive/word/data/DictNumTest.java +++ 
b/src/test/java/com/github/houbb/sensitive/word/data/DictNumTest.java @@ -1,12 +1,6 @@ package com.github.houbb.sensitive.word.data; -import com.github.houbb.heaven.support.handler.IHandler; import com.github.houbb.heaven.util.io.FileUtil; -import com.github.houbb.heaven.util.lang.StringUtil; -import com.github.houbb.heaven.util.util.CollectionUtil; -import com.github.houbb.opencc4j.core.impl.ZhConvertBootstrap; -import com.github.houbb.opencc4j.support.segment.impl.CharSegment; -import com.github.houbb.sensitive.word.utils.NumUtils; import org.junit.Ignore; import org.junit.Test; diff --git a/src/test/java/com/github/houbb/sensitive/word/data/DictSlimTest.java b/src/test/java/com/github/houbb/sensitive/word/data/DictSlimTest.java index c67e901..a6844ef 100644 --- a/src/test/java/com/github/houbb/sensitive/word/data/DictSlimTest.java +++ b/src/test/java/com/github/houbb/sensitive/word/data/DictSlimTest.java @@ -3,12 +3,11 @@ package com.github.houbb.sensitive.word.data; import com.github.houbb.heaven.support.filter.IFilter; import com.github.houbb.heaven.support.handler.IHandler; import com.github.houbb.heaven.util.io.FileUtil; -import com.github.houbb.heaven.util.lang.NumUtil; import com.github.houbb.heaven.util.lang.StringUtil; import com.github.houbb.heaven.util.util.CollectionUtil; import com.github.houbb.opencc4j.core.impl.ZhConvertBootstrap; import com.github.houbb.opencc4j.support.segment.impl.CharSegment; -import com.github.houbb.sensitive.word.utils.NumUtils; +import com.github.houbb.sensitive.word.utils.InnerNumUtils; import org.junit.Ignore; import org.junit.Test; @@ -128,7 +127,7 @@ public class DictSlimTest { // 停顿词语 String trim = string.replaceAll("加|否|与|和", ""); - String mapString = NumUtils.getMappingString(trim); + String mapString = InnerNumUtils.getMappingString(trim); boolean result = StringUtil.isDigit(mapString); if(result) { System.out.println(string);