From f45cb0be5a7d6992931848c428e7e67d285b72bd Mon Sep 17 00:00:00 2001 From: "binbin.hou" <1060732496@qq.com> Date: Thu, 16 Jan 2020 20:54:14 +0800 Subject: [PATCH] release branch 0.0.12 --- doc/CHANGE_LOG.md | 6 ++ ...£€æµ‹å®žçް.md => v011-邮箱网å€Regex检测实现.md} | 15 +++- pom.xml | 17 ++-- release.bat | 4 +- .../sensitive/word/api/IWordContext.java | 27 ++++-- .../houbb/sensitive/word/api/IWordMap.java | 1 + .../sensitive/word/bs/SensitiveWordBs.java | 64 +++++++++++---- .../word/bs/SensitiveWordContext.java | 36 +++++--- .../check}/ISensitiveCheck.java | 11 +-- .../support/check/SensitiveCheckChain.java | 53 ------------ .../support/check/SensitiveCheckResult.java | 64 +++++++++++++++ .../check/impl/SensitiveCheckChain.java | 58 +++++++++++++ .../SensitiveCheckEmail.java} | 11 +-- .../SensitiveCheckNum.java} | 17 ++-- .../support/check/impl/SensitiveCheckUrl.java | 82 +++++++++++++++++++ .../SensitiveCheckWord.java} | 11 +-- .../word/support/map/SensitiveWordMap.java | 33 +++++--- .../word/bs/SensitiveWordBsChineseTest.java | 2 +- .../word/bs/SensitiveWordBsEmailTest.java | 2 +- .../word/bs/SensitiveWordBsUrlTest.java | 50 +++++++++++ 20 files changed, 429 insertions(+), 135 deletions(-) rename doc/issues/roadmap/{v011-邮箱检测实现.md => v011-邮箱网å€Regex检测实现.md} (70%) rename src/main/java/com/github/houbb/sensitive/word/{api => support/check}/ISensitiveCheck.java (71%) delete mode 100644 src/main/java/com/github/houbb/sensitive/word/support/check/SensitiveCheckChain.java create mode 100644 src/main/java/com/github/houbb/sensitive/word/support/check/SensitiveCheckResult.java create mode 100644 src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckChain.java rename src/main/java/com/github/houbb/sensitive/word/support/check/{SensitiveEmailCheck.java => impl/SensitiveCheckEmail.java} (83%) rename src/main/java/com/github/houbb/sensitive/word/support/check/{SensitiveNumCheck.java => impl/SensitiveCheckNum.java} (76%) create mode 100644 src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckUrl.java rename src/main/java/com/github/houbb/sensitive/word/support/check/{SensitiveWordCheck.java => impl/SensitiveCheckWord.java} (87%) create mode 100644 src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsUrlTest.java diff --git a/doc/CHANGE_LOG.md b/doc/CHANGE_LOG.md index b1c0892..a4c1aca 100644 --- a/doc/CHANGE_LOG.md +++ b/doc/CHANGE_LOG.md @@ -87,3 +87,9 @@ |:---|:---|:---|:---|:--| | 1 | A | 添加对于数字过滤的å¯é…置型 | 2020-1-14 22:48:12 | | | 2 | A | æ·»åŠ éƒ¨åˆ†æ•æ„Ÿè¯ | 2020-1-14 22:48:12 | | + +# release_0.0.12 + +| åºå· | å˜æ›´ç±»åž‹ | 说明 | æ—¶é—´ | 备注 | +|:---|:---|:---|:---|:--| +| 1 | A | 添加对于网å€çš„过滤 | 2020-1-16 20:51:58 | | diff --git a/doc/issues/roadmap/v011-邮箱检测实现.md b/doc/issues/roadmap/v011-邮箱网å€Regex检测实现.md similarity index 70% rename from doc/issues/roadmap/v011-邮箱检测实现.md rename to doc/issues/roadmap/v011-邮箱网å€Regex检测实现.md index f4849ce..39f4736 100644 --- a/doc/issues/roadmap/v011-邮箱检测实现.md +++ b/doc/issues/roadmap/v011-邮箱网å€Regex检测实现.md @@ -1,6 +1,8 @@ # 是å¦ä¸ºé‚®ç®± check +暂时先使用基本的正则表达å¼ï¼Œ + ================== 网å€ç­‰ç­‰ @@ -13,6 +15,10 @@ Image-URL 检测,é¿å…替æ¢é”™è¯¯ã€‚ 针对ä¸åŒçš„ä¿¡æ¯è„±æ•,则需è¦çŸ¥é“å¯¹åº”çš„æ£€æµ‹ä»£ç æ˜¯ä»€ä¹ˆã€‚ +jpg +png +jpeg +gif ## 是å¦è„±æ•çš„é…ç½® @@ -26,4 +32,11 @@ Image-URL 检测,é¿å…替æ¢é”™è¯¯ã€‚ å¯ä»¥ç›´æŽ¥å¼€è¾Ÿå¦ä¸€é“éªŒè¯æ–¹å¼ã€‚ -直接 regex+全文检索实现。 \ No newline at end of file +直接 regex+全文检索实现。 + +# 剿 + +首先实现 Regex + +这里也å¯ä»¥æ”¯æŒ allow_regex/deny_regex + diff --git a/pom.xml b/pom.xml index 358b419..e9e767d 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ com.github.houbb sensitive-word - 0.0.12-SNAPSHOT + 0.0.12 @@ -25,7 +25,7 @@ 1.7 - 0.1.72 + 0.1.73 1.2.0 @@ -37,13 +37,6 @@ - - com.github.houbb - heaven - ${heaven.version} - true - - com.github.houbb opencc4j @@ -59,6 +52,12 @@ + + + com.github.houbb + heaven + ${heaven.version} + diff --git a/release.bat b/release.bat index 164e8c4..b0cb2a0 100644 --- a/release.bat +++ b/release.bat @@ -10,9 +10,9 @@ ECHO "============================= RELEASE START..." :: 版本å·ä¿¡æ¯(éœ€è¦æ‰‹åŠ¨æŒ‡å®š) :::: 旧版本åç§° -SET version=0.0.11 +SET version=0.0.12 :::: 新版本åç§° -SET newVersion=0.0.12 +SET newVersion=0.0.13 :::: 组织åç§° SET groupName=com.github.houbb :::: 项目åç§° diff --git a/src/main/java/com/github/houbb/sensitive/word/api/IWordContext.java b/src/main/java/com/github/houbb/sensitive/word/api/IWordContext.java index ec1f268..12b219d 100644 --- a/src/main/java/com/github/houbb/sensitive/word/api/IWordContext.java +++ b/src/main/java/com/github/houbb/sensitive/word/api/IWordContext.java @@ -88,30 +88,45 @@ public interface IWordContext { * @return 数字检测 * @since 0.0.5 */ - boolean sensitiveNumCheck(); + boolean sensitiveCheckNum(); /** * è®¾ç½®æ•æ„Ÿæ•°å­—检测 - * @param sensitiveNumCheck æ•°å­—æ ¼å¼æ£€æµ‹ + * @param sensitiveCheckNum æ•°å­—æ ¼å¼æ£€æµ‹ * @return this * @since 0.0.5 */ - IWordContext sensitiveNumCheck(final boolean sensitiveNumCheck); + IWordContext sensitiveCheckNum(final boolean sensitiveCheckNum); /** * 是å¦è¿›è¡Œé‚®ç®±æ£€æµ‹ * @return this * @since 0.0.9 */ - boolean sensitiveEmailCheck(); + boolean sensitiveCheckEmail(); /** * è®¾ç½®æ•æ„Ÿé‚®ç®±æ£€æµ‹ - * @param sensitiveEmailCheck æ˜¯å¦æ£€æµ‹ + * @param sensitiveCheckEmail æ˜¯å¦æ£€æµ‹ * @return this * @since 0.0.9 */ - IWordContext sensitiveEmailCheck(final boolean sensitiveEmailCheck); + IWordContext sensitiveCheckEmail(final boolean sensitiveCheckEmail); + + /** + * æ•æ„Ÿé“¾æŽ¥æ£€æµ‹ + * @return 是å¦å¯ç”¨ + * @since 0. + */ + boolean sensitiveCheckUrl(); + + /** + * è®¾ç½®æ•æ„Ÿé‚®ç®±æ£€æµ‹ + * @param sensitiveCheckUrl æ˜¯å¦æ£€æµ‹ + * @return this + * @since 0.0.9 + */ + IWordContext sensitiveCheckUrl(final boolean sensitiveCheckUrl); /** * 忽略英文的写法 diff --git a/src/main/java/com/github/houbb/sensitive/word/api/IWordMap.java b/src/main/java/com/github/houbb/sensitive/word/api/IWordMap.java index fa6a58f..2b30244 100644 --- a/src/main/java/com/github/houbb/sensitive/word/api/IWordMap.java +++ b/src/main/java/com/github/houbb/sensitive/word/api/IWordMap.java @@ -1,6 +1,7 @@ package com.github.houbb.sensitive.word.api; import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum; +import com.github.houbb.sensitive.word.support.check.ISensitiveCheck; import java.util.Collection; import java.util.List; diff --git a/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java b/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java index 777d39a..f6f028d 100644 --- a/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java +++ b/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java @@ -11,6 +11,7 @@ import java.util.List; /** * æ•æ„Ÿè¯å¼•导类 + * * @author binbin.hou * @since 0.0.1 */ @@ -18,37 +19,36 @@ public class SensitiveWordBs { /** * ç§æœ‰åŒ–构造器 + * * @since 0.0.1 */ - private SensitiveWordBs(){} + private SensitiveWordBs() { + } /** * æ•æ„Ÿè¯ map + * * @since 0.0.1 */ private static volatile IWordMap sensitiveWordMap; /** * 默认的执行上下文 + * * @since 0.0.4 */ private volatile IWordContext context; - /** - * 是å¦å¯ç”¨æ•°å­—校验 - * @since 0.0.11 - */ - private boolean enableNumCheck = true; - /** * DCL åˆå§‹åŒ– wordMap ä¿¡æ¯ + * * @return åˆå§‹åŒ–åŽçš„结果 * @since 0.0.4 */ private static IWordMap initWordMap() { - if(sensitiveWordMap == null) { + if (sensitiveWordMap == null) { synchronized (IWordMap.class) { - if(sensitiveWordMap == null) { + if (sensitiveWordMap == null) { // 加载é…ç½®ä¿¡æ¯ IWordData wordData = new SensitiveWordData(); List lines = wordData.getWordData(); @@ -65,8 +65,9 @@ public class SensitiveWordBs { /** * 新建验è¯å®žä¾‹ - * + *

* double-lock + * * @return this * @since 0.0.1 */ @@ -81,16 +82,40 @@ public class SensitiveWordBs { /** * 设置是å¦å¯åŠ¨æ•°å­—æ£€æµ‹ + * * @param enableNumCheck 数字检测 * @since 0.0.11 */ public SensitiveWordBs enableNumCheck(boolean enableNumCheck) { - this.context.sensitiveNumCheck(enableNumCheck); + this.context.sensitiveCheckNum(enableNumCheck); return this; -} + } + + /** + * 设置是å¦å¯åЍ email 检测 + * + * @param enableEmailCheck email 检测 + * @since 0.0.11 + */ + public SensitiveWordBs enableEmailCheck(boolean enableEmailCheck) { + this.context.sensitiveCheckEmail(enableEmailCheck); + return this; + } + + /** + * 设置是å¦å¯åЍ url 检测 + * + * @param enableUrlCheck url 检测 + * @since 0.0.12 + */ + public SensitiveWordBs enableUrlCheck(boolean enableUrlCheck) { + this.context.sensitiveCheckUrl(enableUrlCheck); + return this; + } /** * 构建默认的上下文 + * * @return 结果 * @since 0.0.4 */ @@ -105,13 +130,16 @@ public class SensitiveWordBs { wordContext.ignoreRepeat(true); // 开坿 ¡éªŒ - wordContext.sensitiveNumCheck(true); - wordContext.sensitiveEmailCheck(true); + wordContext.sensitiveCheckNum(true); + wordContext.sensitiveCheckEmail(true); + wordContext.sensitiveCheckUrl(true); return wordContext; } + /** * 是å¦åŒ…嫿•æ„Ÿè¯ + * * @param target 目标字符串 * @return æ˜¯å¦ * @since 0.0.1 @@ -124,6 +152,7 @@ public class SensitiveWordBs { * è¿”å›žæ‰€æœ‰çš„æ•æ„Ÿè¯ * 1. 这里是默认去é‡çš„,且是有åºçš„。 * 2. 如果ä¸å­˜åœ¨ï¼Œè¿”回空列表 + * * @param target 目标字符串 * @return æ•æ„Ÿè¯åˆ—表 * @since 0.0.1 @@ -135,6 +164,7 @@ public class SensitiveWordBs { /** * è¿”å›žç¬¬ä¸€ä¸ªæ•æ„Ÿè¯ * (1)如果ä¸å­˜åœ¨ï¼Œåˆ™è¿”回 {@code null} + * * @param target 目标字符串 * @return æ•æ„Ÿè¯ * @since 0.0.1 @@ -145,7 +175,8 @@ public class SensitiveWordBs { /** * æ›¿æ¢æ‰€æœ‰å†…容 - * @param target 目标字符串 + * + * @param target 目标字符串 * @param replaceChar 替æ¢ä¸ºçš„ char * @return 替æ¢åŽç»“æžœ * @since 0.0.2 @@ -157,12 +188,13 @@ public class SensitiveWordBs { /** * æ›¿æ¢æ‰€æœ‰å†…容 * 1. 默认使用空格替æ¢ï¼Œé¿å…æ˜Ÿå·æ”¹å˜ md 的格å¼ã€‚ + * * @param target 目标字符串 * @return 替æ¢åŽç»“æžœ * @since 0.0.2 */ public String replace(final String target) { - return this.replace(target, CharConst.BLANK); + return this.replace(target, CharConst.STAR); } } diff --git a/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordContext.java b/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordContext.java index b611f62..2a1ad9d 100644 --- a/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordContext.java +++ b/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordContext.java @@ -39,7 +39,7 @@ public class SensitiveWordContext implements IWordContext { * 是å¦è¿›è¡Œæ•感数字检测 * @since 0.0.6 */ - private boolean sensitiveNumCheck; + private boolean sensitiveCheckNum; /** * 是å¦å¿½ç•¥ä¸­æ–‡ç¹ç®€ä½“ @@ -63,7 +63,13 @@ public class SensitiveWordContext implements IWordContext { * 是å¦è¿›è¡Œé‚®ç®±æµ‹è¯• * @since 0.0.9 */ - private boolean sensitiveEmailCheck; + private boolean sensitiveCheckEmail; + + /** + * 是å¦è¿›è¡Œ url 测试 + * @since 0.0.12 + */ + private boolean sensitiveCheckUrl; /** * ç§æœ‰åŒ–构造器 @@ -126,13 +132,13 @@ public class SensitiveWordContext implements IWordContext { } @Override - public boolean sensitiveNumCheck() { - return sensitiveNumCheck; + public boolean sensitiveCheckNum() { + return sensitiveCheckNum; } @Override - public SensitiveWordContext sensitiveNumCheck(boolean sensitiveNumCheck) { - this.sensitiveNumCheck = sensitiveNumCheck; + public SensitiveWordContext sensitiveCheckNum(boolean sensitiveCheckNum) { + this.sensitiveCheckNum = sensitiveCheckNum; return this; } @@ -170,14 +176,24 @@ public class SensitiveWordContext implements IWordContext { } @Override - public boolean sensitiveEmailCheck() { - return sensitiveEmailCheck; + public boolean sensitiveCheckEmail() { + return sensitiveCheckEmail; } @Override - public SensitiveWordContext sensitiveEmailCheck(boolean sensitiveEmailCheck) { - this.sensitiveEmailCheck = sensitiveEmailCheck; + public SensitiveWordContext sensitiveCheckEmail(boolean sensitiveCheckEmail) { + this.sensitiveCheckEmail = sensitiveCheckEmail; return this; } + @Override + public boolean sensitiveCheckUrl() { + return sensitiveCheckUrl; + } + + @Override + public SensitiveWordContext sensitiveCheckUrl(boolean sensitiveCheckUrl) { + this.sensitiveCheckUrl = sensitiveCheckUrl; + return this; + } } diff --git a/src/main/java/com/github/houbb/sensitive/word/api/ISensitiveCheck.java b/src/main/java/com/github/houbb/sensitive/word/support/check/ISensitiveCheck.java similarity index 71% rename from src/main/java/com/github/houbb/sensitive/word/api/ISensitiveCheck.java rename to src/main/java/com/github/houbb/sensitive/word/support/check/ISensitiveCheck.java index 08a3eee..a07578c 100644 --- a/src/main/java/com/github/houbb/sensitive/word/api/ISensitiveCheck.java +++ b/src/main/java/com/github/houbb/sensitive/word/support/check/ISensitiveCheck.java @@ -1,5 +1,6 @@ -package com.github.houbb.sensitive.word.api; +package com.github.houbb.sensitive.word.support.check; +import com.github.houbb.sensitive.word.api.IWordContext; import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum; /** @@ -33,9 +34,9 @@ public interface ISensitiveCheck { * @return æ•æ„Ÿä¿¡æ¯å¯¹åº”的长度 * @since 0.0.5 */ - int checkSensitive(final String txt, - final int beginIndex, - final ValidModeEnum validModeEnum, - final IWordContext context); + SensitiveCheckResult sensitiveCheck(final String txt, + final int beginIndex, + final ValidModeEnum validModeEnum, + final IWordContext context); } diff --git a/src/main/java/com/github/houbb/sensitive/word/support/check/SensitiveCheckChain.java b/src/main/java/com/github/houbb/sensitive/word/support/check/SensitiveCheckChain.java deleted file mode 100644 index e0c45e6..0000000 --- a/src/main/java/com/github/houbb/sensitive/word/support/check/SensitiveCheckChain.java +++ /dev/null @@ -1,53 +0,0 @@ -package com.github.houbb.sensitive.word.support.check; - -import com.github.houbb.heaven.annotation.ThreadSafe; -import com.github.houbb.heaven.support.instance.impl.Instances; -import com.github.houbb.heaven.util.guava.Guavas; -import com.github.houbb.sensitive.word.api.ISensitiveCheck; -import com.github.houbb.sensitive.word.api.IWordContext; -import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum; - -import java.util.List; - -/** - * æ•æ„Ÿè¯æ£€æµ‹è´£ä»»é“¾æ¨¡å¼ - * - * 这里å¯ä»¥æä¾›ä¸€ä¸ªå…¬å…±çš„父类。 - * - * - * DFA 算法的优化å¯ä»¥å‚考论文: - * ã€DFA 算法】å„ç§è®ºæ–‡ã€‚ - * - * @author binbin.hou - * @since 0.0.5 - */ -@ThreadSafe -public class SensitiveCheckChain implements ISensitiveCheck { - - @Override - public int checkSensitive(String txt, int beginIndex, ValidModeEnum validModeEnum, IWordContext context) { - // åˆå§‹åŒ–责任链 - List sensitiveChecks = Guavas.newArrayList(); - // é»˜è®¤æ·»åŠ æ•æ„Ÿè¯æ ¡éªŒ - sensitiveChecks.add(Instances.singleton(SensitiveWordCheck.class)); - if(context.sensitiveNumCheck()) { - sensitiveChecks.add(Instances.singleton(SensitiveNumCheck.class)); - } - if(context.sensitiveEmailCheck()) { - sensitiveChecks.add(Instances.singleton(SensitiveEmailCheck.class)); - } - - // 循环调用 - for(ISensitiveCheck sensitiveCheck : sensitiveChecks) { - int result = sensitiveCheck.checkSensitive(txt, beginIndex, validModeEnum, context); - - if(result > 0) { - return result; - } - } - - // 默认返回 0 - return 0; - } - -} diff --git a/src/main/java/com/github/houbb/sensitive/word/support/check/SensitiveCheckResult.java b/src/main/java/com/github/houbb/sensitive/word/support/check/SensitiveCheckResult.java new file mode 100644 index 0000000..88d24fb --- /dev/null +++ b/src/main/java/com/github/houbb/sensitive/word/support/check/SensitiveCheckResult.java @@ -0,0 +1,64 @@ +package com.github.houbb.sensitive.word.support.check; + +/** + * æ•æ„Ÿä¿¡æ¯ç›‘测接å£ç»“æžœ + * + * å¯ä»¥ä½¿ç”¨è´£ä»»é“¾çš„æ¨¡å¼ï¼Œå¾ªçŽ¯è°ƒç”¨ã€‚ + * @author binbin.hou + * @since 0.0.12 + */ +public class SensitiveCheckResult { + + /** + * 下标 + * @since 0.0.12 + */ + private int index; + + /** + * 检测类 + * @since 0.0.12 + */ + private Class checkClass; + + /** + * 实例化 + * @param index 返回索引 + * @param checkClass 验è¯ç±» + * @return 结果 + * @since 0.0.12 + */ + public static SensitiveCheckResult of(final int index, + final Class checkClass) { + SensitiveCheckResult result = new SensitiveCheckResult(); + result.index(index).checkClass(checkClass); + return result; + } + + public int index() { + return index; + } + + public SensitiveCheckResult index(int index) { + this.index = index; + return this; + } + + public Class checkClass() { + return checkClass; + } + + public SensitiveCheckResult checkClass(Class checkClass) { + this.checkClass = checkClass; + return this; + } + + @Override + public String toString() { + return "SensitiveCheckResult{" + + "index=" + index + + ", checkClass=" + checkClass + + '}'; + } + +} diff --git a/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckChain.java b/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckChain.java new file mode 100644 index 0000000..2d099fa --- /dev/null +++ b/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckChain.java @@ -0,0 +1,58 @@ +package com.github.houbb.sensitive.word.support.check.impl; + +import com.github.houbb.heaven.annotation.ThreadSafe; +import com.github.houbb.heaven.support.instance.impl.Instances; +import com.github.houbb.heaven.util.guava.Guavas; +import com.github.houbb.sensitive.word.api.IWordContext; +import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum; +import com.github.houbb.sensitive.word.support.check.ISensitiveCheck; +import com.github.houbb.sensitive.word.support.check.SensitiveCheckResult; + +import java.util.List; + +/** + * æ•æ„Ÿè¯æ£€æµ‹è´£ä»»é“¾æ¨¡å¼ + * + * 这里å¯ä»¥æä¾›ä¸€ä¸ªå…¬å…±çš„父类。 + * + * + * DFA 算法的优化å¯ä»¥å‚考论文: + * ã€DFA 算法】å„ç§è®ºæ–‡ã€‚ + * + * @author binbin.hou + * @since 0.0.5 + */ +@ThreadSafe +public class SensitiveCheckChain implements ISensitiveCheck { + + @Override + public SensitiveCheckResult sensitiveCheck(String txt, int beginIndex, ValidModeEnum validModeEnum, IWordContext context) { + // åˆå§‹åŒ–责任链 + List sensitiveChecks = Guavas.newArrayList(); + // é»˜è®¤æ·»åŠ æ•æ„Ÿè¯æ ¡éªŒ + sensitiveChecks.add(Instances.singleton(SensitiveCheckWord.class)); + if(context.sensitiveCheckNum()) { + sensitiveChecks.add(Instances.singleton(SensitiveCheckNum.class)); + } + if(context.sensitiveCheckEmail()) { + sensitiveChecks.add(Instances.singleton(SensitiveCheckEmail.class)); + } + if(context.sensitiveCheckUrl()) { + sensitiveChecks.add(Instances.singleton(SensitiveCheckUrl.class)); + } + + // 循环调用 + for(ISensitiveCheck sensitiveCheck : sensitiveChecks) { + SensitiveCheckResult result = sensitiveCheck.sensitiveCheck(txt, beginIndex, validModeEnum, context); + + if(result.index() > 0) { + return result; + } + } + + // 这里直接进行正则表达å¼ç›¸å…³çš„调用。 + // 默认返回 0 + return SensitiveCheckResult.of(0, SensitiveCheckChain.class); + } + +} diff --git a/src/main/java/com/github/houbb/sensitive/word/support/check/SensitiveEmailCheck.java b/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckEmail.java similarity index 83% rename from src/main/java/com/github/houbb/sensitive/word/support/check/SensitiveEmailCheck.java rename to src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckEmail.java index 0325212..3718b5c 100644 --- a/src/main/java/com/github/houbb/sensitive/word/support/check/SensitiveEmailCheck.java +++ b/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckEmail.java @@ -1,12 +1,13 @@ -package com.github.houbb.sensitive.word.support.check; +package com.github.houbb.sensitive.word.support.check.impl; import com.github.houbb.heaven.annotation.ThreadSafe; import com.github.houbb.heaven.support.instance.impl.Instances; import com.github.houbb.heaven.util.lang.CharUtil; import com.github.houbb.heaven.util.util.regex.RegexUtil; -import com.github.houbb.sensitive.word.api.ISensitiveCheck; import com.github.houbb.sensitive.word.api.IWordContext; import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum; +import com.github.houbb.sensitive.word.support.check.ISensitiveCheck; +import com.github.houbb.sensitive.word.support.check.SensitiveCheckResult; import com.github.houbb.sensitive.word.support.format.CharFormatChain; /** @@ -25,10 +26,10 @@ import com.github.houbb.sensitive.word.support.format.CharFormatChain; * @since 0.0.9 */ @ThreadSafe -public class SensitiveEmailCheck implements ISensitiveCheck { +public class SensitiveCheckEmail implements ISensitiveCheck { @Override - public int checkSensitive(String txt, int beginIndex, ValidModeEnum validModeEnum, IWordContext context) { + public SensitiveCheckResult sensitiveCheck(String txt, int beginIndex, ValidModeEnum validModeEnum, IWordContext context) { // è®°å½•æ•æ„Ÿè¯çš„长度 int lengthCount = 0; int actualLength = 0; @@ -59,7 +60,7 @@ public class SensitiveEmailCheck implements ISensitiveCheck { } } - return actualLength; + return SensitiveCheckResult.of(actualLength, SensitiveCheckEmail.class); } /** diff --git a/src/main/java/com/github/houbb/sensitive/word/support/check/SensitiveNumCheck.java b/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckNum.java similarity index 76% rename from src/main/java/com/github/houbb/sensitive/word/support/check/SensitiveNumCheck.java rename to src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckNum.java index 166cefb..74bd783 100644 --- a/src/main/java/com/github/houbb/sensitive/word/support/check/SensitiveNumCheck.java +++ b/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckNum.java @@ -1,17 +1,12 @@ -package com.github.houbb.sensitive.word.support.check; +package com.github.houbb.sensitive.word.support.check.impl; import com.github.houbb.heaven.annotation.ThreadSafe; import com.github.houbb.heaven.support.instance.impl.Instances; -import com.github.houbb.heaven.util.io.FileUtil; -import com.github.houbb.heaven.util.lang.NumUtil; -import com.github.houbb.heaven.util.lang.StringUtil; -import com.github.houbb.sensitive.word.api.ISensitiveCheck; import com.github.houbb.sensitive.word.api.IWordContext; import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum; +import com.github.houbb.sensitive.word.support.check.ISensitiveCheck; +import com.github.houbb.sensitive.word.support.check.SensitiveCheckResult; import com.github.houbb.sensitive.word.support.format.CharFormatChain; -import com.github.houbb.sensitive.word.support.format.IgnoreNumStyleCharFormat; - -import java.util.List; /** * æ•æ„Ÿè¯ç›‘测实现 @@ -21,10 +16,10 @@ import java.util.List; * @since 0.0.5 */ @ThreadSafe -public class SensitiveNumCheck implements ISensitiveCheck { +public class SensitiveCheckNum implements ISensitiveCheck { @Override - public int checkSensitive(String txt, int beginIndex, ValidModeEnum validModeEnum, IWordContext context) { + public SensitiveCheckResult sensitiveCheck(String txt, int beginIndex, ValidModeEnum validModeEnum, IWordContext context) { // è®°å½•æ•æ„Ÿè¯çš„长度 int lengthCount = 0; int actualLength = 0; @@ -55,7 +50,7 @@ public class SensitiveNumCheck implements ISensitiveCheck { } } - return actualLength; + return SensitiveCheckResult.of(actualLength, SensitiveCheckNum.class); } /** diff --git a/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckUrl.java b/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckUrl.java new file mode 100644 index 0000000..d5760fb --- /dev/null +++ b/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckUrl.java @@ -0,0 +1,82 @@ +package com.github.houbb.sensitive.word.support.check.impl; + +import com.github.houbb.heaven.annotation.CommonEager; +import com.github.houbb.heaven.annotation.ThreadSafe; +import com.github.houbb.heaven.support.instance.impl.Instances; +import com.github.houbb.heaven.util.lang.CharUtil; +import com.github.houbb.heaven.util.util.regex.RegexUtil; +import com.github.houbb.sensitive.word.api.IWordContext; +import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum; +import com.github.houbb.sensitive.word.support.check.ISensitiveCheck; +import com.github.houbb.sensitive.word.support.check.SensitiveCheckResult; +import com.github.houbb.sensitive.word.support.format.CharFormatChain; + +/** + * URL æ­£åˆ™è¡¨è¾¾å¼æ£€æµ‹å®žçŽ°ã€‚ + * + * 也å¯ä»¥ä¸¥æ ¼çš„ä¿ç•™ä¸‹æ¥ã€‚ + * + * (1ï¼‰æš‚æ—¶å…ˆç²—ç•¥çš„å¤„ç† web-site + * (2)如果网å€çš„æœ€åŽä¸ºå›¾ç‰‡ç±»åž‹ï¼Œåˆ™è·³è¿‡ã€‚ + * (3)长度超过 70,直接结æŸã€‚ + * + * @author binbin.hou + * @since 0.0.9 + */ +@ThreadSafe +public class SensitiveCheckUrl implements ISensitiveCheck { + + /** + * 最长的网å€é•¿åº¦ + * @since 0.0.12 + */ + private static final int MAX_WEB_SITE_LEN = 70; + + @Override + public SensitiveCheckResult sensitiveCheck(String txt, int beginIndex, ValidModeEnum validModeEnum, IWordContext context) { + // è®°å½•æ•æ„Ÿè¯çš„长度 + int lengthCount = 0; + int actualLength = 0; + + StringBuilder stringBuilder = new StringBuilder(); + // è¿™é‡Œå·æ‡’直接使用 String 拼接,然åŽç»“åˆæ­£åˆ™è¡¨è¾¾å¼ã€‚ + // DFA 本质就å¯ä»¥åšæ­£åˆ™è¡¨è¾¾å¼ï¼Œè¿™æ ·å®žçްä¸å…性能会差一些。 + // åŽæœŸå¦‚果有想法,对 DFA 进一步深入学习åŽï¼Œå°†è¿›è¡Œä¼˜åŒ–。 + for(int i = beginIndex; i < txt.length(); i++) { + char currentChar = txt.charAt(i); + char mappingChar = Instances.singleton(CharFormatChain.class) + .format(currentChar, context); + + if(CharUtil.isWebSiteChar(mappingChar) + && lengthCount <= MAX_WEB_SITE_LEN) { + lengthCount++; + stringBuilder.append(currentChar); + + if(isCondition(stringBuilder.toString())) { + actualLength = lengthCount; + + // 是å¦é历全部匹é…çš„æ¨¡å¼ + if(ValidModeEnum.FAIL_FAST.equals(validModeEnum)) { + break; + } + } + } else { + break; + } + } + + return SensitiveCheckResult.of(actualLength, SensitiveCheckUrl.class); + } + + /** + * 这里指定一个阈值æ¡ä»¶ + * @param string 长度 + * @return æ˜¯å¦æ»¡è¶³æ¡ä»¶ + * @since 0.0.12 + */ + private boolean isCondition(final String string) { + return RegexUtil.isWebSite(string); + } + + +} diff --git a/src/main/java/com/github/houbb/sensitive/word/support/check/SensitiveWordCheck.java b/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckWord.java similarity index 87% rename from src/main/java/com/github/houbb/sensitive/word/support/check/SensitiveWordCheck.java rename to src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckWord.java index c8206a6..9b6a364 100644 --- a/src/main/java/com/github/houbb/sensitive/word/support/check/SensitiveWordCheck.java +++ b/src/main/java/com/github/houbb/sensitive/word/support/check/impl/SensitiveCheckWord.java @@ -1,12 +1,13 @@ -package com.github.houbb.sensitive.word.support.check; +package com.github.houbb.sensitive.word.support.check.impl; import com.github.houbb.heaven.annotation.ThreadSafe; import com.github.houbb.heaven.support.instance.impl.Instances; import com.github.houbb.heaven.util.lang.ObjectUtil; -import com.github.houbb.sensitive.word.api.ISensitiveCheck; import com.github.houbb.sensitive.word.api.IWordContext; import com.github.houbb.sensitive.word.constant.AppConst; import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum; +import com.github.houbb.sensitive.word.support.check.ISensitiveCheck; +import com.github.houbb.sensitive.word.support.check.SensitiveCheckResult; import com.github.houbb.sensitive.word.support.format.CharFormatChain; import java.util.Map; @@ -17,10 +18,10 @@ import java.util.Map; * @since 0.0.5 */ @ThreadSafe -public class SensitiveWordCheck implements ISensitiveCheck { +public class SensitiveCheckWord implements ISensitiveCheck { @Override - public int checkSensitive(String txt, int beginIndex, ValidModeEnum validModeEnum, IWordContext context) { + public SensitiveCheckResult sensitiveCheck(String txt, int beginIndex, ValidModeEnum validModeEnum, IWordContext context) { Map nowMap = context.sensitiveWordMap(); // è®°å½•æ•æ„Ÿè¯çš„长度 @@ -53,7 +54,7 @@ public class SensitiveWordCheck implements ISensitiveCheck { } } - return actualLength; + return SensitiveCheckResult.of(actualLength, SensitiveCheckWord.class); } /** diff --git a/src/main/java/com/github/houbb/sensitive/word/support/map/SensitiveWordMap.java b/src/main/java/com/github/houbb/sensitive/word/support/map/SensitiveWordMap.java index c233821..35e5295 100644 --- a/src/main/java/com/github/houbb/sensitive/word/support/map/SensitiveWordMap.java +++ b/src/main/java/com/github/houbb/sensitive/word/support/map/SensitiveWordMap.java @@ -3,6 +3,7 @@ package com.github.houbb.sensitive.word.support.map; import com.github.houbb.heaven.annotation.ThreadSafe; import com.github.houbb.heaven.support.instance.impl.Instances; import com.github.houbb.heaven.util.guava.Guavas; +import com.github.houbb.heaven.util.io.FileUtil; import com.github.houbb.heaven.util.lang.CharUtil; import com.github.houbb.heaven.util.lang.ObjectUtil; import com.github.houbb.heaven.util.lang.StringUtil; @@ -12,7 +13,9 @@ import com.github.houbb.sensitive.word.api.IWordContext; import com.github.houbb.sensitive.word.api.IWordMap; import com.github.houbb.sensitive.word.constant.AppConst; import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum; -import com.github.houbb.sensitive.word.support.check.SensitiveCheckChain; +import com.github.houbb.sensitive.word.support.check.SensitiveCheckResult; +import com.github.houbb.sensitive.word.support.check.impl.SensitiveCheckChain; +import com.github.houbb.sensitive.word.support.check.impl.SensitiveCheckUrl; import java.util.Collection; import java.util.HashMap; @@ -118,9 +121,9 @@ public class SensitiveWordMap implements IWordMap { } for (int i = 0; i < string.length(); i++) { - int checkResult = checkSensitive(string, i, ValidModeEnum.FAIL_FAST, context); + SensitiveCheckResult checkResult = sensitiveCheck(string, i, ValidModeEnum.FAIL_FAST, context); // 快速返回 - if (checkResult > 0) { + if (checkResult.index() > 0) { return true; } } @@ -178,9 +181,9 @@ public class SensitiveWordMap implements IWordMap { List resultList = Guavas.newArrayList(); for (int i = 0; i < text.length(); i++) { - int wordLength = checkSensitive(text, i, ValidModeEnum.FAIL_OVER, context); - + SensitiveCheckResult checkResult = sensitiveCheck(text, i, ValidModeEnum.FAIL_OVER, context); // 命中 + int wordLength = checkResult.index(); if (wordLength > 0) { // ä¿å­˜æ•æ„Ÿè¯ String sensitiveWord = text.substring(i, i + wordLength); @@ -223,12 +226,22 @@ public class SensitiveWordMap implements IWordMap { for (int i = 0; i < target.length(); i++) { char currentChar = target.charAt(i); // 内层直接从 i 开始往åŽé历,这个算法的,获å–第一个匹é…çš„å•è¯ - int wordLength = checkSensitive(target, i, ValidModeEnum.FAIL_OVER, context); + SensitiveCheckResult checkResult = sensitiveCheck(target, i, ValidModeEnum.FAIL_OVER, context); // æ•æ„Ÿè¯ + int wordLength = checkResult.index(); if(wordLength > 0) { - String replaceStr = CharUtil.repeat(replaceChar, wordLength); - resultBuilder.append(replaceStr); + // æ˜¯å¦æ‰§è¡Œæ›¿æ¢ + Class checkClass = checkResult.checkClass(); + String string = target.substring(i, i+wordLength); + if(SensitiveCheckUrl.class.equals(checkClass) + && FileUtil.isImage(string)) { + // 直接使用原始内容,é¿å… markdown 图片转æ¢å¤±è´¥ + resultBuilder.append(string); + } else { + String replaceStr = CharUtil.repeat(replaceChar, wordLength); + resultBuilder.append(replaceStr); + } // ç›´æŽ¥è·³è¿‡æ•æ„Ÿè¯çš„长度 i += wordLength-1; @@ -242,13 +255,13 @@ public class SensitiveWordMap implements IWordMap { } @Override - public int checkSensitive(String txt, int beginIndex, ValidModeEnum validModeEnum, IWordContext context) { + public SensitiveCheckResult sensitiveCheck(String txt, int beginIndex, ValidModeEnum validModeEnum, IWordContext context) { // é»˜è®¤æ‰§è¡Œæ•æ„Ÿè¯æ“作 context.sensitiveWordMap(innerWordMap); // 责任链模å¼è°ƒç”¨ return Instances.singleton(SensitiveCheckChain.class) - .checkSensitive(txt, beginIndex, validModeEnum, context); + .sensitiveCheck(txt, beginIndex, validModeEnum, context); } } diff --git a/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsChineseTest.java b/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsChineseTest.java index 1fac846..5fa0485 100644 --- a/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsChineseTest.java +++ b/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsChineseTest.java @@ -23,7 +23,7 @@ public class SensitiveWordBsChineseTest { final String text = "我爱我的祖国和五星紅旗。"; List wordList = SensitiveWordBs.newInstance().findAll(text); - Assert.assertEquals("[五星紅旗]", wordList.toString()); + Assert.assertEquals("[祖国, 五星紅旗]", wordList.toString()); } } diff --git a/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsEmailTest.java b/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsEmailTest.java index 1b9d4d8..c930774 100644 --- a/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsEmailTest.java +++ b/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsEmailTest.java @@ -35,7 +35,7 @@ public class SensitiveWordBsEmailTest { final String text = "楼主好人,邮箱 123456789@xx.com"; List wordList = SensitiveWordBs.newInstance().findAll(text); - Assert.assertEquals("[邮箱, 123456789]", wordList.toString()); + Assert.assertEquals("[邮箱, 123456789, xx.com]", wordList.toString()); } } diff --git a/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsUrlTest.java b/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsUrlTest.java new file mode 100644 index 0000000..e13a1c7 --- /dev/null +++ b/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsUrlTest.java @@ -0,0 +1,50 @@ +package com.github.houbb.sensitive.word.bs; + +import org.junit.Assert; +import org.junit.Test; + +import java.util.List; + +/** + *

project: sensitive-word-SensitiveWordBsTest

+ *

create on 2020/1/7 23:43

+ * + * @author Administrator + * @since 0.0.12 + */ +public class SensitiveWordBsUrlTest { + + /** + * 忽略中文ç¹ç®€ä½“ + * @since 0.0.12 + */ + @Test + public void commonUrlTest() { + final String text = "点击链接 www.baidu.com查看答案"; + + List wordList = SensitiveWordBs.newInstance().findAll(text); + Assert.assertEquals("[链接, www.baidu.com]", wordList.toString()); + + Assert.assertEquals("点击** *************查看答案", SensitiveWordBs + .newInstance().replace(text)); + } + + /** + * 图片测试 + * + * (1)å¯ä»¥æ£€æµ‹ + * (2ï¼‰é»˜è®¤ä¸æ›¿æ¢ + * + * @since 0.0.12 + */ + @Test + public void imageUrlTest() { + final String text = "åŒå‡»æŸ¥çœ‹å¤§å›¾ www.big-image.png查看"; + + List wordList = SensitiveWordBs.newInstance().findAll(text); + Assert.assertEquals("[www.big-image.png]", wordList.toString()); + + Assert.assertEquals(text, SensitiveWordBs.newInstance().replace(text)); + } + +}