collection);
/**
* 新增敏感词
diff --git a/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java b/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java
index a9afa35..4f8e50d 100644
--- a/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java
+++ b/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java
@@ -10,6 +10,7 @@ import com.github.houbb.sensitive.word.api.combine.IWordCheckCombine;
import com.github.houbb.sensitive.word.api.combine.IWordFormatCombine;
import com.github.houbb.sensitive.word.core.SensitiveWords;
import com.github.houbb.sensitive.word.support.allow.WordAllows;
+import com.github.houbb.sensitive.word.support.check.WordChecks;
import com.github.houbb.sensitive.word.support.combine.allowdeny.WordAllowDenyCombines;
import com.github.houbb.sensitive.word.support.combine.check.WordCheckCombines;
import com.github.houbb.sensitive.word.support.combine.format.WordFormatCombines;
@@ -182,6 +183,36 @@ public class SensitiveWordBs implements ISensitiveWordDestroy {
*/
private IWordResultCondition wordResultCondition = WordResultConditions.alwaysTrue();
+ /**
+ * 单词检测策略
+ * @since 0.25.0
+ */
+ private IWordCheck wordCheckWord = WordChecks.word();
+
+ /**
+ * 数字检测策略
+ * @since 0.25.0
+ */
+ private IWordCheck wordCheckNum = WordChecks.num();
+
+ /**
+ * email 检测策略
+ * @since 0.25.0
+ */
+ private IWordCheck wordCheckEmail = WordChecks.email();
+
+ /**
+ * URL 检测策略
+ * @since 0.25.0
+ */
+ private IWordCheck wordCheckUrl = WordChecks.url();
+
+ /**
+ * ipv4 检测策略
+ * @since 0.25.0
+ */
+ private IWordCheck wordCheckIpv4 = WordChecks.ipv4();
+
/**
* 新建验证实例
*
@@ -255,6 +286,13 @@ public class SensitiveWordBs implements ISensitiveWordDestroy {
context.enableWordCheck(enableWordCheck);
context.enableIpv4Check(enableIpv4Check);
+ // 校验策略实现配置
+ context.wordCheckWord(wordCheckWord);
+ context.wordCheckEmail(wordCheckEmail);
+ context.wordCheckNum(wordCheckNum);
+ context.wordCheckUrl(wordCheckUrl);
+ context.wordCheckIpv4(wordCheckIpv4);
+
// 额外配置
context.sensitiveCheckNumLen(numCheckLen);
context.wordReplace(wordReplace);
@@ -370,6 +408,41 @@ public class SensitiveWordBs implements ISensitiveWordDestroy {
return this;
}
+ public SensitiveWordBs wordCheckWord(IWordCheck wordCheckWord) {
+ ArgUtil.notNull(wordCheckWord, "wordCheckWord");
+
+ this.wordCheckWord = wordCheckWord;
+ return this;
+ }
+
+ public SensitiveWordBs wordCheckNum(IWordCheck wordCheckNum) {
+ ArgUtil.notNull(wordCheckNum, "wordCheckNum");
+
+ this.wordCheckNum = wordCheckNum;
+ return this;
+ }
+
+ public SensitiveWordBs wordCheckEmail(IWordCheck wordCheckEmail) {
+ ArgUtil.notNull(wordCheckEmail, "wordCheckEmail");
+
+ this.wordCheckEmail = wordCheckEmail;
+ return this;
+ }
+
+ public SensitiveWordBs wordCheckUrl(IWordCheck wordCheckUrl) {
+ ArgUtil.notNull(wordCheckUrl, "wordCheckUrl");
+
+ this.wordCheckUrl = wordCheckUrl;
+ return this;
+ }
+
+ public SensitiveWordBs wordCheckIpv4(IWordCheck wordCheckIpv4) {
+ ArgUtil.notNull(wordCheckIpv4, "wordCheckIpv4");
+
+ this.wordCheckIpv4 = wordCheckIpv4;
+ return this;
+ }
+
//-------------------------------------------------------- 基础属性设置
/**
* 是否启用 ipv4 校验
@@ -642,9 +715,7 @@ public class SensitiveWordBs implements ISensitiveWordDestroy {
// 主要原因是二者没有保持一致,初始化的数据和插入的数据没有做相同的格式化
List<String> formatList = InnerWordFormatUtils.formatWordList(collection, context);
- for(String word : formatList) {
- this.wordData.removeWord(word);
- }
+ this.wordData.removeWord(formatList);
}
/**
@@ -701,9 +772,8 @@ public class SensitiveWordBs implements ISensitiveWordDestroy {
// 主要原因是二者没有保持一致,初始化的数据和插入的数据没有做相同的格式化
List<String> formatList = InnerWordFormatUtils.formatWordList(collection, context);
- for(String word : formatList) {
- this.wordDataAllow.removeWord(word);
- }
+ this.wordDataAllow.removeWord(formatList);
+
}
/**
* 新增敏感词白名单
diff --git a/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordContext.java b/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordContext.java
index 30663e3..bcd884c 100644
--- a/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordContext.java
+++ b/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordContext.java
@@ -1,6 +1,7 @@
package com.github.houbb.sensitive.word.bs;
import com.github.houbb.sensitive.word.api.*;
+import com.github.houbb.sensitive.word.support.check.WordChecks;
/**
* 上下文
@@ -133,6 +134,36 @@ public class SensitiveWordContext implements IWordContext {
*/
private IWordResultCondition wordResultCondition;
+ /**
+ * 单词检测策略
+ * @since 0.25.0
+ */
+ private IWordCheck wordCheckWord;
+
+ /**
+ * 数字检测策略
+ * @since 0.25.0
+ */
+ private IWordCheck wordCheckNum;
+
+ /**
+ * email 检测策略
+ * @since 0.25.0
+ */
+ private IWordCheck wordCheckEmail;
+
+ /**
+ * URL 检测策略
+ * @since 0.25.0
+ */
+ private IWordCheck wordCheckUrl;
+
+ /**
+ * ipv4 检测策略
+ * @since 0.25.0
+ */
+ private IWordCheck wordCheckIpv4;
+
public IWordData wordData() {
return wordData;
}
@@ -355,4 +386,49 @@ public class SensitiveWordContext implements IWordContext {
this.wordResultCondition = wordResultCondition;
return this;
}
+
+ public IWordCheck wordCheckWord() {
+ return wordCheckWord;
+ }
+
+ public SensitiveWordContext wordCheckWord(IWordCheck wordCheckWord) {
+ this.wordCheckWord = wordCheckWord;
+ return this;
+ }
+
+ public IWordCheck wordCheckNum() {
+ return wordCheckNum;
+ }
+
+ public SensitiveWordContext wordCheckNum(IWordCheck wordCheckNum) {
+ this.wordCheckNum = wordCheckNum;
+ return this;
+ }
+
+ public IWordCheck wordCheckEmail() {
+ return wordCheckEmail;
+ }
+
+ public SensitiveWordContext wordCheckEmail(IWordCheck wordCheckEmail) {
+ this.wordCheckEmail = wordCheckEmail;
+ return this;
+ }
+
+ public IWordCheck wordCheckUrl() {
+ return wordCheckUrl;
+ }
+
+ public SensitiveWordContext wordCheckUrl(IWordCheck wordCheckUrl) {
+ this.wordCheckUrl = wordCheckUrl;
+ return this;
+ }
+
+ public IWordCheck wordCheckIpv4() {
+ return wordCheckIpv4;
+ }
+
+ public SensitiveWordContext wordCheckIpv4(IWordCheck wordCheckIpv4) {
+ this.wordCheckIpv4 = wordCheckIpv4;
+ return this;
+ }
}
diff --git a/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckUrl.java b/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckUrl.java
index 4ca748b..105b79c 100644
--- a/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckUrl.java
+++ b/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckUrl.java
@@ -60,7 +60,17 @@ public class WordCheckUrl extends AbstractConditionWordCheck {
// 改为 http:// 或者 https:// 开头
String string = stringBuilder.toString();
- return RegexUtil.isUrl(string);
+ return isUrl(string);
+ }
+
+ /**
+ * 是否为 URL
+ * @param text 原始文本
+ * @return 结果
+ * @since 0.25.0
+ */
+ protected boolean isUrl(final String text) {
+ return RegexUtil.isUrl(text);
}
}
diff --git a/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckUrlNoPrefix.java b/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckUrlNoPrefix.java
new file mode 100644
index 0000000..4a6a491
--- /dev/null
+++ b/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckUrlNoPrefix.java
@@ -0,0 +1,32 @@
+package com.github.houbb.sensitive.word.support.check;
+
+import com.github.houbb.heaven.util.util.regex.RegexUtil;
+import com.github.houbb.sensitive.word.api.IWordCheck;
+
+/**
+ * (1)暂时先粗略的处理 web-site
+ * (2)如果网址的最后为图片类型,则跳过。
+ * (3)长度超过 70,直接结束。
+ *
+ * 不包含前缀的实现策略
+ *
+ * @author binbin.hou
+ * @since 0.25.0
+ */
+public class WordCheckUrlNoPrefix extends WordCheckUrl {
+
+ /**
+ * @since 0.25.0
+ */
+ private static final IWordCheck INSTANCE = new WordCheckUrlNoPrefix();
+
+ public static IWordCheck getInstance() {
+ return INSTANCE;
+ }
+
+ @Override
+ protected boolean isUrl(String text) {
+ return RegexUtil.isWebSite(text);
+ }
+
+}
diff --git a/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckWordMaxLen.java b/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckWordMaxLen.java
new file mode 100644
index 0000000..b7c72c0
--- /dev/null
+++ b/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckWordMaxLen.java
@@ -0,0 +1,96 @@
+package com.github.houbb.sensitive.word.support.check;
+
+import com.github.houbb.heaven.annotation.ThreadSafe;
+import com.github.houbb.sensitive.word.api.ISensitiveWordCharIgnore;
+import com.github.houbb.sensitive.word.api.IWordCheck;
+import com.github.houbb.sensitive.word.api.IWordContext;
+import com.github.houbb.sensitive.word.api.IWordData;
+import com.github.houbb.sensitive.word.api.context.InnerSensitiveWordContext;
+import com.github.houbb.sensitive.word.constant.enums.WordContainsTypeEnum;
+import com.github.houbb.sensitive.word.constant.enums.WordTypeEnum;
+import com.github.houbb.sensitive.word.support.result.WordLengthResult;
+
+import java.util.Map;
+
+/**
+ * 敏感词监测实现
+ * @author binbin.hou
+ * @since 0.26.0
+ */
+@Deprecated
+public class WordCheckWordMaxLen extends AbstractWordCheck {
+
+ @Override
+ protected Class<? extends IWordCheck> getSensitiveCheckClass() {
+ return WordCheckWordMaxLen.class;
+ }
+
+ @Override
+ protected WordLengthResult getActualLength(int beginIndex, InnerSensitiveWordContext innerContext) {
+ final String txt = innerContext.originalText();
+ final Map<Character, Character> formatCharMapping = innerContext.formatCharMapping();
+ final IWordContext context = innerContext.wordContext();
+ final IWordData wordData = context.wordData();
+ final IWordData wordDataAllow = context.wordDataAllow();
+ final ISensitiveWordCharIgnore wordCharIgnore = context.charIgnore();
+
+ // 前一个条件
+ StringBuilder stringBuilder = new StringBuilder();
+ char[] rawChars = txt.toCharArray();
+
+ int tempLen = 0;
+ int maxWhite = 0;
+ int maxBlack = 0;
+ boolean firstCheck = true;
+
+ WordContainsTypeEnum wordContainsTypeEnumAllow = wordDataAllow.contains(stringBuilder, innerContext);
+ WordContainsTypeEnum wordContainsTypeEnumDeny = wordData.contains(stringBuilder, innerContext);
+
+ for (int i = beginIndex; i < rawChars.length; i++) {
+ if (wordCharIgnore.ignore(i, rawChars, innerContext) && tempLen != 0) {
+ tempLen++;
+ continue;
+ }
+
+ char mappingChar = formatCharMapping.get(rawChars[i]);
+ stringBuilder.append(mappingChar);
+ tempLen++;
+
+ if (firstCheck || !WordContainsTypeEnum.NOT_FOUND.equals(wordContainsTypeEnumAllow)) {
+ wordContainsTypeEnumAllow = wordDataAllow.contains(stringBuilder, innerContext);
+ if (WordContainsTypeEnum.CONTAINS_END.equals(wordContainsTypeEnumAllow)) {
+ maxWhite += tempLen;
+ wordContainsTypeEnumAllow = WordContainsTypeEnum.NOT_FOUND;
+ }
+ }
+
+ // 黑名单命中
+ if (firstCheck || !WordContainsTypeEnum.NOT_FOUND.equals(wordContainsTypeEnumDeny)) {
+ wordContainsTypeEnumDeny = wordData.contains(stringBuilder, innerContext);
+ if (WordContainsTypeEnum.CONTAINS_END.equals(wordContainsTypeEnumDeny)) {
+ maxBlack += tempLen;
+ wordContainsTypeEnumDeny = WordContainsTypeEnum.NOT_FOUND;
+ }
+ }
+
+ // 不再是第一次检测
+ firstCheck = false;
+
+ // 黑白名单都未匹配
+ if (WordContainsTypeEnum.NOT_FOUND.equals(wordContainsTypeEnumAllow) &&
+ WordContainsTypeEnum.NOT_FOUND.equals(wordContainsTypeEnumDeny)) {
+ break;
+ }
+ }
+
+ return WordLengthResult.newInstance()
+ .wordAllowLen(maxWhite)
+ .wordDenyLen(maxBlack);
+ }
+
+ @Override
+ protected String getType() {
+ return WordTypeEnum.WORD.getCode();
+ }
+
+}
diff --git a/src/main/java/com/github/houbb/sensitive/word/support/check/WordChecks.java b/src/main/java/com/github/houbb/sensitive/word/support/check/WordChecks.java
index 230bab2..d173c2d 100644
--- a/src/main/java/com/github/houbb/sensitive/word/support/check/WordChecks.java
+++ b/src/main/java/com/github/houbb/sensitive/word/support/check/WordChecks.java
@@ -77,4 +77,15 @@ public final class WordChecks {
return WordCheckIPV4.getInstance();
}
+ /**
+ * 不需要前缀的 urlPrefix
+ * 注意:这种检测方法可能会和代码中的包名称冲突
+ *
+ * @return 实现
+ * @since 0.25.0
+ */
+ public static IWordCheck urlNoPrefix() {
+ return WordCheckUrlNoPrefix.getInstance();
+ }
+
}
diff --git a/src/main/java/com/github/houbb/sensitive/word/support/combine/check/WordCheckCombine.java b/src/main/java/com/github/houbb/sensitive/word/support/combine/check/WordCheckCombine.java
index 60205fb..61fbbb0 100644
--- a/src/main/java/com/github/houbb/sensitive/word/support/combine/check/WordCheckCombine.java
+++ b/src/main/java/com/github/houbb/sensitive/word/support/combine/check/WordCheckCombine.java
@@ -18,19 +18,19 @@ public class WordCheckCombine extends AbstractWordCheckCombine {
List<IWordCheck> wordCheckList = new ArrayList<>();
if(context.enableWordCheck()) {
- wordCheckList.add(WordChecks.word());
+ wordCheckList.add(context.wordCheckWord());
}
if(context.enableNumCheck()) {
- wordCheckList.add(WordChecks.num());
+ wordCheckList.add(context.wordCheckNum());
}
if(context.enableEmailCheck()) {
- wordCheckList.add(WordChecks.email());
+ wordCheckList.add(context.wordCheckEmail());
}
if(context.enableUrlCheck()) {
- wordCheckList.add(WordChecks.url());
+ wordCheckList.add(context.wordCheckUrl());
}
if(context.enableIpv4Check()) {
- wordCheckList.add(WordChecks.ipv4());
+ wordCheckList.add(context.wordCheckIpv4());
}
return wordCheckList;
diff --git a/src/main/java/com/github/houbb/sensitive/word/support/data/AbstractWordData.java b/src/main/java/com/github/houbb/sensitive/word/support/data/AbstractWordData.java
index 06515d5..ffb06dc 100644
--- a/src/main/java/com/github/houbb/sensitive/word/support/data/AbstractWordData.java
+++ b/src/main/java/com/github/houbb/sensitive/word/support/data/AbstractWordData.java
@@ -1,6 +1,5 @@
package com.github.houbb.sensitive.word.support.data;
-import com.github.houbb.heaven.util.lang.StringUtil;
import com.github.houbb.heaven.util.util.CollectionUtil;
import com.github.houbb.sensitive.word.api.IWordData;
import com.github.houbb.sensitive.word.api.context.InnerSensitiveWordContext;
@@ -31,9 +30,9 @@ public abstract class AbstractWordData implements IWordData {
/**
* 删除敏感词
- * @param word 敏感词
+ * @param collection 集合
*/
- protected abstract void doRemoveWord(String word);
+ protected abstract void doRemoveWord(Collection<String> collection);
/**
* 新增敏感词
@@ -49,12 +48,12 @@ public abstract class AbstractWordData implements IWordData {
}
@Override
- public void removeWord(String word) {
- if(StringUtil.isEmpty(word)) {
+ public void removeWord(Collection<String> collection) {
+ if(CollectionUtil.isEmpty(collection)) {
return;
}
- doRemoveWord(word);
+ doRemoveWord(collection);
}
@Override
diff --git a/src/main/java/com/github/houbb/sensitive/word/support/data/WordDataHashMap.java b/src/main/java/com/github/houbb/sensitive/word/support/data/WordDataHashMap.java
index 834d85f..bf189c6 100644
--- a/src/main/java/com/github/houbb/sensitive/word/support/data/WordDataHashMap.java
+++ b/src/main/java/com/github/houbb/sensitive/word/support/data/WordDataHashMap.java
@@ -89,7 +89,7 @@ public class WordDataHashMap extends AbstractWordData {
}
@Override
- protected void doRemoveWord(String word) {
+ protected void doRemoveWord(Collection<String> collection) {
}
diff --git a/src/main/java/com/github/houbb/sensitive/word/support/data/WordDataTree.java b/src/main/java/com/github/houbb/sensitive/word/support/data/WordDataTree.java
index 453c841..5027f24 100644
--- a/src/main/java/com/github/houbb/sensitive/word/support/data/WordDataTree.java
+++ b/src/main/java/com/github/houbb/sensitive/word/support/data/WordDataTree.java
@@ -4,7 +4,6 @@ import com.github.houbb.heaven.annotation.ThreadSafe;
import com.github.houbb.heaven.util.lang.ObjectUtil;
import com.github.houbb.heaven.util.lang.StringUtil;
import com.github.houbb.sensitive.word.api.IWordContext;
-import com.github.houbb.sensitive.word.api.IWordData;
import com.github.houbb.sensitive.word.api.context.InnerSensitiveWordContext;
import com.github.houbb.sensitive.word.constant.enums.WordContainsTypeEnum;
@@ -84,49 +83,7 @@ public class WordDataTree extends AbstractWordData {
this.root = newRoot;
}
- @Override
- protected void doRemoveWord(String word) {
- WordDataTreeNode tempNode = root;
- //需要删除的
- Map<Character, WordDataTreeNode> map = new HashMap<>();
- char[] chars = word.toCharArray();
- int length = chars.length;
- for (int i = 0; i < length; i++) {
- //不存在第一个词
- WordDataTreeNode subNode = tempNode.getSubNode(chars[i]);
- if (subNode == null) {
- return;
- }
- if (i == (length - 1)) {
- //尾字符判断是否结束
- if (!subNode.end()) {
- return;
- }
- if (subNode.getNodeSize() > 0) {
- //尾字符下还存在字符,即标识即可
- subNode.end(false);
- return;
- }
- }
- if (subNode.end()) {
- map.clear();
- }
- map.put(chars[i], tempNode);
- tempNode = subNode;
- }
-
- for (Map.Entry<Character, WordDataTreeNode> entry : map.entrySet()) {
- WordDataTreeNode value = entry.getValue();
- //节点只有一个就置空
- if (value.getNodeSize() == 1) {
- value.clearNode();
- return;
- }
- //多个就删除
- value.removeNode(entry.getKey());
- }
- }
/**
* 新增敏感词
@@ -143,6 +100,21 @@ public class WordDataTree extends AbstractWordData {
}
}
+ @Override
+ protected synchronized void doRemoveWord(Collection<String> collection) {
+ for (String word : collection) {
+ if (StringUtil.isEmpty(word)) {
+ continue;
+ }
+ removeWord(this.root, word);
+ }
+ }
+
+
+
+
+
+
/**
* 获取当前的 Map
* @param nowNode 当前节点
@@ -211,4 +183,48 @@ public class WordDataTree extends AbstractWordData {
tempNode.end(true);
}
+
+ private void removeWord(WordDataTreeNode root, String word){
+ WordDataTreeNode tempNode = root;
+ //需要删除的
+ Map<Character, WordDataTreeNode> map = new HashMap<>();
+ char[] chars = word.toCharArray();
+ int length = chars.length;
+ for (int i = 0; i < length; i++) {
+ //不存在第一个词
+ WordDataTreeNode subNode = tempNode.getSubNode(chars[i]);
+ if (subNode == null) {
+ return;
+ }
+ if (i == (length - 1)) {
+ //尾字符判断是否结束
+ if (!subNode.end()) {
+ return;
+ }
+ if (subNode.getNodeSize() > 0) {
+ //尾字符下还存在字符,即标识即可
+ subNode.end(false);
+ return;
+ }
+ }
+ if (subNode.end()) {
+ map.clear();
+ }
+ map.put(chars[i], tempNode);
+
+ tempNode = subNode;
+ }
+
+ for (Map.Entry<Character, WordDataTreeNode> entry : map.entrySet()) {
+ WordDataTreeNode value = entry.getValue();
+ //节点只有一个就置空
+ if (value.getNodeSize() == 1) {
+ value.clearNode();
+ return;
+ }
+ //多个就删除
+ value.removeNode(entry.getKey());
+ }
+ }
+
}
diff --git a/src/main/java/com/github/houbb/sensitive/word/support/result/WordLengthResult.java b/src/main/java/com/github/houbb/sensitive/word/support/result/WordLengthResult.java
index aac3136..d1888bd 100644
--- a/src/main/java/com/github/houbb/sensitive/word/support/result/WordLengthResult.java
+++ b/src/main/java/com/github/houbb/sensitive/word/support/result/WordLengthResult.java
@@ -1,32 +1,48 @@
package com.github.houbb.sensitive.word.support.result;
+/**
+ * 说明:统一让黑白名单一次遍历,性能优化
+ *
+ * @since 0.24.2
+ */
public class WordLengthResult {
- private int wordAllowLen;
- private int wordDenyLen;
+ /**
+ * 白名单长度
+ */
+ private int wordAllowLen;
+ /**
+ * 黑名单长度
+ */
+ private int wordDenyLen;
-
- private WordLengthResult(){}
-
- public static WordLengthResult newInstance(){
+ public static WordLengthResult newInstance() {
return new WordLengthResult();
}
-
- public int wordAllowLen(){
+ public int wordAllowLen() {
return this.wordAllowLen;
}
- public WordLengthResult wordAllowLen(int wordAllowLen){
- this.wordAllowLen=wordAllowLen;
+
+ public WordLengthResult wordAllowLen(int wordAllowLen) {
+ this.wordAllowLen = wordAllowLen;
return this;
}
- public int wordDenyLen(){
+ public int wordDenyLen() {
return this.wordDenyLen;
}
- public WordLengthResult wordDenyLen(int wordDenyLen){
- this.wordDenyLen=wordDenyLen;
+
+ public WordLengthResult wordDenyLen(int wordDenyLen) {
+ this.wordDenyLen = wordDenyLen;
return this;
}
+ @Override
+ public String toString() {
+ return "WordLengthResult{" +
+ "wordAllowLen=" + wordAllowLen +
+ ", wordDenyLen=" + wordDenyLen +
+ '}';
+ }
}
diff --git a/src/main/java/com/github/houbb/sensitive/word/support/tag/WordTags.java b/src/main/java/com/github/houbb/sensitive/word/support/tag/WordTags.java
index 379e77b..189bf0c 100644
--- a/src/main/java/com/github/houbb/sensitive/word/support/tag/WordTags.java
+++ b/src/main/java/com/github/houbb/sensitive/word/support/tag/WordTags.java
@@ -71,6 +71,8 @@ public class WordTags {
/**
* 根据标准的约定行处理
* @param lines 行信息
+ * @param wordSplit 单词分割
+ * @param tagSplit 标签分割
* @return 结果
*/
public static IWordTag lines(final Collection<String> lines, final String wordSplit, final String tagSplit) {
diff --git a/src/test/java/com/github/houbb/sensitive/word/benchmark/BenchmarkBasicTest.java b/src/test/java/com/github/houbb/sensitive/word/benchmark/BenchmarkBasicTest.java
index 670d08b..476bad0 100644
--- a/src/test/java/com/github/houbb/sensitive/word/benchmark/BenchmarkBasicTest.java
+++ b/src/test/java/com/github/houbb/sensitive/word/benchmark/BenchmarkBasicTest.java
@@ -72,44 +72,37 @@ public class BenchmarkBasicTest {
}
/**
- * * 黑白名单一次遍历 优化前:300*他们在地铁口交易,查10000次,26183
- * * 黑白名单一次遍历 优化后:300*他们在地铁口交易,查10000次,15705
- *
+ * 黑白名单一次遍历 优化前:300*他们在地铁口交易,查10000次,26183
+ * 黑白名单一次遍历 优化后:300*他们在地铁口交易,查10000次,15705
+ * @since 0.24.2
*/
@Test
public void costTimeOneTraceTest() {
- StringBuilder sb=new StringBuilder();
- for(int i=0;i<300;i++){
+ StringBuilder sb = new StringBuilder();
+ for (int i = 0; i < 300; i++) {
sb.append("他们在地铁口交易").append(i);
}
String text = sb.toString();
// 1W 次
long start = System.currentTimeMillis();
- SensitiveWordBs sensitiveWordBs = SensitiveWordBs.newInstance()
- .wordDeny(new IWordDeny() {
- @Override
- public List<String> deny() {
- return Collections.singletonList("口交");
- }
- })
- .wordAllow(new IWordAllow() {
- @Override
- public List<String> allow() {
- return Collections.singletonList("地铁口交易");
- }
- })
- .enableWordCheck(true)
- .enableNumCheck(false)
- .enableUrlCheck(false)
- .enableEmailCheck(false)
- .init();
+ SensitiveWordBs sensitiveWordBs = SensitiveWordBs.newInstance().wordDeny(new IWordDeny() {
+ @Override
+ public List<String> deny() {
+ return Collections.singletonList("口交");
+ }
+ }).wordAllow(new IWordAllow() {
+ @Override
+ public List<String> allow() {
+ return Collections.singletonList("地铁口交易");
+ }
+ }).enableWordCheck(true).enableNumCheck(false).enableUrlCheck(false).enableEmailCheck(false).init();
- for(int i = 0; i < 10000; i++) {
+ for (int i = 0; i < 10000; i++) {
sensitiveWordBs.findAll(text);
}
long end = System.currentTimeMillis();
- System.out.println("------------------ COST: " + (end-start));
+ System.out.println("------------------ COST: " + (end - start));
}
/**
diff --git a/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsConfigTest.java b/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsConfigTest.java
index 14c8b32..6612bcb 100644
--- a/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsConfigTest.java
+++ b/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsConfigTest.java
@@ -4,8 +4,10 @@ import com.github.houbb.heaven.util.io.FileUtil;
import com.github.houbb.heaven.util.util.CollectionUtil;
import com.github.houbb.sensitive.word.data.WordCountDto;
import com.github.houbb.sensitive.word.support.allow.WordAllows;
+import com.github.houbb.sensitive.word.support.check.WordChecks;
import com.github.houbb.sensitive.word.support.deny.WordDenys;
import com.github.houbb.sensitive.word.support.ignore.SensitiveWordCharIgnores;
+import com.github.houbb.sensitive.word.support.replace.WordReplaces;
import com.github.houbb.sensitive.word.support.resultcondition.WordResultConditions;
import com.github.houbb.sensitive.word.support.tag.WordTags;
import org.junit.Assert;
@@ -39,46 +41,19 @@ public class SensitiveWordBsConfigTest {
.enableUrlCheck(false)
.enableIpv4Check(false)
.enableWordCheck(true)
+ .wordCheckNum(WordChecks.num())
+ .wordCheckEmail(WordChecks.email())
+ .wordCheckUrl(WordChecks.url())
+ .wordCheckIpv4(WordChecks.ipv4())
+ .wordCheckWord(WordChecks.word())
.numCheckLen(8)
.wordTag(WordTags.none())
.charIgnore(SensitiveWordCharIgnores.defaults())
.wordResultCondition(WordResultConditions.alwaysTrue())
.wordAllow(WordAllows.defaults())
.wordDeny(WordDenys.defaults())
+ .wordReplace(WordReplaces.defaults())
.init();
-
-// String dir = "D:\\code\\github\\houbb.github.io\\_posts";
-// File[] files = new File(dir).listFiles();
-//
-// Set wordSet = new HashSet<>();
-//
-// Map wordCountMap = new HashMap<>();
-// for(File file : files) {
-// String content = FileUtil.getFileContent(file);
-// List allWords = wordBs.findAll(content);
-//
-// for(String word : allWords) {
-// Integer integer = wordCountMap.get(word);
-// if(integer == null) {
-// integer = 0;
-// }
-//
-// integer++;
-// wordCountMap.put(word, integer);
-// }
-//
-// System.out.println(file.getName());
-// }
-//
-//// List wordCountDtoList = new ArrayList<>();
-// for(Map.Entry entry : wordCountMap.entrySet()) {
-// if(entry.getValue() >= 3) {
-// System.out.println(entry.getKey() + " : " + entry.getValue());
-// }
-// }
-
-// Collections.sort(wordCountDtoList);
-// System.out.println(wordCountDtoList);
}
@Test
diff --git a/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsUrlNoPrefixTest.java b/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsUrlNoPrefixTest.java
new file mode 100644
index 0000000..b5f6543
--- /dev/null
+++ b/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsUrlNoPrefixTest.java
@@ -0,0 +1,37 @@
+package com.github.houbb.sensitive.word.bs;
+
+import com.github.houbb.sensitive.word.support.check.WordChecks;
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.util.List;
+
+/**
+ * project: sensitive-word-SensitiveWordBsTest
+ * create on 2020/1/7 23:43
+ *
+ * @author Administrator
+ * @since 0.25.0
+ */
+public class SensitiveWordBsUrlNoPrefixTest {
+
+ /**
+ * URL 检测
+ *
+ * @since 0.25.0
+ */
+ @Test
+ public void urlNoPrefixTest() {
+ final String text = "点击链接 https://www.baidu.com 查看答案,当然也可以是 baidu.com、www.baidu.com";
+
+ final SensitiveWordBs sensitiveWordBs = SensitiveWordBs.newInstance()
+ .enableUrlCheck(true) // 启用URL检测
+ .wordCheckUrl(WordChecks.urlNoPrefix()) //指定检测的方式
+ .init();
+ List<String> wordList = sensitiveWordBs.findAll(text);
+ Assert.assertEquals("[www.baidu.com, baidu.com, www.baidu.com]", wordList.toString());
+
+ Assert.assertEquals("点击链接 https://************* 查看答案,当然也可以是 *********、*************", sensitiveWordBs.replace(text));
+ }
+
+}
diff --git a/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordMaxFirstTest.java b/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordMaxFirstTest.java
new file mode 100644
index 0000000..2c0f819
--- /dev/null
+++ b/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordMaxFirstTest.java
@@ -0,0 +1,28 @@
+package com.github.houbb.sensitive.word.bs;
+
+import com.github.houbb.sensitive.word.api.IWordDeny;
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.util.Arrays;
+import java.util.List;
+
+public class SensitiveWordMaxFirstTest {
+
+ @Test
+ public void maxFirstTest() {
+ SensitiveWordBs bs = SensitiveWordBs.newInstance()
+ .wordDeny(new IWordDeny() {
+ @Override
+ public List<String> deny() {
+ return Arrays.asList("我的世界", "我的");
+ }
+ }).init();
+
+ String text = "我的世界我的好玩";
+
+ List<String> textList = bs.findAll(text);
+// Assert.assertEquals("", textList.toString());
+ }
+
+}