diff --git a/CHANGE_LOG.md b/CHANGE_LOG.md index ec3ac82..15f6fd0 100644 --- a/CHANGE_LOG.md +++ b/CHANGE_LOG.md @@ -336,3 +336,10 @@ | 序号 | 变更类型 | 说明 | 时间 | 备注 | |:---|:-----|-----------------------------|:-------------------|:-----| | 1 | A | 优化自定义策略,避免出现返回 null 导致的 NPE | 2024-8-28 15:02:25 | | + +# release_0.19.0 + +| 序号 | 变更类型 | 说明 | 时间 | 备注 | +|:---|:-----|------------------------|:-------------------|:-----| +| 1 | A | 单个词的新增/删除 | 2024-8-28 15:02:25 | | +| 2 | A | allow/deny 的空实现,便于测试场景 | 2024-8-28 15:02:25 | | diff --git a/README.md b/README.md index 4eb7226..0aa216f 100644 --- a/README.md +++ b/README.md @@ -48,17 +48,16 @@ - [支持跳过一些特殊字符,让匹配更灵活](https://github.com/houbb/sensitive-word#%E5%BF%BD%E7%95%A5%E5%AD%97%E7%AC%A6) +- [支持单个词的新增/删除,无需全量初始化]() + ## 变更日志 [CHANGE_LOG.md](https://github.com/houbb/sensitive-word/blob/master/CHANGE_LOG.md) -### V0.17.0 +### V0.19.0 -- 支持 ipv4 - -### V0.18.0 - -- 优化 URL 检测,降低误判率 +- 针对单个词的新增/删除,无需全量初始化 +- 新增 allow/deny 空实现 ## 更多资料 @@ -90,7 +89,7 @@ com.github.houbb sensitive-word - 0.18.1 + 0.19.0 ``` @@ -486,6 +485,58 @@ SensitiveWordBs wordBs = SensitiveWordBs.newInstance() wordBs.destroy(); ``` +## 针对单个词的新增/删除,无需全量初始化 + +使用场景:在初始化之后,我们希望针对单个词的新增/删除,而不是完全重新初始化。这个特性就是为此准备的。 + +支持版本:v0.19.0 + +### 方法说明 + +`addWord(word)` 新增敏感词,支持单个词/集合 + +`removeWord(word)` 删除敏感词,支持单个词/集合 + +### 实例代码: + +```java +final String text = "测试一下新增敏感词,验证一下删除和新增对不对"; + +SensitiveWordBs sensitiveWordBs = +SensitiveWordBs.newInstance() + .wordAllow(WordAllows.empty()) + .wordDeny(WordDenys.empty()) + .init(); + +// 当前 +Assert.assertEquals("[]", sensitiveWordBs.findAll(text).toString()); + +// 新增单个 +sensitiveWordBs.addWord("测试"); +sensitiveWordBs.addWord("新增"); +Assert.assertEquals("[测试, 新增, 新增]", sensitiveWordBs.findAll(text).toString()); + +// 删除单个 +sensitiveWordBs.removeWord("新增"); +Assert.assertEquals("[测试]", sensitiveWordBs.findAll(text).toString()); +sensitiveWordBs.removeWord("测试"); +Assert.assertEquals("[]", 
sensitiveWordBs.findAll(text).toString()); + +// 新增集合 +sensitiveWordBs.addWord(Arrays.asList("新增", "测试")); +Assert.assertEquals("[测试, 新增, 新增]", sensitiveWordBs.findAll(text).toString()); +// 删除集合 +sensitiveWordBs.removeWord(Arrays.asList("新增", "测试")); +Assert.assertEquals("[]", sensitiveWordBs.findAll(text).toString()); + +// 新增数组 +sensitiveWordBs.addWord("新增", "测试"); +Assert.assertEquals("[测试, 新增, 新增]", sensitiveWordBs.findAll(text).toString()); +// 删除数组 +sensitiveWordBs.removeWord("新增", "测试"); +Assert.assertEquals("[]", sensitiveWordBs.findAll(text).toString()); +``` + # wordResultCondition-针对匹配词进一步判断 ## 说明 diff --git a/pom.xml b/pom.xml index 75eef36..3a2432b 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ com.github.houbb sensitive-word - 0.18.1 + 0.19.0 diff --git a/release.bat b/release.bat index 06fae90..e50ee6b 100644 --- a/release.bat +++ b/release.bat @@ -10,9 +10,9 @@ ECHO "============================= RELEASE START..." :: 版本号信息(需要手动指定) :::: 旧版本名称 -SET version=0.18.1 +SET version=0.19.0 :::: 新版本名称 -SET newVersion=0.19.0 +SET newVersion=0.20.0 :::: 组织名称 SET groupName=com.github.houbb :::: 项目名称 diff --git a/src/main/java/com/github/houbb/sensitive/word/api/IWordData.java b/src/main/java/com/github/houbb/sensitive/word/api/IWordData.java index 29f25f5..7aa55cd 100644 --- a/src/main/java/com/github/houbb/sensitive/word/api/IWordData.java +++ b/src/main/java/com/github/houbb/sensitive/word/api/IWordData.java @@ -13,7 +13,6 @@ import java.util.Collection; */ public interface IWordData extends ISensitiveWordDestroy { - /** * 初始化敏感词 map * @param collection 集合信息 @@ -23,17 +22,17 @@ public interface IWordData extends ISensitiveWordDestroy { /** * 删除敏感词 - * @param word + * @param word 单词 + * @since 0.19.0 */ - default void removeWord(String word){ - - } + void removeWord(String word); /** * 新增敏感词 - * @param collection + * @param collection 敏感词集合 + * @since 0.19.0 */ - void saveWordData(Collection collection); + void addWord(Collection collection); /** * 是否包含敏感词 
diff --git a/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java b/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java index 49a4662..2606583 100644 --- a/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java +++ b/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java @@ -21,10 +21,7 @@ import com.github.houbb.sensitive.word.support.result.WordResultHandlers; import com.github.houbb.sensitive.word.support.resultcondition.WordResultConditions; import com.github.houbb.sensitive.word.support.tag.WordTags; -import java.util.Collection; -import java.util.Collections; -import java.util.List; -import java.util.Set; +import java.util.*; /** * 敏感词引导类 @@ -595,6 +592,56 @@ public class SensitiveWordBs implements ISensitiveWordDestroy { this.wordData.destroy(); } + /** + * Removes one or more sensitive words. + * @param word first word to remove; any further words are passed via {@code others} + * @since 0.19.0 + */ + public void removeWord(String word, String ... others) { + List wordList = new ArrayList<>(); + wordList.add(word); + wordList.addAll(Arrays.asList(others)); + + removeWord(wordList); + } + + /** + * Removes every word of the given collection, one word at a time. + * @param collection words to remove; an empty collection is ignored + * @since 0.19.0 + */ + public void removeWord(Collection collection) { + if(CollectionUtil.isEmpty(collection)) { + return; + } + for(String word : collection) { + this.wordData.removeWord(word); + } + } + + /** + * Adds the given sensitive words by delegating to the word data. + * @param collection sensitive words to add + * @since 0.19.0 + */ + public void addWord(Collection collection) { + this.wordData.addWord(collection); + } + + /** + * Adds one or more sensitive words. + * @param word first sensitive word to add + * @param others additional sensitive words to add + * @since 0.19.0 + */ + public void addWord(String word, String...others) { + List wordList = new ArrayList<>(); + wordList.add(word); + wordList.addAll(Arrays.asList(others)); + + this.addWord(wordList); + } + //------------------------------------------------------------------------------------ 公开方法 END } diff --git a/src/main/java/com/github/houbb/sensitive/word/support/allow/WordAllowEmpty.java 
b/src/main/java/com/github/houbb/sensitive/word/support/allow/WordAllowEmpty.java new file mode 100644 index 0000000..581195b --- /dev/null +++ b/src/main/java/com/github/houbb/sensitive/word/support/allow/WordAllowEmpty.java @@ -0,0 +1,24 @@ +package com.github.houbb.sensitive.word.support.allow; + +import com.github.houbb.heaven.annotation.ThreadSafe; +import com.github.houbb.heaven.util.io.StreamUtil; +import com.github.houbb.sensitive.word.api.IWordAllow; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +/** + * Allow-list implementation that always returns a new, empty list. + * @author binbin.hou + * @since 0.19.0 + */ +@ThreadSafe +public class WordAllowEmpty implements IWordAllow { + + @Override + public List allow() { + return new ArrayList<>(); + } + +} diff --git a/src/main/java/com/github/houbb/sensitive/word/support/allow/WordAllows.java b/src/main/java/com/github/houbb/sensitive/word/support/allow/WordAllows.java index 008647f..be51624 100644 --- a/src/main/java/com/github/houbb/sensitive/word/support/allow/WordAllows.java +++ b/src/main/java/com/github/houbb/sensitive/word/support/allow/WordAllows.java @@ -45,4 +45,14 @@ public final class WordAllows { return WordAllowSystem.getInstance(); } + + /** + * Creates an empty allow-list implementation, useful in tests. + * @return a new {@link WordAllowEmpty} + * @since 0.19.0 + */ + public static IWordAllow empty() { + return new WordAllowEmpty(); + } + } diff --git a/src/main/java/com/github/houbb/sensitive/word/support/data/AbstractWordData.java b/src/main/java/com/github/houbb/sensitive/word/support/data/AbstractWordData.java index d2659bc..06515d5 100644 --- a/src/main/java/com/github/houbb/sensitive/word/support/data/AbstractWordData.java +++ b/src/main/java/com/github/houbb/sensitive/word/support/data/AbstractWordData.java @@ -1,5 +1,7 @@ package com.github.houbb.sensitive.word.support.data; +import com.github.houbb.heaven.util.lang.StringUtil; +import com.github.houbb.heaven.util.util.CollectionUtil; import com.github.houbb.sensitive.word.api.IWordData; import 
com.github.houbb.sensitive.word.api.context.InnerSensitiveWordContext; import com.github.houbb.sensitive.word.constant.enums.WordContainsTypeEnum; @@ -27,6 +29,18 @@ public abstract class AbstractWordData implements IWordData { */ protected abstract void doInitWordData(Collection collection); + /** + * Removes a single sensitive word; {@link #removeWord(String)} calls this for non-empty words only. + * @param word sensitive word to remove + */ + protected abstract void doRemoveWord(String word); + + /** + * Adds sensitive words; {@link #addWord(Collection)} calls this for non-empty collections only. + * @param collection sensitive words to add + */ + protected abstract void doAddWord(Collection collection); + @Override public void initWordData(Collection collection) { //1. 预留 @@ -34,6 +48,24 @@ public abstract class AbstractWordData implements IWordData { this.doInitWordData(collection); } + @Override + public void removeWord(String word) { + if(StringUtil.isEmpty(word)) { + return; + } + + doRemoveWord(word); + } + + @Override + public void addWord(Collection collection) { + if(CollectionUtil.isEmpty(collection)) { + return; + } + + doAddWord(collection); + } + @Override public WordContainsTypeEnum contains(StringBuilder stringBuilder, InnerSensitiveWordContext innerContext) { if(stringBuilder == null diff --git a/src/main/java/com/github/houbb/sensitive/word/support/data/WordDataHashMap.java b/src/main/java/com/github/houbb/sensitive/word/support/data/WordDataHashMap.java index 696f5b0..de46765 100644 --- a/src/main/java/com/github/houbb/sensitive/word/support/data/WordDataHashMap.java +++ b/src/main/java/com/github/houbb/sensitive/word/support/data/WordDataHashMap.java @@ -15,10 +15,12 @@ import java.util.Map; /** * 敏感词 map * + * @deprecated No longer maintained, to reduce maintenance cost. + * * @author binbin.hou * @since 0.0.1 */ -@ThreadSafe +@Deprecated public class WordDataHashMap extends AbstractWordData { /** @@ -87,6 +89,16 @@ public class WordDataHashMap extends AbstractWordData { this.innerWordMap = newInnerWordMap; } + @Override + protected void doRemoveWord(String word) { + // No-op: this deprecated implementation does not remove single words; the call is silently ignored. + } + + @Override + protected void doAddWord(Collection collection) { + // No-op: this deprecated implementation does not add words incrementally; the call is silently ignored. + } + /** * 是否包含 * (1)直接遍历所有 diff --git 
a/src/main/java/com/github/houbb/sensitive/word/support/data/WordDataTree.java b/src/main/java/com/github/houbb/sensitive/word/support/data/WordDataTree.java index 61ffa50..453c841 100644 --- a/src/main/java/com/github/houbb/sensitive/word/support/data/WordDataTree.java +++ b/src/main/java/com/github/houbb/sensitive/word/support/data/WordDataTree.java @@ -16,12 +16,16 @@ import java.util.Map; * 敏感词 map * PR:https://github.com/houbb/sensitive-word/pull/33 * + * PR: https://github.com/houbb/sensitive-word/pull/74 + * * @author xiaochangbai * @author binbin.hou + * @author zldaysleepy + * * @since 0.7.0 */ @ThreadSafe -public class WordDataTree implements IWordData { +public class WordDataTree extends AbstractWordData { @Override public synchronized void initWordData(Collection collection) { @@ -31,8 +35,7 @@ public class WordDataTree implements IWordData { if (StringUtil.isEmpty(word)) { continue; } - saveWord(newRoot, word); - + addWord(newRoot, word); } // 初始化完成才做替换 @@ -42,27 +45,47 @@ public class WordDataTree implements IWordData { * 根节点 */ private WordDataTreeNode root; - /** - * 新增敏感词 - * - * @param collection - */ + @Override - public synchronized void saveWordData(Collection collection) { + protected WordContainsTypeEnum doContains(StringBuilder stringBuilder, InnerSensitiveWordContext innerContext) { + WordDataTreeNode nowNode = root; + + int len = stringBuilder.length(); + + for(int i = 0; i < len; i++) { + // Look up the next node for the character at position i + nowNode = getNowMap(nowNode, i, stringBuilder, innerContext); + + // No node for this position means the text is not in the tree + if (ObjectUtil.isNull(nowNode)) { + return WordContainsTypeEnum.NOT_FOUND; + } + } + + if(nowNode.end()) { + return WordContainsTypeEnum.CONTAINS_END; + } + + return WordContainsTypeEnum.CONTAINS_PREFIX; + } + + @Override + protected void doInitWordData(Collection collection) { + WordDataTreeNode newRoot = new WordDataTreeNode(); + for (String word : collection) { if (StringUtil.isEmpty(word)) { continue; } - saveWord(this.root, word); - + 
addWord(newRoot, word); + } + + // Swap in the new tree only after it has been fully built + this.root = newRoot; } @Override - public synchronized void removeWord(String word) { - if (StringUtil.isEmpty(word)) { - return; - } + protected synchronized void doRemoveWord(String word) { WordDataTreeNode tempNode = root; //需要删除的 Map map = new HashMap<>(); @@ -90,9 +113,9 @@ public class WordDataTree implements IWordData { } map.put(chars[i], tempNode); - tempNode = subNode; } + for (Map.Entry entry : map.entrySet()) { WordDataTreeNode value = entry.getValue(); //节点只有一个就置空 @@ -103,35 +126,23 @@ public class WordDataTree implements IWordData { //多个就删除 value.removeNode(entry.getKey()); } - } - + /** + * Adds sensitive words to the current tree. + * + * @param collection sensitive words to add; empty entries are skipped + */ @Override - public WordContainsTypeEnum contains(StringBuilder stringBuilder, - InnerSensitiveWordContext innerContext) { - WordDataTreeNode nowNode = root; - - int len = stringBuilder.length(); - - for(int i = 0; i < len; i++) { - // 获取当前的 map 信息 - nowNode = getNowMap(nowNode, i, stringBuilder, innerContext); - - // 如果不为空,则判断是否为结尾。 - if (ObjectUtil.isNull(nowNode)) { - return WordContainsTypeEnum.NOT_FOUND; + public synchronized void doAddWord(Collection collection) { + for (String word : collection) { + if (StringUtil.isEmpty(word)) { + continue; } + addWord(this.root, word); } - - if(nowNode.end()) { - return WordContainsTypeEnum.CONTAINS_END; - } - - return WordContainsTypeEnum.CONTAINS_PREFIX; } - /** * 获取当前的 Map * @param nowNode 当前节点 @@ -173,7 +184,14 @@ public class WordDataTree implements IWordData { } } - public void saveWord(WordDataTreeNode newRoot, String word) { + /** + * Inserts one word into the tree below the given root. + * @param newRoot root node to insert under + * @param word word to insert + * + * @since 0.19.0 + */ + private void addWord(WordDataTreeNode newRoot, String word) { WordDataTreeNode tempNode = newRoot; char[] chars = word.toCharArray(); for (char c : chars) { @@ -192,4 +210,5 @@ public class WordDataTree implements IWordData { // 设置结束标识(循环结束,设置一次即可) tempNode.end(true); } + } diff --git 
a/src/main/java/com/github/houbb/sensitive/word/support/data/WordDatas.java b/src/main/java/com/github/houbb/sensitive/word/support/data/WordDatas.java index 28333aa..64dc8f9 100644 --- a/src/main/java/com/github/houbb/sensitive/word/support/data/WordDatas.java +++ b/src/main/java/com/github/houbb/sensitive/word/support/data/WordDatas.java @@ -30,14 +30,4 @@ public final class WordDatas { return new WordDataTree(); } - /** - * 树模式 - * @return 树 - * @since 0.7.0 - */ - public static IWordData hashMap() { - return new WordDataHashMap(); - } - - } diff --git a/src/main/java/com/github/houbb/sensitive/word/support/deny/WordDenyEmpty.java b/src/main/java/com/github/houbb/sensitive/word/support/deny/WordDenyEmpty.java new file mode 100644 index 0000000..3c52fd6 --- /dev/null +++ b/src/main/java/com/github/houbb/sensitive/word/support/deny/WordDenyEmpty.java @@ -0,0 +1,24 @@ +package com.github.houbb.sensitive.word.support.deny; + +import com.github.houbb.heaven.annotation.ThreadSafe; +import com.github.houbb.heaven.util.io.StreamUtil; +import com.github.houbb.sensitive.word.api.IWordDeny; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +/** + * Deny-list implementation that always returns a new, empty list. + * @author binbin.hou + * @since 0.19.0 + */ +@ThreadSafe +public class WordDenyEmpty implements IWordDeny { + + @Override + public List deny() { + return new ArrayList<>(); + } + +} diff --git a/src/main/java/com/github/houbb/sensitive/word/support/deny/WordDenys.java b/src/main/java/com/github/houbb/sensitive/word/support/deny/WordDenys.java index dfcd524..1377f04 100644 --- a/src/main/java/com/github/houbb/sensitive/word/support/deny/WordDenys.java +++ b/src/main/java/com/github/houbb/sensitive/word/support/deny/WordDenys.java @@ -45,4 +45,14 @@ public final class WordDenys { return WordDenySystem.getInstance(); } + /** + * Creates an empty deny-list implementation. + * @return a new {@link WordDenyEmpty} + * @since 0.19.0 + */ + public static IWordDeny empty() { + return new WordDenyEmpty(); + } + + } diff --git 
a/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsEditWordTest.java b/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsEditWordTest.java new file mode 100644 index 0000000..08b79b5 --- /dev/null +++ b/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsEditWordTest.java @@ -0,0 +1,62 @@ +package com.github.houbb.sensitive.word.bs; + +import com.github.houbb.sensitive.word.support.allow.WordAllows; +import com.github.houbb.sensitive.word.support.deny.WordDenys; +import org.junit.Assert; +import org.junit.Test; + +import java.util.Arrays; +import java.util.List; + +/** + *

project: sensitive-word-SensitiveWordBsEditWordTest

+ *

create on 2020/1/7 23:43

+ * + * @author Administrator + * @since 0.19.0 + */ +public class SensitiveWordBsEditWordTest { + + /** + * @since 0.19.0 + */ + @Test + public void editWordTest() { + final String text = "测试一下新增敏感词,验证一下删除和新增对不对"; + + SensitiveWordBs sensitiveWordBs = + SensitiveWordBs.newInstance() + .wordAllow(WordAllows.empty()) + .wordDeny(WordDenys.empty()) + .init(); + + // Initially nothing matches + Assert.assertEquals("[]", sensitiveWordBs.findAll(text).toString()); + + // Add single words + sensitiveWordBs.addWord("测试"); + sensitiveWordBs.addWord("新增"); + Assert.assertEquals("[测试, 新增, 新增]", sensitiveWordBs.findAll(text).toString()); + + // Remove single words + sensitiveWordBs.removeWord("新增"); + Assert.assertEquals("[测试]", sensitiveWordBs.findAll(text).toString()); + sensitiveWordBs.removeWord("测试"); + Assert.assertEquals("[]", sensitiveWordBs.findAll(text).toString()); + + // Add a collection + sensitiveWordBs.addWord(Arrays.asList("新增", "测试")); + Assert.assertEquals("[测试, 新增, 新增]", sensitiveWordBs.findAll(text).toString()); + // Remove a collection + sensitiveWordBs.removeWord(Arrays.asList("新增", "测试")); + Assert.assertEquals("[]", sensitiveWordBs.findAll(text).toString()); + + // Add via varargs + sensitiveWordBs.addWord("新增", "测试"); + Assert.assertEquals("[测试, 新增, 新增]", sensitiveWordBs.findAll(text).toString()); + // Remove via varargs + sensitiveWordBs.removeWord("新增", "测试"); + Assert.assertEquals("[]", sensitiveWordBs.findAll(text).toString()); + } + +}