mirror of
https://github.com/houbb/sensitive-word.git
synced 2026-03-22 08:27:36 +08:00
release branch 0.7.0
This commit is contained in:
@@ -184,3 +184,9 @@
|
||||
|:---|:-----|--------------------------|:--------------------|:-------|
|
||||
| 1 | O | 性能优化:字符映射统一处理一遍,而不是每次都处理 | 2023-06-09 23:51:58 | |
|
||||
| 2 | D | 移除废弃的 replaceContext | 2023-06-09 23:51:58 | |
|
||||
|
||||
# release_0.7.0
|
||||
|
||||
| 序号 | 变更类型 | 说明 | 时间 | 备注 |
|
||||
|:---|:-----|---------------------------------------------|:--------------------|:----------------|
|
||||
| 1 | A | IWordMap 命名调整为 IWordData, 添加 Tree 实现。优化内存占用 | 2023-06-09 23:51:58 | 避免过于限制,放开便于后续拓展 |
|
||||
14
README.md
14
README.md
@@ -58,7 +58,7 @@
|
||||
<dependency>
|
||||
<groupId>com.github.houbb</groupId>
|
||||
<artifactId>sensitive-word</artifactId>
|
||||
<version>0.6.0</version>
|
||||
<version>0.7.0</version>
|
||||
</dependency>
|
||||
```
|
||||
|
||||
@@ -662,12 +662,16 @@ ps: 不同环境会有差异,但是比例基本稳定。
|
||||
|
||||
# 后期 road-map
|
||||
|
||||
- [x] wordMap 的抽象,便于拓展
|
||||
- [x] wordData 的内存占用对比 + 优化
|
||||
|
||||
- [x] word 的统一性能优化,移除 string 的生成
|
||||
- [ ] 用户指定自定义的词组,同时允许指定词组的组合获取,更加灵活
|
||||
|
||||
ICharFormat/ISensitiveCheck/Word 方法,允许用户自定义。
|
||||
|
||||
- [ ] word check 策略的优化,统一遍历+转换
|
||||
|
||||
- [ ] DFA 数据结构的另一种实现
|
||||
|
||||
- 同音字处理
|
||||
|
||||
- 形近字处理
|
||||
@@ -678,10 +682,6 @@ ps: 不同环境会有差异,但是比例基本稳定。
|
||||
|
||||
- 敏感词标签支持
|
||||
|
||||
- [ ] DFA 数据结构的另一种实现
|
||||
|
||||
放开 wordMap 策略定义
|
||||
|
||||
# 拓展阅读
|
||||
|
||||
[敏感词工具实现思路](https://houbb.github.io/2020/01/07/sensitive-word)
|
||||
|
||||
15
pom.xml
15
pom.xml
@@ -6,7 +6,7 @@
|
||||
|
||||
<groupId>com.github.houbb</groupId>
|
||||
<artifactId>sensitive-word</artifactId>
|
||||
<version>0.6.0</version>
|
||||
<version>0.7.0</version>
|
||||
|
||||
<properties>
|
||||
<!--============================== All Plugins START ==============================-->
|
||||
@@ -68,6 +68,14 @@
|
||||
<optional>true</optional>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.lucene</groupId>
|
||||
<artifactId>lucene-core</artifactId>
|
||||
<version>4.0.0</version>
|
||||
<scope>test</scope>
|
||||
<optional>true</optional>
|
||||
</dependency>
|
||||
|
||||
</dependencies>
|
||||
</dependencyManagement>
|
||||
|
||||
@@ -91,6 +99,11 @@
|
||||
<artifactId>junit</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.lucene</groupId>
|
||||
<artifactId>lucene-core</artifactId>
|
||||
</dependency>
|
||||
|
||||
</dependencies>
|
||||
|
||||
<build>
|
||||
|
||||
@@ -10,9 +10,9 @@ ECHO "============================= RELEASE START..."
|
||||
|
||||
:: 版本号信息(需要手动指定)
|
||||
:::: 旧版本名称
|
||||
SET version=0.6.0
|
||||
SET version=0.7.0
|
||||
:::: 新版本名称
|
||||
SET newVersion=0.7.0
|
||||
SET newVersion=0.8.0
|
||||
:::: 组织名称
|
||||
SET groupName=com.github.houbb
|
||||
:::: 项目名称
|
||||
|
||||
@@ -223,7 +223,7 @@ public interface IWordContext {
|
||||
* @return 策略
|
||||
* @since 0.3.2
|
||||
*/
|
||||
IWordMap wordMap();
|
||||
IWordData wordData();
|
||||
|
||||
/**
|
||||
* 设置 wordMap 策略
|
||||
@@ -231,6 +231,6 @@ public interface IWordContext {
|
||||
* @return this
|
||||
* @since 0.3.2
|
||||
*/
|
||||
IWordContext wordMap(IWordMap wordMap);
|
||||
IWordContext wordData(IWordData wordMap);
|
||||
|
||||
}
|
||||
|
||||
@@ -11,7 +11,7 @@ import java.util.Collection;
|
||||
* @author binbin.hou
|
||||
* @since 0.0.1
|
||||
*/
|
||||
public interface IWordMap {
|
||||
public interface IWordData {
|
||||
|
||||
|
||||
/**
|
||||
@@ -19,7 +19,7 @@ public interface IWordMap {
|
||||
* @param collection 集合信息
|
||||
* @since 0.0.1
|
||||
*/
|
||||
void initWordMap(Collection<String> collection);
|
||||
void initWordData(Collection<String> collection);
|
||||
|
||||
/**
|
||||
* 是否包含敏感词
|
||||
@@ -10,7 +10,7 @@ import com.github.houbb.sensitive.word.support.check.ISensitiveCheck;
|
||||
import com.github.houbb.sensitive.word.support.check.impl.SensitiveChecks;
|
||||
import com.github.houbb.sensitive.word.support.deny.WordDenys;
|
||||
import com.github.houbb.sensitive.word.support.format.CharFormats;
|
||||
import com.github.houbb.sensitive.word.support.map.WordMaps;
|
||||
import com.github.houbb.sensitive.word.support.data.WordDatas;
|
||||
import com.github.houbb.sensitive.word.support.replace.SensitiveWordReplaces;
|
||||
import com.github.houbb.sensitive.word.support.result.WordResultHandlers;
|
||||
import com.github.houbb.sensitive.word.utils.InnerWordDataUtils;
|
||||
@@ -95,31 +95,29 @@ public class SensitiveWordBs {
|
||||
private ISensitiveWord sensitiveWord = SensitiveWords.defaults();
|
||||
|
||||
/**
|
||||
* 敏感词 map
|
||||
*
|
||||
* TODO: 暂时定义为 final,后续放开抽象。
|
||||
* 敏感词 Data
|
||||
*
|
||||
* @since 0.0.1
|
||||
*/
|
||||
private final IWordMap wordMap = WordMaps.defaults();
|
||||
private IWordData wordData = WordDatas.defaults();
|
||||
|
||||
/**
|
||||
* 禁止的单词
|
||||
* @since 0.0.13
|
||||
*/
|
||||
private IWordDeny wordDeny = WordDenys.system();
|
||||
private IWordDeny wordDeny = WordDenys.defaults();
|
||||
|
||||
/**
|
||||
* 允许的单词
|
||||
* @since 0.0.13
|
||||
*/
|
||||
private IWordAllow wordAllow = WordAllows.system();
|
||||
private IWordAllow wordAllow = WordAllows.defaults();
|
||||
|
||||
/**
|
||||
* 替换策略
|
||||
* @since 0.3.0
|
||||
*/
|
||||
private ISensitiveWordReplace sensitiveWordReplace = SensitiveWordReplaces.chars();
|
||||
private ISensitiveWordReplace sensitiveWordReplace = SensitiveWordReplaces.defaults();
|
||||
|
||||
/**
|
||||
* 上下文
|
||||
@@ -191,7 +189,7 @@ public class SensitiveWordBs {
|
||||
// 额外配置
|
||||
context.sensitiveCheckNumLen(numCheckLen);
|
||||
context.sensitiveWordReplace(sensitiveWordReplace);
|
||||
context.wordMap(wordMap);
|
||||
context.wordData(wordData);
|
||||
|
||||
return context;
|
||||
}
|
||||
@@ -209,7 +207,20 @@ public class SensitiveWordBs {
|
||||
List<String> results = InnerWordDataUtils.getActualDenyList(denyList, allowList, context);
|
||||
|
||||
// 便于可以多次初始化
|
||||
wordMap.initWordMap(results);
|
||||
wordData.initWordData(results);
|
||||
}
|
||||
|
||||
/**
|
||||
* 允许指定策略数据
|
||||
* @param wordData 单词数据
|
||||
* @return 结果
|
||||
* @since 0.7.0
|
||||
*/
|
||||
public SensitiveWordBs wordData(IWordData wordData) {
|
||||
ArgUtil.notNull(wordData, "wordData");
|
||||
|
||||
this.wordData = wordData;
|
||||
return this;
|
||||
}
|
||||
|
||||
public SensitiveWordBs sensitiveWord(ISensitiveWord sensitiveWord) {
|
||||
|
||||
@@ -3,7 +3,7 @@ package com.github.houbb.sensitive.word.bs;
|
||||
import com.github.houbb.sensitive.word.api.ICharFormat;
|
||||
import com.github.houbb.sensitive.word.api.ISensitiveWordReplace;
|
||||
import com.github.houbb.sensitive.word.api.IWordContext;
|
||||
import com.github.houbb.sensitive.word.api.IWordMap;
|
||||
import com.github.houbb.sensitive.word.api.IWordData;
|
||||
import com.github.houbb.sensitive.word.support.check.ISensitiveCheck;
|
||||
|
||||
/**
|
||||
@@ -102,14 +102,14 @@ public class SensitiveWordContext implements IWordContext {
|
||||
*
|
||||
* @since 0.3.2
|
||||
*/
|
||||
private IWordMap wordMap;
|
||||
private IWordData wordData;
|
||||
|
||||
public IWordMap wordMap() {
|
||||
return wordMap;
|
||||
public IWordData wordData() {
|
||||
return wordData;
|
||||
}
|
||||
|
||||
public SensitiveWordContext wordMap(IWordMap wordMap) {
|
||||
this.wordMap = wordMap;
|
||||
public SensitiveWordContext wordData(IWordData wordData) {
|
||||
this.wordData = wordData;
|
||||
return this;
|
||||
}
|
||||
|
||||
|
||||
@@ -41,7 +41,7 @@ public final class WordAllows {
|
||||
* @return 结果
|
||||
* @since 0.0.13
|
||||
*/
|
||||
public static IWordAllow system() {
|
||||
public static IWordAllow defaults() {
|
||||
return WordAllowSystem.getInstance();
|
||||
}
|
||||
|
||||
|
||||
@@ -2,7 +2,7 @@ package com.github.houbb.sensitive.word.support.check.impl;
|
||||
|
||||
import com.github.houbb.heaven.annotation.ThreadSafe;
|
||||
import com.github.houbb.sensitive.word.api.IWordContext;
|
||||
import com.github.houbb.sensitive.word.api.IWordMap;
|
||||
import com.github.houbb.sensitive.word.api.IWordData;
|
||||
import com.github.houbb.sensitive.word.api.context.InnerSensitiveContext;
|
||||
import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum;
|
||||
import com.github.houbb.sensitive.word.constant.enums.WordContainsTypeEnum;
|
||||
@@ -41,7 +41,7 @@ public class SensitiveCheckWord extends AbstractSensitiveCheck {
|
||||
|
||||
// 采用 ThreadLocal 应该可以提升性能,减少对象的创建。
|
||||
int actualLength = 0;
|
||||
final IWordMap wordMap = context.wordMap();
|
||||
final IWordData wordData = context.wordData();
|
||||
|
||||
// 前一个条件
|
||||
StringBuilder stringBuilder = new StringBuilder();
|
||||
@@ -53,7 +53,7 @@ public class SensitiveCheckWord extends AbstractSensitiveCheck {
|
||||
stringBuilder.append(mappingChar);
|
||||
|
||||
// 判断是否存在
|
||||
WordContainsTypeEnum wordContainsTypeEnum = wordMap.contains(stringBuilder, innerContext);
|
||||
WordContainsTypeEnum wordContainsTypeEnum = wordData.contains(stringBuilder, innerContext);
|
||||
if(WordContainsTypeEnum.CONTAINS_END.equals(wordContainsTypeEnum)) {
|
||||
actualLength = stringBuilder.length();
|
||||
|
||||
|
||||
@@ -0,0 +1,49 @@
|
||||
package com.github.houbb.sensitive.word.support.data;
|
||||
|
||||
import com.github.houbb.heaven.util.lang.StringUtil;
|
||||
import com.github.houbb.heaven.util.util.CollectionUtil;
|
||||
import com.github.houbb.sensitive.word.api.IWordData;
|
||||
import com.github.houbb.sensitive.word.api.context.InnerSensitiveContext;
|
||||
import com.github.houbb.sensitive.word.constant.enums.WordContainsTypeEnum;
|
||||
|
||||
import java.util.Collection;
|
||||
|
||||
/**
|
||||
* 抽象数据
|
||||
*
|
||||
* @since 0.7.0
|
||||
*/
|
||||
public abstract class AbstractWordData implements IWordData {
|
||||
|
||||
/**
|
||||
* 是否包含
|
||||
* @param stringBuilder 字符
|
||||
* @param innerContext 上下文
|
||||
* @return 结果
|
||||
*/
|
||||
protected abstract WordContainsTypeEnum doContains(StringBuilder stringBuilder, InnerSensitiveContext innerContext);
|
||||
|
||||
/**
|
||||
* 初始化
|
||||
* @param collection 数据
|
||||
*/
|
||||
protected abstract void doInitWordData(Collection<String> collection);
|
||||
|
||||
@Override
|
||||
public void initWordData(Collection<String> collection) {
|
||||
//1. 预留
|
||||
|
||||
this.doInitWordData(collection);
|
||||
}
|
||||
|
||||
@Override
|
||||
public WordContainsTypeEnum contains(StringBuilder stringBuilder, InnerSensitiveContext innerContext) {
|
||||
if(stringBuilder == null
|
||||
|| stringBuilder.length() <= 0) {
|
||||
return WordContainsTypeEnum.NOT_FOUND;
|
||||
}
|
||||
|
||||
return doContains(stringBuilder, innerContext);
|
||||
}
|
||||
|
||||
}
|
||||
@@ -1,10 +1,9 @@
|
||||
package com.github.houbb.sensitive.word.support.map;
|
||||
package com.github.houbb.sensitive.word.support.data;
|
||||
|
||||
import com.github.houbb.heaven.annotation.ThreadSafe;
|
||||
import com.github.houbb.heaven.util.lang.ObjectUtil;
|
||||
import com.github.houbb.heaven.util.lang.StringUtil;
|
||||
import com.github.houbb.sensitive.word.api.IWordContext;
|
||||
import com.github.houbb.sensitive.word.api.IWordMap;
|
||||
import com.github.houbb.sensitive.word.api.context.InnerSensitiveContext;
|
||||
import com.github.houbb.sensitive.word.constant.AppConst;
|
||||
import com.github.houbb.sensitive.word.constant.enums.WordContainsTypeEnum;
|
||||
@@ -20,7 +19,7 @@ import java.util.Map;
|
||||
* @since 0.0.1
|
||||
*/
|
||||
@ThreadSafe
|
||||
public class WordMap implements IWordMap {
|
||||
public class WordDataHashMap extends AbstractWordData {
|
||||
|
||||
/**
|
||||
* 脱敏单词 map
|
||||
@@ -41,7 +40,7 @@ public class WordMap implements IWordMap {
|
||||
*/
|
||||
@Override
|
||||
@SuppressWarnings("unchecked")
|
||||
public synchronized void initWordMap(Collection<String> collection) {
|
||||
public synchronized void doInitWordData(Collection<String> collection) {
|
||||
// 避免扩容带来的消耗
|
||||
Map newInnerWordMap = new HashMap(collection.size());
|
||||
|
||||
@@ -78,12 +77,10 @@ public class WordMap implements IWordMap {
|
||||
// 将新节点设置为当前节点,方便下一次节点的循环。
|
||||
currentMap = newWordMap;
|
||||
}
|
||||
|
||||
// 判断是否为最后一个,添加是否结束的标识。
|
||||
if (i == size - 1) {
|
||||
currentMap.put(AppConst.IS_END, true);
|
||||
}
|
||||
}
|
||||
|
||||
// 判断是否为最后一个,添加是否结束的标识。
|
||||
currentMap.put(AppConst.IS_END, true);
|
||||
}
|
||||
|
||||
// 最后更新为新的 map,保证更新过程中旧的数据可用
|
||||
@@ -101,13 +98,8 @@ public class WordMap implements IWordMap {
|
||||
* @since 0.0.1
|
||||
*/
|
||||
@Override
|
||||
public WordContainsTypeEnum contains(final StringBuilder stringBuilder,
|
||||
public WordContainsTypeEnum doContains(final StringBuilder stringBuilder,
|
||||
final InnerSensitiveContext innerContext) {
|
||||
if (stringBuilder == null
|
||||
|| stringBuilder.length() <= 0) {
|
||||
return WordContainsTypeEnum.NOT_FOUND;
|
||||
}
|
||||
|
||||
return innerContainsSensitive(stringBuilder, innerContext);
|
||||
}
|
||||
|
||||
@@ -0,0 +1,123 @@
|
||||
package com.github.houbb.sensitive.word.support.data;
|
||||
|
||||
import com.github.houbb.heaven.annotation.ThreadSafe;
|
||||
import com.github.houbb.heaven.util.lang.ObjectUtil;
|
||||
import com.github.houbb.heaven.util.lang.StringUtil;
|
||||
import com.github.houbb.sensitive.word.api.IWordContext;
|
||||
import com.github.houbb.sensitive.word.api.IWordData;
|
||||
import com.github.houbb.sensitive.word.api.context.InnerSensitiveContext;
|
||||
import com.github.houbb.sensitive.word.constant.AppConst;
|
||||
import com.github.houbb.sensitive.word.constant.enums.WordContainsTypeEnum;
|
||||
|
||||
import java.util.Collection;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* 敏感词 map
|
||||
* PR:https://github.com/houbb/sensitive-word/pull/33
|
||||
*
|
||||
* @author xiaochangbai
|
||||
* @author binbin.hou
|
||||
* @since 0.7.0
|
||||
*/
|
||||
@ThreadSafe
|
||||
public class WordDataTree implements IWordData {
|
||||
|
||||
/**
|
||||
* 根节点
|
||||
*/
|
||||
private WordDataTreeNode root;
|
||||
|
||||
@Override
|
||||
public synchronized void initWordData(Collection<String> collection) {
|
||||
WordDataTreeNode newRoot = new WordDataTreeNode();
|
||||
|
||||
for(String word : collection) {
|
||||
if(StringUtil.isEmpty(word)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
WordDataTreeNode tempNode = newRoot;
|
||||
char[] chars = word.toCharArray();
|
||||
for (char c : chars) {
|
||||
// 获取子节点
|
||||
WordDataTreeNode subNode = tempNode.getSubNode(c);
|
||||
if (subNode == null) {
|
||||
subNode = new WordDataTreeNode();
|
||||
// 加入新的子节点
|
||||
tempNode.addSubNode(c, subNode);
|
||||
}
|
||||
|
||||
// 临时节点指向子节点,进入下一次循环
|
||||
tempNode = subNode;
|
||||
}
|
||||
|
||||
// 设置结束标识(循环结束,设置一次即可)
|
||||
tempNode.end(true);
|
||||
}
|
||||
|
||||
// 初始化完成才做替换
|
||||
this.root = newRoot;
|
||||
}
|
||||
|
||||
@Override
|
||||
public WordContainsTypeEnum contains(StringBuilder stringBuilder,
|
||||
InnerSensitiveContext innerContext) {
|
||||
WordDataTreeNode nowNode = root;
|
||||
|
||||
int len = stringBuilder.length();
|
||||
|
||||
for(int i = 0; i < len; i++) {
|
||||
// 获取当前的 map 信息
|
||||
nowNode = getNowMap(nowNode, i, stringBuilder, innerContext);
|
||||
|
||||
// 如果不为空,则判断是否为结尾。
|
||||
if (ObjectUtil.isNull(nowNode)) {
|
||||
return WordContainsTypeEnum.NOT_FOUND;
|
||||
}
|
||||
}
|
||||
|
||||
if(nowNode.end()) {
|
||||
return WordContainsTypeEnum.CONTAINS_END;
|
||||
}
|
||||
|
||||
return WordContainsTypeEnum.CONTAINS_PREFIX;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* 获取当前的 Map
|
||||
* @param nowNode 当前节点
|
||||
* @param index 下标
|
||||
* @param stringBuilder 文本缓存
|
||||
* @param sensitiveContext 上下文
|
||||
* @return 实际的当前 map
|
||||
* @since 0.0.7
|
||||
*/
|
||||
private WordDataTreeNode getNowMap(WordDataTreeNode nowNode,
|
||||
final int index,
|
||||
final StringBuilder stringBuilder,
|
||||
final InnerSensitiveContext sensitiveContext) {
|
||||
final IWordContext context = sensitiveContext.wordContext();
|
||||
|
||||
// 这里的 char 已经是统一格式化之后的,所以可以不用再次格式化。
|
||||
char mappingChar = stringBuilder.charAt(index);
|
||||
|
||||
// 这里做一次重复词的处理
|
||||
WordDataTreeNode currentMap = nowNode.getSubNode(mappingChar);
|
||||
// 启用忽略重复&当前下标不是第一个
|
||||
if(context.ignoreRepeat()
|
||||
&& index > 0) {
|
||||
char preMappingChar = stringBuilder.charAt(index-1);
|
||||
|
||||
// 直接赋值为上一个 map
|
||||
if(preMappingChar == mappingChar) {
|
||||
currentMap = nowNode;
|
||||
}
|
||||
}
|
||||
|
||||
return currentMap;
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,49 @@
|
||||
package com.github.houbb.sensitive.word.support.data;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* 树节点
|
||||
*
|
||||
* @since 0.7.0
|
||||
*/
|
||||
/**
 * A single node of the sensitive-word DFA tree.
 *
 * <p>Holds an end-of-word flag plus a lazily created map of child nodes,
 * keyed by the next character of a word.</p>
 *
 * @since 0.7.0
 */
public class WordDataTreeNode {

    // True when some keyword terminates exactly at this node.
    private boolean end;

    // Children keyed by the following character; null until first child is added.
    private Map<Character, WordDataTreeNode> subNodeMap;

    /**
     * @return whether a keyword ends at this node
     */
    public boolean end() {
        return end;
    }

    /**
     * Set the end-of-word flag.
     *
     * @param end new flag value
     * @return this node (fluent)
     */
    public WordDataTreeNode end(boolean end) {
        this.end = end;
        return this;
    }

    /**
     * Look up the child node for the given character.
     *
     * @param c next character
     * @return child node, or {@code null} when absent
     */
    public WordDataTreeNode getSubNode(final char c) {
        return subNodeMap == null ? null : subNodeMap.get(c);
    }

    /**
     * Register a child node for the given character, creating the
     * child map on first use.
     *
     * @param c       next character
     * @param subNode child node to attach
     * @return this node (fluent)
     */
    public WordDataTreeNode addSubNode(char c, WordDataTreeNode subNode) {
        Map<Character, WordDataTreeNode> children = this.subNodeMap;
        if (children == null) {
            children = new HashMap<>();
            this.subNodeMap = children;
        }

        children.put(c, subNode);
        return this;
    }

}
|
||||
@@ -0,0 +1,43 @@
|
||||
package com.github.houbb.sensitive.word.support.data;
|
||||
|
||||
import com.github.houbb.sensitive.word.api.IWordData;
|
||||
|
||||
/**
|
||||
* 敏感词 map
|
||||
*
|
||||
* @author binbin.hou
|
||||
* @since 0.3.0
|
||||
*/
|
||||
public final class WordDatas {

    private WordDatas(){}

    /**
     * Default strategy (currently the tree implementation).
     * @return strategy
     * @since 0.3.0
     */
    public static IWordData defaults() {
        return tree();
    }

    /**
     * Tree mode (DFA tree, lower memory footprint).
     * @return tree-based word data
     * @since 0.7.0
     */
    public static IWordData tree() {
        return new WordDataTree();
    }

    /**
     * HashMap mode (nested-map DFA).
     * NOTE(review): original javadoc said "tree mode" here — copy-paste error.
     * @return hash-map-based word data
     * @since 0.7.0
     */
    public static IWordData hashMap() {
        return new WordDataHashMap();
    }


}
|
||||
@@ -41,7 +41,7 @@ public final class WordDenys {
|
||||
* @return 结果
|
||||
* @since 0.0.13
|
||||
*/
|
||||
public static IWordDeny system() {
|
||||
public static IWordDeny defaults() {
|
||||
return WordDenySystem.getInstance();
|
||||
}
|
||||
|
||||
|
||||
@@ -1,265 +0,0 @@
|
||||
//package com.github.houbb.sensitive.word.support.map;
|
||||
//
|
||||
//import com.github.houbb.heaven.annotation.ThreadSafe;
|
||||
//import com.github.houbb.heaven.util.guava.Guavas;
|
||||
//import com.github.houbb.heaven.util.io.FileUtil;
|
||||
//import com.github.houbb.heaven.util.lang.ObjectUtil;
|
||||
//import com.github.houbb.heaven.util.lang.StringUtil;
|
||||
//import com.github.houbb.heaven.util.util.CollectionUtil;
|
||||
//import com.github.houbb.sensitive.word.api.*;
|
||||
//import com.github.houbb.sensitive.word.constant.AppConst;
|
||||
//import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum;
|
||||
//import com.github.houbb.sensitive.word.support.check.SensitiveCheckResult;
|
||||
//import com.github.houbb.sensitive.word.support.check.impl.SensitiveCheckUrl;
|
||||
//import com.github.houbb.sensitive.word.support.replace.SensitiveWordReplaceContext;
|
||||
//import com.github.houbb.sensitive.word.support.result.WordResult;
|
||||
//
|
||||
//import java.util.Collection;
|
||||
//import java.util.HashMap;
|
||||
//import java.util.List;
|
||||
//import java.util.Map;
|
||||
//
|
||||
///**
|
||||
// * 敏感词 map
|
||||
// *
|
||||
// * @author binbin.hou
|
||||
// * @since 0.0.1
|
||||
// */
|
||||
//@ThreadSafe
|
||||
//public class SensitiveWordMap implements IWordMap {
|
||||
//
|
||||
// /**
|
||||
// * 脱敏单词 map
|
||||
// *
|
||||
// * @since 0.0.1
|
||||
// */
|
||||
// private Map innerWordMap;
|
||||
//
|
||||
// /**
|
||||
// * 读取敏感词库,将敏感词放入HashSet中,构建一个DFA算法模型:
|
||||
// *
|
||||
// * @param collection 敏感词库集合
|
||||
// * @since 0.0.1
|
||||
// * <p>
|
||||
// * 使用对象代码 map 的这种一直递归。
|
||||
// * 参考资料:https://www.cnblogs.com/AlanLee/p/5329555.html
|
||||
// * https://blog.csdn.net/chenssy/article/details/26961957
|
||||
// */
|
||||
// @Override
|
||||
// @SuppressWarnings("unchecked")
|
||||
// public synchronized void initWordMap(Collection<String> collection) {
|
||||
// // 避免扩容带来的消耗
|
||||
// Map newInnerWordMap = new HashMap(collection.size());
|
||||
//
|
||||
// for (String key : collection) {
|
||||
// if (StringUtil.isEmpty(key)) {
|
||||
// continue;
|
||||
// }
|
||||
//
|
||||
// // 用来按照相应的格式保存敏感词库数据
|
||||
// char[] chars = key.toCharArray();
|
||||
// final int size = chars.length;
|
||||
//
|
||||
// // 每一个新词的循环,直接将结果设置为当前 map,所有变化都会体现在结果的 map 中
|
||||
// Map currentMap = newInnerWordMap;
|
||||
//
|
||||
// for (int i = 0; i < size; i++) {
|
||||
// // 截取敏感词当中的字,在敏感词库中字为HashMap对象的Key键值
|
||||
// char charKey = chars[i];
|
||||
// // 如果集合存在
|
||||
// Object wordMap = currentMap.get(charKey);
|
||||
//
|
||||
// // 如果集合存在
|
||||
// if (ObjectUtil.isNotNull(wordMap)) {
|
||||
// // 直接将获取到的 map 当前当前 map 进行继续的操作
|
||||
// currentMap = (Map) wordMap;
|
||||
// } else {
|
||||
// //不存在则,则构建一个新的map,同时将isEnd设置为0,因为他不是最后一
|
||||
// Map<String, Boolean> newWordMap = new HashMap<>(8);
|
||||
// newWordMap.put(AppConst.IS_END, false);
|
||||
//
|
||||
// // 将新的节点放入当前 map 中
|
||||
// currentMap.put(charKey, newWordMap);
|
||||
//
|
||||
// // 将新节点设置为当前节点,方便下一次节点的循环。
|
||||
// currentMap = newWordMap;
|
||||
// }
|
||||
//
|
||||
// // 判断是否为最后一个,添加是否结束的标识。
|
||||
// if (i == size - 1) {
|
||||
// currentMap.put(AppConst.IS_END, true);
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// // 最后更新为新的 map,保证更新过程中旧的数据可用
|
||||
// this.innerWordMap = newInnerWordMap;
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * 是否包含
|
||||
// * (1)直接遍历所有
|
||||
// * (2)如果遇到,则直接返回 true
|
||||
// *
|
||||
// * @param string 字符串
|
||||
// * @return 是否包含
|
||||
// * @since 0.0.1
|
||||
// */
|
||||
// @Override
|
||||
// public boolean contains(String string, final IWordContext context) {
|
||||
// if (StringUtil.isEmpty(string)) {
|
||||
// return false;
|
||||
// }
|
||||
//
|
||||
// for (int i = 0; i < string.length(); i++) {
|
||||
// SensitiveCheckResult checkResult = sensitiveCheck(string, i, ValidModeEnum.FAIL_FAST, context);
|
||||
// // 快速返回
|
||||
// if (checkResult.index() > 0) {
|
||||
// return true;
|
||||
// }
|
||||
// }
|
||||
// return false;
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * 返回所有对应的敏感词
|
||||
// * (1)结果是有序的
|
||||
// * (2)为了保留所有的下标,结果从 v0.1.0 之后不再去重。
|
||||
// *
|
||||
// * @param string 原始字符串
|
||||
// * @return 结果
|
||||
// * @since 0.0.1
|
||||
// */
|
||||
// @Override
|
||||
// public List<IWordResult> findAll(String string, final IWordContext context) {
|
||||
// return getSensitiveWords(string, ValidModeEnum.FAIL_OVER, context);
|
||||
// }
|
||||
//
|
||||
// @Override
|
||||
// public IWordResult findFirst(String string, final IWordContext context) {
|
||||
// List<IWordResult> stringList = getSensitiveWords(string, ValidModeEnum.FAIL_FAST, context);
|
||||
//
|
||||
// if (CollectionUtil.isEmpty(stringList)) {
|
||||
// return null;
|
||||
// }
|
||||
//
|
||||
// return stringList.get(0);
|
||||
// }
|
||||
//
|
||||
// @Override
|
||||
// public String replace(String target, final IWordContext context) {
|
||||
// if(StringUtil.isEmpty(target)) {
|
||||
// return target;
|
||||
// }
|
||||
//
|
||||
// return this.replaceSensitiveWord(target, context);
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * 获取敏感词列表
|
||||
// *
|
||||
// * @param text 文本
|
||||
// * @param modeEnum 模式
|
||||
// * @return 结果列表
|
||||
// * @since 0.0.1
|
||||
// */
|
||||
// private List<IWordResult> getSensitiveWords(final String text, final ValidModeEnum modeEnum,
|
||||
// final IWordContext context) {
|
||||
// //1. 是否存在敏感词,如果比存在,直接返回空列表
|
||||
// if (StringUtil.isEmpty(text)) {
|
||||
// return Guavas.newArrayList();
|
||||
// }
|
||||
//
|
||||
// List<IWordResult> resultList = Guavas.newArrayList();
|
||||
// for (int i = 0; i < text.length(); i++) {
|
||||
// SensitiveCheckResult checkResult = sensitiveCheck(text, i, ValidModeEnum.FAIL_OVER, context);
|
||||
// // 命中
|
||||
// int wordLength = checkResult.index();
|
||||
// if (wordLength > 0) {
|
||||
// // 保存敏感词
|
||||
// String sensitiveWord = text.substring(i, i + wordLength);
|
||||
//
|
||||
// // 添加去重
|
||||
// WordResult wordResult = WordResult.newInstance()
|
||||
// .startIndex(i)
|
||||
// .endIndex(i+wordLength)
|
||||
// .word(sensitiveWord);
|
||||
// resultList.add(wordResult);
|
||||
//
|
||||
// // 快速返回
|
||||
// if (ValidModeEnum.FAIL_FAST.equals(modeEnum)) {
|
||||
// break;
|
||||
// }
|
||||
//
|
||||
// // 增加 i 的步长
|
||||
// // 为什么要-1,因为默认就会自增1
|
||||
// // TODO: 这里可以根据字符串匹配算法优化。
|
||||
// i += wordLength - 1;
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// return resultList;
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * 直接替换敏感词,返回替换后的结果
|
||||
// * @param target 文本信息
|
||||
// * @param context 上下文
|
||||
// * @return 脱敏后的字符串
|
||||
// * @since 0.0.2
|
||||
// */
|
||||
// private String replaceSensitiveWord(final String target,
|
||||
// final IWordContext context) {
|
||||
// if(StringUtil.isEmpty(target)) {
|
||||
// return target;
|
||||
// }
|
||||
// // 用于结果构建
|
||||
// StringBuilder resultBuilder = new StringBuilder(target.length());
|
||||
//
|
||||
// for (int i = 0; i < target.length(); i++) {
|
||||
// char currentChar = target.charAt(i);
|
||||
// // 内层直接从 i 开始往后遍历,这个算法的,获取第一个匹配的单词
|
||||
// SensitiveCheckResult checkResult = sensitiveCheck(target, i, ValidModeEnum.FAIL_OVER, context);
|
||||
//
|
||||
// // 敏感词
|
||||
// int wordLength = checkResult.index();
|
||||
// if(wordLength > 0) {
|
||||
// // 是否执行替换
|
||||
// Class checkClass = checkResult.checkClass();
|
||||
// String string = target.substring(i, i+wordLength);
|
||||
// if(SensitiveCheckUrl.class.equals(checkClass)
|
||||
// && FileUtil.isImage(string)) {
|
||||
// // 直接使用原始内容,避免 markdown 图片转换失败
|
||||
// resultBuilder.append(string);
|
||||
// } else {
|
||||
// // 创建上下文
|
||||
// ISensitiveWordReplaceContext replaceContext = SensitiveWordReplaceContext.newInstance()
|
||||
// .sensitiveWord(string)
|
||||
// .wordLength(wordLength);
|
||||
// String replaceStr = context.sensitiveWordReplace().replace(replaceContext);
|
||||
//
|
||||
// resultBuilder.append(replaceStr);
|
||||
// }
|
||||
//
|
||||
// // 直接跳过敏感词的长度
|
||||
// i += wordLength-1;
|
||||
// } else {
|
||||
// // 普通词
|
||||
// resultBuilder.append(currentChar);
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// return resultBuilder.toString();
|
||||
// }
|
||||
//
|
||||
// @Override
|
||||
// public SensitiveCheckResult sensitiveCheck(String txt, int beginIndex, ValidModeEnum validModeEnum, IWordContext context) {
|
||||
// // 默认执行敏感词操作
|
||||
// context.sensitiveWordMap(innerWordMap);
|
||||
//
|
||||
// // 责任链模式调用
|
||||
// return context.sensitiveCheck()
|
||||
// .sensitiveCheck(txt, beginIndex, validModeEnum, context);
|
||||
// }
|
||||
//
|
||||
//}
|
||||
@@ -1,24 +0,0 @@
|
||||
package com.github.houbb.sensitive.word.support.map;
|
||||
|
||||
import com.github.houbb.sensitive.word.api.IWordMap;
|
||||
|
||||
/**
|
||||
* 敏感词 map
|
||||
*
|
||||
* @author binbin.hou
|
||||
* @since 0.3.0
|
||||
*/
|
||||
public final class WordMaps {
|
||||
|
||||
private WordMaps(){}
|
||||
|
||||
/**
|
||||
* 默认策略
|
||||
* @return 策略
|
||||
* @since 0.3.0
|
||||
*/
|
||||
public static IWordMap defaults() {
|
||||
return new WordMap();
|
||||
}
|
||||
|
||||
}
|
||||
@@ -31,4 +31,13 @@ public final class SensitiveWordReplaces {
|
||||
return new SensitiveWordReplaceChar();
|
||||
}
|
||||
|
||||
/**
|
||||
* 字符,默认为 *
|
||||
* @return 结果
|
||||
* @since 0.7.0
|
||||
*/
|
||||
public static ISensitiveWordReplace defaults() {
|
||||
return chars();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -13,6 +13,7 @@ public class BenchmarkTimesTest {
|
||||
* 测试基准:100+字符串 * 10W次
|
||||
*
|
||||
* V0.6.0: 1470ms,接近 7.2W QPS
|
||||
* V0.7.0: 1380ms
|
||||
*/
|
||||
@Test
|
||||
public void onlyWordAndNoReplaceTest() {
|
||||
@@ -45,6 +46,7 @@ public class BenchmarkTimesTest {
|
||||
* 测试基准:100+字符串 * 10W次
|
||||
*
|
||||
* V0.6.0: 2744ms, 约 3.7W QPS
|
||||
* V0.7.0: 2723ms
|
||||
*/
|
||||
@Test
|
||||
public void onlyWordAndWithReplaceTest() {
|
||||
|
||||
@@ -0,0 +1,27 @@
|
||||
package com.github.houbb.sensitive.word.bs;
|
||||
|
||||
import com.github.houbb.sensitive.word.support.data.WordDatas;
|
||||
import org.junit.Assert;
|
||||
import org.junit.Test;
|
||||
|
||||
/**
|
||||
* <p> project: sensitive-word-SensitiveWordBsConfigTest </p>
|
||||
* <p> create on 2020/1/7 23:43 </p>
|
||||
*
|
||||
* @author Administrator
|
||||
* @since 0.7.0
|
||||
*/
|
||||
public class SensitiveWordBsDataTest {
|
||||
|
||||
@Test
|
||||
public void wordDataConfigTest() {
|
||||
SensitiveWordBs wordBs = SensitiveWordBs.newInstance()
|
||||
.wordData(WordDatas.tree())
|
||||
.init();
|
||||
|
||||
final String text = "五星红旗迎风飘扬,毛主席的画像屹立在天安门前。";
|
||||
Assert.assertTrue(wordBs.contains(text));
|
||||
Assert.assertEquals("[五星红旗, 毛主席, 天安门]", wordBs.findAll(text).toString());
|
||||
}
|
||||
|
||||
}
|
||||
@@ -106,8 +106,8 @@ public class SensitiveWordBsTest {
|
||||
@Test
|
||||
public void configTest() {
|
||||
SensitiveWordBs wordBs = SensitiveWordBs.newInstance()
|
||||
.wordDeny(WordDenys.system())
|
||||
.wordAllow(WordAllows.system())
|
||||
.wordDeny(WordDenys.defaults())
|
||||
.wordAllow(WordAllows.defaults())
|
||||
.init();
|
||||
|
||||
final String text = "五星红旗迎风飘扬,毛主席的画像屹立在天安门前。";
|
||||
|
||||
@@ -12,8 +12,8 @@ public class MySensitiveTest {
|
||||
|
||||
@Test
|
||||
public void test() {
|
||||
IWordDeny wordDeny = WordDenys.chains(WordDenys.system(), new MyWordDeny());
|
||||
IWordAllow wordAllow = WordAllows.chains(WordAllows.system(), new MyWordAllow());
|
||||
IWordDeny wordDeny = WordDenys.chains(WordDenys.defaults(), new MyWordDeny());
|
||||
IWordAllow wordAllow = WordAllows.chains(WordAllows.defaults(), new MyWordAllow());
|
||||
SensitiveWordBs sensitiveWordBs = SensitiveWordBs.newInstance()
|
||||
.wordAllow(wordAllow)
|
||||
.wordDeny(wordDeny)// 各种其他配置
|
||||
|
||||
@@ -10,7 +10,7 @@ public class MyWordDenyChineseTest {
|
||||
|
||||
@Test
|
||||
public void test() {
|
||||
IWordDeny wordDeny = WordDenys.chains(WordDenys.system(), new MyWordDenyChineseNum());
|
||||
IWordDeny wordDeny = WordDenys.chains(WordDenys.defaults(), new MyWordDenyChineseNum());
|
||||
SensitiveWordBs sensitiveWordBs = SensitiveWordBs.newInstance()
|
||||
.wordDeny(wordDeny)// 各种其他配置
|
||||
.init();// init() 初始化敏感词字典
|
||||
|
||||
@@ -30,8 +30,8 @@ public class SensitiveWordBsDefineTest {
|
||||
public void defineChainsTest() {
|
||||
String text = "这是一个测试。我的自定义敏感词。";
|
||||
|
||||
IWordDeny wordDeny = WordDenys.chains(WordDenys.system(), new MyWordDeny());
|
||||
IWordAllow wordAllow = WordAllows.chains(WordAllows.system(), new MyWordAllow());
|
||||
IWordDeny wordDeny = WordDenys.chains(WordDenys.defaults(), new MyWordDeny());
|
||||
IWordAllow wordAllow = WordAllows.chains(WordAllows.defaults(), new MyWordAllow());
|
||||
|
||||
SensitiveWordBs wordBs = SensitiveWordBs.newInstance()
|
||||
.wordDeny(wordDeny)
|
||||
|
||||
@@ -0,0 +1,49 @@
|
||||
package com.github.houbb.sensitive.word.memory;
|
||||
|
||||
import com.github.houbb.heaven.util.io.StreamUtil;
|
||||
import com.github.houbb.sensitive.word.api.IWordData;
|
||||
import com.github.houbb.sensitive.word.support.data.WordDatas;
|
||||
import org.apache.lucene.util.RamUsageEstimator;
|
||||
import org.junit.Ignore;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* 数据内存测试
|
||||
*
|
||||
* @since 0.7.0
|
||||
*/
|
||||
@Ignore
|
||||
public class DataMemoryTest {
|
||||
|
||||
/**
|
||||
* 35.5 MB
|
||||
*/
|
||||
@Test
|
||||
public void hashMapTest() {
|
||||
List<String> allLines = StreamUtil.readAllLines("/dict.txt");
|
||||
IWordData wordData = WordDatas.defaults();
|
||||
|
||||
wordData.initWordData(allLines);
|
||||
|
||||
//计算指定对象及其引用树上的所有对象的综合大小,返回可读的结果,如:2KB
|
||||
String humanSize = RamUsageEstimator.humanSizeOf(wordData);
|
||||
System.out.println(humanSize);
|
||||
}
|
||||
|
||||
|
||||
//33.4 MB
|
||||
@Test
|
||||
public void treeTest() {
|
||||
List<String> allLines = StreamUtil.readAllLines("/dict.txt");
|
||||
IWordData wordData = WordDatas.tree();
|
||||
|
||||
wordData.initWordData(allLines);
|
||||
|
||||
//计算指定对象及其引用树上的所有对象的综合大小,返回可读的结果,如:2KB
|
||||
String humanSize = RamUsageEstimator.humanSizeOf(wordData);
|
||||
System.out.println(humanSize);
|
||||
}
|
||||
|
||||
}
|
||||
@@ -29,7 +29,7 @@ public class SpringSensitiveWordConfig {
|
||||
@Bean
|
||||
public SensitiveWordBs sensitiveWordBs() {
|
||||
SensitiveWordBs sensitiveWordBs = SensitiveWordBs.newInstance()
|
||||
.wordAllow(WordAllows.chains(WordAllows.system(), myDdWordAllow))
|
||||
.wordAllow(WordAllows.chains(WordAllows.defaults(), myDdWordAllow))
|
||||
.wordDeny(myDdWordDeny)
|
||||
// 各种其他配置
|
||||
.init();
|
||||
|
||||
Reference in New Issue
Block a user