[Feature] add for new

2026-03-22 16:37:17 +08:00 · 2020-01-08 10:57:24 +08:00
parent 6c05aa8ddc
commit b700d12aa3
15 changed files with 392 additions and 207 deletions
--- a/README.md
+++ b/README.md
@@ -40,7 +40,7 @@
 <dependency>
    <groupId>com.github.houbb</groupId>
    <artifactId>sensitive-word</artifactId>
-    <version>0.0.1</version>
+    <version>0.0.2</version>
 </dependency>
 ```

@@ -51,7 +51,7 @@
 ### 判断是否包含敏感词

 ```java
-final String text = "五星红旗迎风飘扬，毛主席的画像屹立在天安门前。。";
+final String text = "五星红旗迎风飘扬，毛主席的画像屹立在天安门前。";

 Assert.assertTrue(SensitiveWordBs.getInstance().contains(text));
 ```
--- a/doc/CHANGE_LOG.md
+++ b/doc/CHANGE_LOG.md
@@ -14,3 +14,12 @@
 | 序号 | 变更类型 | 说明 | 时间 | 备注 |
 |:---|:---|:---|:---|:--|
 | 1 | A | 基本功能的实现 | 2020-1-7 21:46:32 | |
+
+# release_0.0.2
+
+| 序号 | 变更类型 | 说明 | 时间 | 备注 |
+|:---|:---|:---|:---|:--|
+| 1 | O | 优化最大长度匹配模式 | 2020-1-8 09:34:35 | |
+| 2 | A | 新增替换实现 | 2020-1-8 09:34:35 | 性能优于各种博客的直接正则替换。|
+| 3 | O | 优化公共代码到 heaven 项目 | 2020-1-8 09:34:35 | 便于后期统一维护整理。|
+| 4 | O | 初步优化 DFA 对应 map 的大小 | 2020-1-8 09:34:35 | |
--- a/doc/issues/关联框架.md
+++ b/doc/issues/关联框架.md
@@ -8,4 +8,10 @@

 中文英文转换

-手写 Regex
+手写 Regex
+
+## 核心原理
+
+DFA 算法
+
+根据有穷状态机去处理。
--- a/pom.xml
+++ b/pom.xml
@@ -25,7 +25,7 @@
        <project.compiler.level>1.7</project.compiler.level>

        <!--============================== INTER ==============================-->
-        <heaven.version>0.1.66</heaven.version>
+        <heaven.version>0.1.67-SNAPSHOT</heaven.version>
        <!--============================== OTHER ==============================-->
        <junit.version>4.12</junit.version>
    </properties>
--- a/src/main/java/com/github/houbb/sensitive/word/api/IWordMap.java
+++ b/src/main/java/com/github/houbb/sensitive/word/api/IWordMap.java
@@ -47,4 +47,16 @@ public interface IWordMap {
     */
    String findFirst(final String string);

+    /**
+     * 替换所有敏感词内容
+     *
+     * ps: 这里可以添加优化。
+     *
+     * @param target 目标字符串
+     * @param replaceChar 替换为的 char
+     * @return 替换后结果
+     * @since 0.0.2
+     */
+    String replace(final String target, final char replaceChar);
+
 }
--- a/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java
+++ b/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java
@@ -1,9 +1,7 @@
 package com.github.houbb.sensitive.word.bs;

+import com.github.houbb.heaven.constant.CharConst;
 import com.github.houbb.heaven.support.instance.impl.Instances;
-import com.github.houbb.heaven.util.guava.Guavas;
-import com.github.houbb.heaven.util.lang.StringUtil;
-import com.github.houbb.heaven.util.util.CollectionUtil;
 import com.github.houbb.sensitive.word.api.IWordData;
 import com.github.houbb.sensitive.word.api.IWordMap;
 import com.github.houbb.sensitive.word.support.data.SensitiveWordData;
@@ -59,16 +57,6 @@ public class SensitiveWordBs {
        return INSTANCE;
    }

-    /**
-     * 是否合法
-     * @param target 目标字符串
-     * @return 是否
-     * @since 0.0.1
-     */
-    public boolean valid(final String target) {
-        return !contains(target);
-    }
-
    /**
     * 是否包含敏感词
     * @param target 目标字符串
@@ -102,4 +90,25 @@ public class SensitiveWordBs {
        return this.sensitiveWordMap.findFirst(target);
    }

+    /**
+     * 替换所有内容
+     * @param target 目标字符串
+     * @param replaceChar 替换为的 char
+     * @return 替换后结果
+     * @since 0.0.2
+     */
+    public String replace(final String target, final char replaceChar) {
+        return this.sensitiveWordMap.replace(target, replaceChar);
+    }
+
+    /**
+     * 替换所有内容
+     * @param target 目标字符串
+     * @return 替换后结果
+     * @since 0.0.2
+     */
+    public String replace(final String target) {
+        return this.replace(target, CharConst.STAR);
+    }
+
 }
--- a/src/main/java/com/github/houbb/sensitive/word/constant/AppConst.java
+++ b/src/main/java/com/github/houbb/sensitive/word/constant/AppConst.java
@@ -9,11 +9,20 @@ package com.github.houbb.sensitive.word.constant;
 */
 public final class AppConst {

+    private AppConst(){}
+
    /**
     * 是否为结束标识
     * ps: 某种角度而言，我不是很喜欢这种风格。
+     * （1）正常的 char 只会占用一个字符，这里直接给定两个字符即可，降低 Map 的容量。
     * @since 0.0.1
     */
-    public static final String IS_END = "isEnd";
+    public static final String IS_END = "ED";
+
+    /**
+     * 字典的大小
+     * @since 0.0.1
+     */
+    public static final int DICT_SIZE = 183836;

 }
--- a/src/main/java/com/github/houbb/sensitive/word/model/CheckSensitiveWordResult.java
+++ b/src/main/java/com/github/houbb/sensitive/word/model/CheckSensitiveWordResult.java
@@ -0,0 +1,69 @@
+package com.github.houbb.sensitive.word.model;
+
+/**
+ * 检测敏感词结果
+ *
+ * TODO: 这里需要结合 KMP 和 暴力匹配算法。
+ *
+ * 暂时不使用，后期会使用到。
+ * @author binbin.hou
+ * @since 0.0.2
+ */
+@Deprecated
+public class CheckSensitiveWordResult {
+
+    /**
+     * 是否匹配到了敏感词
+     * @since 0.0.2
+     */
+    private boolean hasMatch;
+
+    /**
+     * 敏感词长度
+     * @since 0.0.2
+     */
+    private int sensitiveWordSize;
+
+    /**
+     * 普通单词的长度
+     * @since 0.0.2
+     */
+    private int commonWordSize;
+
+    public boolean hasMatch() {
+        return hasMatch;
+    }
+
+    public CheckSensitiveWordResult hasMatch(boolean hasMatch) {
+        this.hasMatch = hasMatch;
+        return this;
+    }
+
+    public int sentiveWordSize() {
+        return sensitiveWordSize;
+    }
+
+    public CheckSensitiveWordResult sentiveWordSize(int sentiveWordSize) {
+        this.sensitiveWordSize = sentiveWordSize;
+        return this;
+    }
+
+    public int commonWordSize() {
+        return commonWordSize;
+    }
+
+    public CheckSensitiveWordResult commonWordSize(int commonWordSize) {
+        this.commonWordSize = commonWordSize;
+        return this;
+    }
+
+    @Override
+    public String toString() {
+        return "CheckSensitiveWordResult{" +
+                "hasMatch=" + hasMatch +
+                ", sensitiveWordSize=" + sensitiveWordSize +
+                ", commonWordSize=" + commonWordSize +
+                '}';
+    }
+
+}
--- a/src/main/java/com/github/houbb/sensitive/word/support/data/SensitiveWordData.java
+++ b/src/main/java/com/github/houbb/sensitive/word/support/data/SensitiveWordData.java
@@ -1,10 +1,11 @@
 package com.github.houbb.sensitive.word.support.data;

 import com.github.houbb.heaven.annotation.ThreadSafe;
+import com.github.houbb.heaven.util.guava.Guavas;
+import com.github.houbb.heaven.util.io.StreamUtil;
 import com.github.houbb.sensitive.word.api.IWordData;
-import com.github.houbb.sensitive.word.util.StreamUtils;
+import com.github.houbb.sensitive.word.constant.AppConst;

-import java.util.ArrayList;
 import java.util.List;

 /**
@@ -26,8 +27,8 @@ public class SensitiveWordData implements IWordData {
    static {
        synchronized (SensitiveWordData.class) {
            long start = System.currentTimeMillis();
-            defaultLines = new ArrayList<>(183836);
-            defaultLines = StreamUtils.readAllLines("/dict.txt");
+            defaultLines = Guavas.newArrayList(AppConst.DICT_SIZE);
+            defaultLines = StreamUtil.readAllLines("/dict.txt");
            long end = System.currentTimeMillis();
            System.out.println("Sensitive data loaded!, cost time: " + (end - start) + " ms");
        }
--- a/src/main/java/com/github/houbb/sensitive/word/support/map/SensitiveWordMap.java
+++ b/src/main/java/com/github/houbb/sensitive/word/support/map/SensitiveWordMap.java
@@ -2,6 +2,7 @@ package com.github.houbb.sensitive.word.support.map;

 import com.github.houbb.heaven.annotation.ThreadSafe;
 import com.github.houbb.heaven.util.guava.Guavas;
+import com.github.houbb.heaven.util.lang.CharUtil;
 import com.github.houbb.heaven.util.lang.ObjectUtil;
 import com.github.houbb.heaven.util.lang.StringUtil;
 import com.github.houbb.heaven.util.util.CollectionUtil;
@@ -95,7 +96,7 @@ public class SensitiveWordMap implements IWordMap {
        }

        long endTime = System.currentTimeMillis();
-        System.out.println("Init sensitive word map end! Cost time " + (endTime-startTime) + "ms");
+        System.out.println("Init sensitive word map end! Cost time " + (endTime - startTime) + "ms");
    }

    /**
@@ -114,7 +115,7 @@ public class SensitiveWordMap implements IWordMap {
        }

        for (int i = 0; i < string.length(); i++) {
-            int checkResult = checkSensitiveWord(string, i);
+            int checkResult = checkSensitiveWord(string, i, ValidModeEnum.FAIL_FAST);
            // 快速返回
            if (checkResult > 0) {
                return true;
@@ -148,6 +149,15 @@ public class SensitiveWordMap implements IWordMap {
        return stringList.get(0);
    }

+    @Override
+    public String replace(String target, char replaceChar) {
+        if(StringUtil.isEmpty(target)) {
+            return target;
+        }
+
+        return this.replaceSensitiveWord(target, ValidModeEnum.FAIL_OVER, replaceChar);
+    }
+
    /**
     * 获取敏感词列表
     *
@@ -164,15 +174,15 @@ public class SensitiveWordMap implements IWordMap {

        List<String> resultList = Guavas.newArrayList();
        for (int i = 0; i < text.length(); i++) {
-            int wordLength = checkSensitiveWord(text, i);
+            int wordLength = checkSensitiveWord(text, i, ValidModeEnum.FAIL_OVER);

            // 命中
            if (wordLength > 0) {
                // 保存敏感词
-                String sensitiveWord = text.substring(i, i+wordLength);
+                String sensitiveWord = text.substring(i, i + wordLength);

                // 添加去重
-                if(!resultList.contains(sensitiveWord)) {
+                if (!resultList.contains(sensitiveWord)) {
                    resultList.add(sensitiveWord);
                }

@@ -183,6 +193,7 @@ public class SensitiveWordMap implements IWordMap {

                // 增加 i 的步长
                // 为什么要-1，因为默认就会自增1
+                // TODO: 这里可以根据字符串匹配算法优化。
                i += wordLength - 1;
            }
        }
@@ -196,17 +207,24 @@ public class SensitiveWordMap implements IWordMap {
     * （1）如果未命中敏感词，直接返回 0
     * （2）命中敏感词，则返回敏感词的长度。
     *
-     * @param txt        文本信息
-     * @param beginIndex 开始下标
+     * ps: 这里结果进行优化，
+     * 1. 是否包含敏感词。
+     * 2. 敏感词的长度
+     * 3. 正常走过字段的长度（便于后期替换优化，避免不必要的循环重复）
+     *
+     * @param txt           文本信息
+     * @param beginIndex    开始下标
+     * @param validModeEnum 验证模式
     * @return 敏感词对应的长度
     * @since 0.0.1
     */
-    private int checkSensitiveWord(String txt, int beginIndex) {
+    private int checkSensitiveWord(final String txt, final int beginIndex,
+                                   final ValidModeEnum validModeEnum) {
        Map nowMap = sensitiveWordMap;

-        boolean flag = false;
        // 记录敏感词的长度
-        int sensitiveWordLength = 0;
+        int lengthCount = 0;
+        int actualLength = 0;

        for (int i = beginIndex; i < txt.length(); i++) {
            char charKey = txt.charAt(i);
@@ -214,15 +232,20 @@ public class SensitiveWordMap implements IWordMap {
            // 并且将 nowMap 替换为新的 map，进入下一层的循环。
            nowMap = (Map) nowMap.get(charKey);
            if (ObjectUtil.isNotNull(nowMap)) {
-                sensitiveWordLength++;
+                lengthCount++;

                // 判断是否是敏感词的结尾字，如果是结尾字则判断是否继续检测
                boolean isEnd = (boolean) nowMap.get(AppConst.IS_END);
                if (isEnd) {
-                    flag = true;
+                    // 只在匹配到结束的时候才记录长度，避免不完全匹配导致的问题。
+                    // eg: 敏感词 敏感词xxx
+                    // 如果是 【敏感词x】也会被匹配。
+                    actualLength = lengthCount;

-                    // 这里直接默认 fail-fast 即可。
-                    break;
+                    // 这里确实需要一种验证模式，主要是为了最大匹配从而达到最佳匹配的效果。
+                    if (ValidModeEnum.FAIL_FAST.equals(validModeEnum)) {
+                        break;
+                    }
                }
            } else {
                // 直接跳出循环
@@ -230,10 +253,44 @@ public class SensitiveWordMap implements IWordMap {
            }
        }

-        if (!flag) {
-            sensitiveWordLength = 0;
+        return actualLength;
+    }
+
+    /**
+     * 直接替换敏感词，返回替换后的结果
+     * @param target           文本信息
+     * @param validModeEnum 验证模式
+     * @return 脱敏后的字符串
+     * @since 0.0.2
+     */
+    private String replaceSensitiveWord(final String target,
+                                        final ValidModeEnum validModeEnum,
+                                        final char replaceChar) {
+        if(StringUtil.isEmpty(target)) {
+            return target;
        }
-        return sensitiveWordLength;
+        // 用于结果构建
+        StringBuilder resultBuilder = new StringBuilder();
+
+        for (int i = 0; i < target.length(); i++) {
+            char currentChar = target.charAt(i);
+            // 内层直接从 i 开始往后遍历，这个算法的，获取第一个匹配的单词
+            int wordLength = checkSensitiveWord(target, i, validModeEnum);
+
+            // 敏感词
+            if(wordLength > 0) {
+                String replaceStr = CharUtil.repeat(replaceChar, wordLength);
+                resultBuilder.append(replaceStr);
+
+                // 直接跳过敏感词的长度
+                i += wordLength-1;
+            } else {
+                // 普通词
+                resultBuilder.append(currentChar);
+            }
+        }
+
+        return resultBuilder.toString();
    }

 }
--- a/src/main/java/com/github/houbb/sensitive/word/util/CharsetUtils.java
+++ b/src/main/java/com/github/houbb/sensitive/word/util/CharsetUtils.java
@@ -1,70 +1,70 @@
-package com.github.houbb.sensitive.word.util;
-
-import com.github.houbb.heaven.annotation.CommonEager;
-import com.github.houbb.heaven.util.lang.StringUtil;
-
-/**
- * @author binbin.hou
- * @since 0.0.1
- */
-@CommonEager
-public class CharsetUtils {
-
-    /**
-     * 是否为中文字符
-     * @param c char
-     * @return 是否
-     * @since 0.0.1
-     */
-    public static boolean isChinese(char c) {
-        boolean result = false;
-        // 汉字范围 \u4e00-\u9fa5 (中文)
-        if (c >= 19968 && c <= 171941) {
-            result = true;
-        }
-        return result;
-    }
-
-    /**
-     * 是否包含中文
-     * @param string 字符串
-     * @return 是否
-     * @since 0.0.1
-     */
-    public static boolean isContainsChinese(String string) {
-        if(StringUtil.isEmpty(string)) {
-            return false;
-        }
-
-        char[] chars = string.toCharArray();
-        for(char c : chars) {
-            if(isChinese(c)) {
-                return true;
-            }
-        }
-
-        return false;
-    }
-
-    /**
-     * 是否全是中文
-     * @param string 字符串
-     * @return 是否
-     * @since 0.0.1
-     */
-    public static boolean isAllChinese(String string) {
-        if(StringUtil.isEmpty(string)) {
-            return false;
-        }
-
-        char[] chars = string.toCharArray();
-        for(char c : chars) {
-            if(!isChinese(c)) {
-                return false;
-            }
-        }
-
-        return true;
-    }
-
-}
+//package com.github.houbb.sensitive.word.util;
+//
+//import com.github.houbb.heaven.annotation.CommonEager;
+//import com.github.houbb.heaven.util.lang.StringUtil;
+//
+///**
+// * @author binbin.hou
+// * @since 0.0.1
+// */
+//@CommonEager
+//public class CharsetUtils {
+//
+//    /**
+//     * 是否为中文字符
+//     * @param c char
+//     * @return 是否
+//     * @since 0.0.1
+//     */
+//    public static boolean isChinese(char c) {
+//        boolean result = false;
+//        // 汉字范围 \u4e00-\u9fa5 (中文)
+//        if (c >= 19968 && c <= 171941) {
+//            result = true;
+//        }
+//        return result;
+//    }
+//
+//    /**
+//     * 是否包含中文
+//     * @param string 字符串
+//     * @return 是否
+//     * @since 0.0.1
+//     */
+//    public static boolean isContainsChinese(String string) {
+//        if(StringUtil.isEmpty(string)) {
+//            return false;
+//        }
+//
+//        char[] chars = string.toCharArray();
+//        for(char c : chars) {
+//            if(isChinese(c)) {
+//                return true;
+//            }
+//        }
+//
+//        return false;
+//    }
+//
+//    /**
+//     * 是否全是中文
+//     * @param string 字符串
+//     * @return 是否
+//     * @since 0.0.1
+//     */
+//    public static boolean isAllChinese(String string) {
+//        if(StringUtil.isEmpty(string)) {
+//            return false;
+//        }
+//
+//        char[] chars = string.toCharArray();
+//        for(char c : chars) {
+//            if(!isChinese(c)) {
+//                return false;
+//            }
+//        }
+//
+//        return true;
+//    }
+//
+//}
--- a/src/main/java/com/github/houbb/sensitive/word/util/StreamUtils.java
+++ b/src/main/java/com/github/houbb/sensitive/word/util/StreamUtils.java
@@ -1,72 +1,72 @@
-package com.github.houbb.sensitive.word.util;
-
-import com.github.houbb.heaven.annotation.CommonEager;
-import com.github.houbb.heaven.constant.CharsetConst;
-import com.github.houbb.heaven.util.lang.StringUtil;
-import com.github.houbb.sensitive.word.exception.SensitiveWordException;
-
-import java.io.BufferedReader;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.InputStreamReader;
-import java.nio.charset.Charset;
-import java.util.*;
-
-/**
- * 流工具类
- * @author binbin.hou
- * @since 0.0.1
- */
-@CommonEager
-public final class StreamUtils {
-
-    private StreamUtils(){}
-
-    /**
-     * 构建数据集合
-     *
-     * 后期考虑：是否允许用户自定义字典？
-     * 目前不支持这些操作。后期如果需要，再把这些限制放开。
-     * @param path 文件路径
-     * @return 返回数据集合
-     * @since 0.0.1
-     */
-    public static List<String> readAllLines(final String path) {
-        return readAllLines(path, CharsetConst.UTF8, true);
-    }
-
-    /**
-     * 构建数据集合
-     *
-     * 后期考虑：是否允许用户自定义字典？
-     * 目前不支持这些操作。后期如果需要，再把这些限制放开。
-     * @param path 文件路径
-     * @param charset 文件编码
-     * @param ignoreEmpty 是否忽略空白行
-     * @return 返回数据集合
-     * @since 0.0.1
-     */
-    public static List<String> readAllLines(final String path,
-                                            final String charset,
-                                            final boolean ignoreEmpty) {
-        try {
-            List<String> lines = new ArrayList<>();
-            InputStream is = StreamUtils.class.getResourceAsStream(path);
-            BufferedReader e = new BufferedReader(new InputStreamReader(is,
-                    Charset.forName(charset)));
-
-            while (e.ready()) {
-                String entry = e.readLine();
-                if (StringUtil.isEmpty(entry)
-                    && ignoreEmpty) {
-                    continue;
-                }
-                lines.add(entry);
-            }
-            return lines;
-        } catch (IOException e) {
-            throw new SensitiveWordException("dict init failed!", e);
-        }
-    }
-
-}
+//package com.github.houbb.sensitive.word.util;
+//
+//import com.github.houbb.heaven.annotation.CommonEager;
+//import com.github.houbb.heaven.constant.CharsetConst;
+//import com.github.houbb.heaven.util.lang.StringUtil;
+//import com.github.houbb.sensitive.word.exception.SensitiveWordException;
+//
+//import java.io.BufferedReader;
+//import java.io.IOException;
+//import java.io.InputStream;
+//import java.io.InputStreamReader;
+//import java.nio.charset.Charset;
+//import java.util.*;
+//
+///**
+// * 流工具类
+// * @author binbin.hou
+// * @since 0.0.1
+// */
+//@CommonEager
+//public final class StreamUtils {
+//
+//    private StreamUtils(){}
+//
+//    /**
+//     * 构建数据集合
+//     *
+//     * 后期考虑：是否允许用户自定义字典？
+//     * 目前不支持这些操作。后期如果需要，再把这些限制放开。
+//     * @param path 文件路径
+//     * @return 返回数据集合
+//     * @since 0.0.1
+//     */
+//    public static List<String> readAllLines(final String path) {
+//        return readAllLines(path, CharsetConst.UTF8, true);
+//    }
+//
+//    /**
+//     * 构建数据集合
+//     *
+//     * 后期考虑：是否允许用户自定义字典？
+//     * 目前不支持这些操作。后期如果需要，再把这些限制放开。
+//     * @param path 文件路径
+//     * @param charset 文件编码
+//     * @param ignoreEmpty 是否忽略空白行
+//     * @return 返回数据集合
+//     * @since 0.0.1
+//     */
+//    public static List<String> readAllLines(final String path,
+//                                            final String charset,
+//                                            final boolean ignoreEmpty) {
+//        try {
+//            List<String> lines = new ArrayList<>();
+//            InputStream is = StreamUtils.class.getResourceAsStream(path);
+//            BufferedReader e = new BufferedReader(new InputStreamReader(is,
+//                    Charset.forName(charset)));
+//
+//            while (e.ready()) {
+//                String entry = e.readLine();
+//                if (StringUtil.isEmpty(entry)
+//                    && ignoreEmpty) {
+//                    continue;
+//                }
+//                lines.add(entry);
+//            }
+//            return lines;
+//        } catch (IOException e) {
+//            throw new SensitiveWordException("dict init failed!", e);
+//        }
+//    }
+//
+//}
--- a/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsTest.java
+++ b/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsTest.java
@@ -14,13 +14,21 @@ import java.util.List;
 */
 public class SensitiveWordBsTest {

+    /**
+     * 是否包含
+     * @since 0.0.1
+     */
    @Test
    public void containsTest() {
-        final String text = "五星红旗迎风飘扬，毛主席的画像屹立在天安门前。。";
+        final String text = "五星红旗迎风飘扬，毛主席的画像屹立在天安门前。";

        Assert.assertTrue(SensitiveWordBs.getInstance().contains(text));
    }

+    /**
+     * 返回所有敏感词
+     * @since 0.0.1
+     */
    @Test
    public void findAllTest() {
        final String text = "五星红旗迎风飘扬，毛主席的画像屹立在天安门前。";
@@ -29,6 +37,10 @@ public class SensitiveWordBsTest {
        Assert.assertEquals("[五星红旗, 毛主席, 天安门]", wordList.toString());
    }

+    /**
+     * 返回第一个匹配的敏感词
+     * @since 0.0.1
+     */
    @Test
    public void findFirstTest() {
        final String text = "五星红旗迎风飘扬，毛主席的画像屹立在天安门前。";
@@ -37,4 +49,28 @@ public class SensitiveWordBsTest {
        Assert.assertEquals("五星红旗", word);
    }

+    /**
+     * 默认的替换策略
+     * @since 0.0.2
+     */
+    @Test
+    public void replaceTest() {
+        final String text = "五星红旗迎风飘扬，毛主席的画像屹立在天安门前。";
+
+        String result = SensitiveWordBs.getInstance().replace(text);
+        Assert.assertEquals("****迎风飘扬，***的画像屹立在***前。", result);
+    }
+
+    /**
+     * 自定义字符的替换策略
+     * @since 0.0.2
+     */
+    @Test
+    public void replaceCharTest() {
+        final String text = "五星红旗迎风飘扬，毛主席的画像屹立在天安门前。";
+
+        String result = SensitiveWordBs.getInstance().replace(text, '0');
+        Assert.assertEquals("0000迎风飘扬，000的画像屹立在000前。", result);
+    }
+
 }
--- a/src/test/java/com/github/houbb/sensitive/word/data/DataInitTest.java
+++ b/src/test/java/com/github/houbb/sensitive/word/data/DataInitTest.java
@@ -3,8 +3,8 @@ package com.github.houbb.sensitive.word.data;
 import com.github.houbb.heaven.support.filter.IFilter;
 import com.github.houbb.heaven.util.io.FileUtil;
 import com.github.houbb.heaven.util.lang.StringUtil;
+import com.github.houbb.heaven.util.util.CharsetUtil;
 import com.github.houbb.heaven.util.util.CollectionUtil;
-import com.github.houbb.sensitive.word.util.CharsetUtils;
 import org.junit.Ignore;
 import org.junit.Test;

@@ -38,7 +38,6 @@ public class DataInitTest {
        List<String> trimLines = CollectionUtil.distinct(CollectionUtil.trimCollection(lines));

        final String target = "D:\\github\\sensitive-word\\src\\main\\resources\\dict.txt";
-        FileUtil.write(target, trimLines);
    }

    /**
@@ -65,7 +64,7 @@ public class DataInitTest {
        List<String> resultList = CollectionUtil.distinct(CollectionUtil.filterList(lines, new IFilter<String>() {
            @Override
            public boolean filter(String s) {
-                return CharsetUtils.isContainsChinese(s);
+                return CharsetUtil.isContainsChinese(s);
            }
        }));
        Collections.sort(resultList);
--- a/src/test/java/com/github/houbb/sensitive/word/util/StreamUtilsTest.java
+++ b/src/test/java/com/github/houbb/sensitive/word/util/StreamUtilsTest.java
@@ -1,22 +0,0 @@
-package com.github.houbb.sensitive.word.util;
-
-import org.junit.Assert;
-import org.junit.Test;
-
-import java.util.List;
-
-/**
- * @author binbin.hou
- * @since 0.0.1
- */
-public class StreamUtilsTest {
-
-    @Test
-    public void sizeTest() {
-        final String dictPath = "/dict.txt";
-
-        List<String> stringList = StreamUtils.readAllLines(dictPath);
-        Assert.assertEquals(183836, stringList.size());
-    }
-
-}