release branch 0.0.1

2026-03-22 08:27:36 +08:00 · 2020-01-08 00:16:08 +08:00
parent 3108126817
commit 6b656b26d9
18 changed files with 721 additions and 197 deletions
--- a/README.md
+++ b/README.md
@@ -5,3 +5,71 @@
 [![Maven Central](https://maven-badges.herokuapp.com/maven-central/com.github.houbb/sensitive-word/badge.svg)](http://mvnrepository.com/artifact/com.github.houbb/sensitive-word)

 [![](https://img.shields.io/badge/license-Apache2-FF0080.svg)](https://github.com/houbb/sensitive-word/blob/master/LICENSE.txt)
+
+## 创作目的
+
+实现一款好用的敏感词工具。
+
+基于 DFA 算法实现，目前敏感词库内容收录 18W+，感觉过于臃肿。
+
+后期将进行相关优化，降低字典的数量。
+
+希望可以细化敏感词的分类，感觉工作量比较大，暂时没有太好的思路。
+
+## 后期目标
+
+- 持续扩容对应的敏感词（如合法的数据抓取）
+
+- 添加英文大小写忽略，全角半角忽略
+
+- 中文添加拼音相关转换，添加繁简体转换忽略
+
+- 允许用户自定义敏感词和白名单
+
+# 快速开始
+
+## 准备
+
+- JDK1.7+
+
+- Maven 3.x+
+
+## Maven 引入
+
+```xml
+<dependency>
+    <groupId>com.github.houbb</groupId>
+    <artifactId>sensitive-word</artifactId>
+    <version>0.0.1</version>
+</dependency>
+```
+
+## 使用实例
+
+所有测试案例参见 [SensitiveWordBsTest](src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsTest.java)
+
+### 判断是否包含敏感词
+
+```java
+final String text = "五星红旗迎风飘扬，毛主席的画像屹立在天安门前。。";
+
+Assert.assertTrue(SensitiveWordBs.getInstance().contains(text));
+```
+
+### 返回第一个敏感词
+
+```java
+final String text = "五星红旗迎风飘扬，毛主席的画像屹立在天安门前。";
+
+String word = SensitiveWordBs.getInstance().findFirst(text);
+Assert.assertEquals("五星红旗", word);
+```
+
+### 返回所有敏感词
+
+```java
+final String text = "五星红旗迎风飘扬，毛主席的画像屹立在天安门前。";
+
+List<String> wordList = SensitiveWordBs.getInstance().findAll(text);
+Assert.assertEquals("[五星红旗, 毛主席, 天安门]", wordList.toString());
+```
--- a/doc/CHANGE_LOG.md
+++ b/doc/CHANGE_LOG.md
@@ -13,10 +13,4 @@

 | 序号 | 变更类型 | 说明 | 时间 | 备注 |
 |:---|:---|:---|:---|:--|
-| 1 | A | 网易 163 邮箱功能的实现 | 2019-12-31 17:12:14 | |
-
-# release_0.0.2
-
-| 序号 | 变更类型 | 说明 | 时间 | 备注 |
-|:---|:---|:---|:---|:--|
-| 1 | A | 多个收件人的特性支持 | 2020-1-6 17:12:14 | |
+| 1 | A | 基本功能的实现 | 2020-1-7 21:46:32 | |
--- a/doc/issues/issues.md
+++ b/doc/issues/issues.md
@@ -10,3 +10,6 @@

 ## stop-word

+## 重复词
+
+ffffuuuucccckkk
--- a/doc/issues/v2-基本敏感词的简化.md
+++ b/doc/issues/v2-基本敏感词的简化.md
@@ -0,0 +1,264 @@
+# 简化部分信息
+
+## 纯数字
+
+移除
+
+## 去重
+
+移除大量重复的信息。
+
+提取出关键的敏感词语即可。
+
+## 包含 stop-word 的信息
+
+移除 stop-word 之后进行相关的处理。
+
+## 单个字
+
+48339 === Q
+83586 === q
+117538 === ━
+117539 === │
+117540 === ┃
+117541 === ┄
+117542 === ┅
+117554 === ┆
+117555 === ┇
+117556 === ┈
+117557 === ┉
+117558 === ┊
+117559 === ┋
+117560 === ┌
+117561 === ┍
+117562 === ┎
+117563 === ┏
+117564 === ┐
+117565 === ┑
+117566 === ┒
+117567 === ┓
+117568 === └
+117569 === ┕
+117570 === ┖
+117571 === ┗
+117572 === ┘
+117573 === ┙
+117574 === ┚
+117575 === ┛
+117576 === ├
+117577 === ┝
+117578 === ┞
+117579 === ┟
+117580 === ┠
+117581 === ┡
+117582 === ┢
+117583 === ┣
+117584 === ┤
+117585 === ┥
+117586 === ┦
+117587 === ┧
+117588 === ┨
+117589 === ┩
+117590 === ┪
+117591 === ┫
+117592 === ┬
+117593 === ┭
+117594 === ┮
+117595 === ┯
+117596 === ┰
+117597 === ┱
+117598 === ┲
+117599 === ┳
+117600 === ┴
+117601 === ┵
+117602 === ┶
+117603 === ┷
+117604 === ┸
+117605 === ┹
+117606 === ┺
+117607 === ┻
+117609 === ┼
+117610 === ┽
+117611 === ┾
+117612 === ┿
+117613 === ╀
+117614 === ╁
+117615 === ╂
+117616 === ╃
+117617 === ╄
+117618 === ╅
+117619 === ╆
+117620 === ╇
+117621 === ╈
+117622 === ╉
+117623 === ╊
+117624 === ╋
+117846 === ㄖ
+121501 === 买
+121979 === 乳
+123013 === 仆
+133622 === 功
+133786 === 動
+133790 === 務
+134011 === 區
+134255 === 卐
+134287 === 卖
+134910 === 卵
+135512 === 口
+136392 === 吊
+136576 === 吨
+137367 === 喷
+137479 === 嘸
+139926 === 奸
+140085 === 妈
+140126 === 妓
+140373 === 姘
+140397 === 姦
+140409 === 姩
+140464 === 娘
+140498 === 娼
+140503 === 婊
+140519 === 婬
+140562 === 媽
+140585 === 嫖
+140668 === 孕
+141291 === 寇
+141668 === 射
+142550 === 尻
+142603 === 尿
+142620 === 屄
+142639 === 屌
+142650 === 屍
+142653 === 屎
+142665 === 屙
+143107 === 巯
+143346 === 干
+143535 === 幹
+143735 === 床
+144165 === 弓
+144386 === 弩
+144931 === 忍
+145146 === 性
+145905 === 慰
+145913 === 慾
+146837 === 戳
+146919 === 房
+147574 === 扣
+149446 === 抠
+149774 === 抽
+150089 === 挂
+150244 === 捻
+150260 === 掛
+150296 === 掯
+151938 === 插
+152406 === 操
+153468 === 日
+154328 === 曰
+154902 === 本
+155789 === 枪
+156187 === 槍
+156578 === 歌
+156780 === 死
+158105 === 氟
+158172 === 氯
+158265 === 氰
+158565 === 汞
+159598 === 洱
+159944 === 淪
+159948 === 淫
+161116 === 滚
+161125 === 滛
+161669 === 灾
+161676 === 炮
+161774 === 烂
+161845 === 烯
+161856 === 烷
+162055 === 爛
+162196 === 爽
+162941 === 獨
+162985 === 獸
+163396 === 甙
+163934 === 畜
+165856 === 眯
+165880 === 睾
+165889 === 瞳
+166039 === 砒
+166049 === 砜
+166086 === 砷
+166097 === 础
+166234 === 硼
+166254 === 碡
+166265 === 碱
+166275 === 碼
+166290 === 磷
+166298 === 磺
+166876 === 穴
+167390 === 糞
+167499 === 統
+167536 === 綸
+167961 === 罂
+168722 === 羟
+168800 === 羰
+169070 === 耣
+169444 === 肏
+169474 === 肛
+169508 === 肝
+169679 === 肼
+169680 === 肾
+169725 === 胂
+169729 === 胍
+169883 === 胺
+169907 === 脬
+169939 === 腈
+170004 === 膦
+170283 === 臺
+170406 === 色
+171007 === 苄
+171216 === 茎
+171229 === 草
+171395 === 萋
+171473 === 葵
+171614 === 蔻
+172474 === 裸
+172599 === 褻
+172877 === 証
+174115 === 賤
+174531 === 贱
+174972 === 踢
+174984 === 蹣
+175044 === 躶
+175063 === 輪
+175475 === 轮
+175543 === 辦
+176368 === 逼
+176679 === 酐
+176733 === 酮
+176734 === 酯
+176735 === 酰
+176767 === 醚
+176768 === 醛
+177126 === 鈤
+177295 === 鎷
+177321 === 钒
+177332 === 钠
+177487 === 铀
+177569 === 铊
+179476 === 锇
+179520 === 镉
+179521 === 镍
+179803 === 阴
+180109 === 陰
+180173 === 隂
+180292 === 雞
+180594 === 靠
+181185 === 騒
+181190 === 騷
+181303 === 驽
+181352 === 骚
+182246 === 鯫
+182247 === 鰢
+182306 === 鸠
+182308 === 鸡
+182405 === 鸨
+183438 === Ｂ
+183491 === ｂ
--- a/doc/issues/v5-基本敏感词的标签.md
+++ b/doc/issues/v5-基本敏感词的标签.md
--- a/pom.xml
+++ b/pom.xml
@@ -6,7 +6,7 @@

    <groupId>com.github.houbb</groupId>
    <artifactId>sensitive-word</artifactId>
-    <version>0.0.1-SNAPSHOT</version>
+    <version>0.0.1</version>

    <properties>
        <!--============================== All Plugins START ==============================-->
@@ -141,8 +141,8 @@
    </build>

    <!--============================== ADD For sonatype START ==============================-->
-    <name>email</name>
-    <description>The most elegant email tool for java.</description>
+    <name>sensitive-word</name>
+    <description>The sensitive word tool for java with DFA.</description>

    <parent>
        <groupId>org.sonatype.oss</groupId>
@@ -157,8 +157,8 @@
        </license>
    </licenses>
    <scm>
-        <url>https://github.com/houbb/email</url>
-        <connection>https://github.com/houbb/email.git</connection>
+        <url>https://github.com/houbb/sensitive-word</url>
+        <connection>https://github.com/houbb/sensitive-word.git</connection>
        <developerConnection>https://houbb.github.io/</developerConnection>
    </scm>
    <developers>
--- a/release.bat
+++ b/release.bat
@@ -10,13 +10,13 @@ ECHO "============================= RELEASE START..."

 :: 版本号信息(需要手动指定)
 :::: 旧版本名称
-SET version=0.0.2
+SET version=0.0.1
 :::: 新版本名称
-SET newVersion=0.0.3
+SET newVersion=0.0.2
 :::: 组织名称
 SET groupName=com.github.houbb
 :::: 项目名称
-SET projectName=email
+SET projectName=sensitive-word

 :: release 项目版本
 :::: snapshot 版本号
--- a/src/main/java/com/github/houbb/sensitive/word/api/IWordMap.java
+++ b/src/main/java/com/github/houbb/sensitive/word/api/IWordMap.java
@@ -1,9 +1,10 @@
 package com.github.houbb.sensitive.word.api;

-import com.github.houbb.sensitive.word.model.WordMapEntry;
+import com.github.houbb.heaven.util.lang.StringUtil;
+import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum;

 import java.util.Collection;
-import java.util.Map;
+import java.util.List;

 /**
 * 敏感词 map
@@ -12,12 +13,38 @@ import java.util.Map;
 */
 public interface IWordMap {

+
    /**
-     * 获取单词 map
-     * @param collection 集合
-     * @return 敏感词 map
+     * Initializes the sensitive-word map from the given words.
+     * @param collection the words to load
     * @since 0.0.1
     */
-    Map<String, WordMapEntry> getWordMap(final Collection<String> collection);
+    void initWordMap(Collection<String> collection);
+
+    /**
+     * Reports whether the string contains a sensitive word.
+     * @param string the text to check
+     * @return {@code true} if a sensitive word is contained
+     * @since 0.0.1
+     * @see ValidModeEnum#FAIL_FAST the fail-fast mode is the recommended one here
+     */
+    boolean contains(final String string);
+
+    /**
+     * Returns all sensitive words found in the string.
+     * @param string the original text
+     * @return the sensitive words found
+     * @since 0.0.1
+     * @see ValidModeEnum#FAIL_OVER the scan-everything mode is the recommended one here
+     */
+    List<String> findAll(final String string);
+
+    /**
+     * Returns the first sensitive word found in the string.
+     * @param string the original text
+     * @return the first sensitive word found
+     * @since 0.0.1
+     */
+    String findFirst(final String string);

 }
--- a/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java
+++ b/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java
@@ -5,7 +5,9 @@ import com.github.houbb.heaven.util.guava.Guavas;
 import com.github.houbb.heaven.util.lang.StringUtil;
 import com.github.houbb.heaven.util.util.CollectionUtil;
 import com.github.houbb.sensitive.word.api.IWordData;
+import com.github.houbb.sensitive.word.api.IWordMap;
 import com.github.houbb.sensitive.word.support.data.SensitiveWordData;
+import com.github.houbb.sensitive.word.support.map.SensitiveWordMap;

 import java.util.List;

@@ -22,14 +24,6 @@ public class SensitiveWordBs {
     */
    private SensitiveWordBs(){}

-    /**
-     * 待验证字符串信息
-     * ps: 可以添加多个辅助类 xxxStringProvider
-     * 如 FileXXX
-     * @since 0.0.1
-     */
-    private volatile String target;
-
    /**
     * 敏感数据信息
     * @since 0.0.1
@@ -37,82 +31,73 @@ public class SensitiveWordBs {
    private IWordData sensitiveWordData = Instances.singleton(SensitiveWordData.class);

    /**
-     * 新建验证实例
-     * @param string 字符串
-     * @return this
+     * 敏感词 map
     * @since 0.0.1
     */
-    public static SensitiveWordBs newInstance(final String string) {
-        SensitiveWordBs instance = new SensitiveWordBs();
-        instance.target = string;
-        return instance;
+    private IWordMap sensitiveWordMap = Instances.singleton(SensitiveWordMap.class);
+
+    /**
+     * 获取单例信息
+     * @since 0.0.1
+     */
+    private static final SensitiveWordBs INSTANCE;
+
+    static {
+        synchronized (SensitiveWordBs.class) {
+            INSTANCE = new SensitiveWordBs();
+            List<String> lines = INSTANCE.sensitiveWordData.getWordData();
+            INSTANCE.sensitiveWordMap.initWordMap(lines);
+        }
    }

    /**
-     * 指定目标字符串信息
-     * @param string 字符串
+     * Returns the shared singleton instance (no new instance is created).
     * @return the singleton instance
     * @since 0.0.1
     */
-    public SensitiveWordBs target(final String string) {
-        this.target = string;
-        return this;
+    public static SensitiveWordBs getInstance() {
+        return INSTANCE;
    }

    /**
     * Reports whether the text is valid, i.e. contains no sensitive word.
+     * @param target the text to check
     * @return {@code true} when the text contains no sensitive word
     * @since 0.0.1
-     * @see #contains() 是否包含
     */
-    public boolean valid() {
-        return !contains();
+    public boolean valid(final String target) {
+        return !contains(target);
    }

    /**
     * Reports whether the text contains a sensitive word; delegates to the word map.
+     * @param target the text to check
     * @return {@code true} if a sensitive word is contained
     * @since 0.0.1
-     * @see #findAll() 列表不为空即可
     */
-    public boolean contains() {
-        return CollectionUtil.isNotEmpty(findAll());
+    public boolean contains(final String target) {
+        return this.sensitiveWordMap.contains(target);
    }

    /**
     * Returns all sensitive words found in {@code target}; delegates to the word map.
-     * 1. 这里是默认去重的。
+     * 1. The result is de-duplicated by default and ordered.
+     * 2. If nothing is found, an empty list is returned.
     * @return the list of sensitive words
     * @since 0.0.1
     */
-    public List<String> findAll() {
-        if(StringUtil.isEmpty(target)) {
-            return Guavas.newArrayList();
-        }
-
-        // 分词
-        return null;
+    public List<String> findAll(final String target) {
+        return this.sensitiveWordMap.findAll(target);
    }

    /**
-     * 执行过滤
-     * 1. 使用默认策略
-     * 2. 默认策略就是直接移除。
-     * @return 过滤后的结果
+     * Returns the first sensitive word found in {@code target}; delegates to the word map.
+     * (1) If none exists, {@code null} is returned.
+     * @return the first sensitive word, or {@code null}
     * @since 0.0.1
     */
-    private String filter() {
-        return filter(StringUtil.EMPTY);
-    }
-
-    /**
-     * 指定过滤的字符，执行过滤
-     * 1. filter 只是一种特殊的字符串替换策略。
-     * @return 过滤后的结果
-     * @since 0.0.1
-     */
-    private String filter(final String filter) {
-        return "";
+    public String findFirst(final String target) {
+        return this.sensitiveWordMap.findFirst(target);
    }

 }
--- a/src/main/java/com/github/houbb/sensitive/word/constant/AppConst.java
+++ b/src/main/java/com/github/houbb/sensitive/word/constant/AppConst.java
@@ -0,0 +1,19 @@
+package com.github.houbb.sensitive.word.constant;
+
+/**
+ * <p> Application-wide constants for the sensitive-word project. </p>
+ * <p> create on 2020/1/7 23:39 </p>
+ *
+ * @author Administrator
+ * @since 0.0.1
+ */
+public final class AppConst {
+
+    /**
+     * Map key under which a node stores its "end of word" flag.
+     * ps: from a certain point of view, I am not fond of this style.
+     * @since 0.0.1
+     */
+    public static final String IS_END = "isEnd";
+
+}
--- a/src/main/java/com/github/houbb/sensitive/word/constant/enums/ValidModeEnum.java
+++ b/src/main/java/com/github/houbb/sensitive/word/constant/enums/ValidModeEnum.java
@@ -0,0 +1,23 @@
+package com.github.houbb.sensitive.word.constant.enums;
+
+/**
+ * <p> Validation modes used when scanning text for sensitive words. </p>
+ * <p> create on 2020/1/7 22:46 </p>
+ *
+ * @author Administrator
+ * @since 0.0.1
+ */
+public enum ValidModeEnum {
+
+    /**
+     * Fail fast: stop at the first hit.
+     * @since 0.0.1
+     */
+    FAIL_FAST,
+
+    /**
+     * Traverse everything: keep scanning after a hit.
+     * @since 0.0.1
+     */
+    FAIL_OVER
+}
--- a/src/main/java/com/github/houbb/sensitive/word/constant/package-info.java
+++ b/src/main/java/com/github/houbb/sensitive/word/constant/package-info.java
@@ -0,0 +1,8 @@
+/**
+ * <p> Constants of the sensitive-word project. </p>
+ * <p> create on 2020/1/7 22:46 </p>
+ *
+ * @author Administrator
+ * @since 0.0.1
+ */
+package com.github.houbb.sensitive.word.constant;
--- a/src/main/java/com/github/houbb/sensitive/word/model/WordMapEntry.java
+++ b/src/main/java/com/github/houbb/sensitive/word/model/WordMapEntry.java
@@ -1,58 +0,0 @@
-package com.github.houbb.sensitive.word.model;
-
-import java.util.List;
-
-/**
- * 所有的敏感词，第一个字都是 key
- *
- * @author binbin.hou
- * @since 0.0.1
- */
-public class WordMapEntry {
-
-    /**
-     * 单个单词
-     * @since 0.0.1
-     */
-    private String word;
-
-    /**
-     * 是否为结束
-     * @since 0.0.1
-     */
-    private boolean isEnd;
-
-    /**
-     * 下一层的信息列表
-     * @since 0.0.1
-     */
-    private List<WordMapEntry> nextEntryList;
-
-    public String word() {
-        return word;
-    }
-
-    public WordMapEntry word(String word) {
-        this.word = word;
-        return this;
-    }
-
-    public boolean end() {
-        return isEnd;
-    }
-
-    public WordMapEntry end(boolean end) {
-        isEnd = end;
-        return this;
-    }
-
-    public List<WordMapEntry> nextEntryList() {
-        return nextEntryList;
-    }
-
-    public WordMapEntry nextEntryList(List<WordMapEntry> nextEntryList) {
-        this.nextEntryList = nextEntryList;
-        return this;
-    }
-
-}
--- a/src/main/java/com/github/houbb/sensitive/word/support/data/SensitiveWordData.java
+++ b/src/main/java/com/github/houbb/sensitive/word/support/data/SensitiveWordData.java
@@ -9,6 +9,7 @@ import java.util.List;

 /**
 * 数据加载使用单例的模式，只需要加载一次即可。
+ *
 * @author binbin.hou
 * @since 0.0.1
 */
@@ -17,17 +18,21 @@ public class SensitiveWordData implements IWordData {

    /**
     * 默认的内置行
+     *
     * @since 0.0.1
     */
    private static List<String> defaultLines;

    static {
+        synchronized (SensitiveWordData.class) {
            long start = System.currentTimeMillis();
            defaultLines = new ArrayList<>(183837);
            defaultLines = StreamUtils.readAllLines("/dict.txt");
            long end = System.currentTimeMillis();
            System.out.println("Sensitive data loaded!, cost time: " + (end - start) + " ms");
        }
+    }
+

    @Override
    public List<String> getWordData() {
--- a/src/main/java/com/github/houbb/sensitive/word/support/map/SensitiveWordMap.java
+++ b/src/main/java/com/github/houbb/sensitive/word/support/map/SensitiveWordMap.java
@@ -1,12 +1,18 @@
 package com.github.houbb.sensitive.word.support.map;

 import com.github.houbb.heaven.annotation.ThreadSafe;
+import com.github.houbb.heaven.util.guava.Guavas;
 import com.github.houbb.heaven.util.lang.ObjectUtil;
+import com.github.houbb.heaven.util.lang.StringUtil;
+import com.github.houbb.heaven.util.util.CollectionUtil;
+import com.github.houbb.heaven.util.util.MapUtil;
 import com.github.houbb.sensitive.word.api.IWordMap;
-import com.github.houbb.sensitive.word.model.WordMapEntry;
+import com.github.houbb.sensitive.word.constant.AppConst;
+import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum;

 import java.util.Collection;
 import java.util.HashMap;
+import java.util.List;
 import java.util.Map;

 /**
@@ -18,89 +24,216 @@ import java.util.Map;
@ThreadSafe
 public class SensitiveWordMap implements IWordMap {

+    /**
+     * Sensitive-word lookup map: nested maps keyed by character, filled by initWordMap.
+     *
+     * @since 0.0.1
+     */
+    private static Map sensitiveWordMap;

    /**
     * 读取敏感词库，将敏感词放入HashSet中，构建一个DFA算法模型：
     *
-     * <pre>
-     * 中 = {
-     * isEnd = 0
-     * 国 = {
-     * isEnd = 1
-     * 人 = {isEnd = 0
-     * 民 = {isEnd = 1}
-     * }
-     * 男  = {
-     * isEnd = 0
-     * 人 = {
-     * isEnd = 1
-     * }
-     * }
-     * }
-     * }
-     *
-     * 五 = {
-     * isEnd = 0
-     * 星 = {
-     * isEnd = 0
-     * 红 = {
-     * isEnd = 0
-     * 旗 = {
-     * isEnd = 1
-     * }
-     * }
-     * }
-     * }
-     * </pre>
-     *
-     * key: 对应的中文
-     * value: 是否为结束。
-     *
-     * 日本人，日本鬼子为例
-     *
-     * 1、在hashMap中查询“日”看其是否在hashMap中存在，如果不存在，则证明已“日”开头的敏感词还不存在，则我们直接构建这样的一棵树。跳至3。
-     * 2、如果在hashMap中查找到了，表明存在以“日”开头的敏感词，设置hashMap = hashMap.get("日")，跳至1，依次匹配“本”、“人”。
-     * 3、判断该字是否为该词中的最后一个字。若是表示敏感词结束，设置标志位isEnd = 1，否则设置标志位isEnd = 0；
-     *
     * @param collection 敏感词库集合
     * @since 0.0.1
-     *
+     * <p>
     * 使用对象代码 map 的这种一直递归。
-     *
+     * 参考资料：https://www.cnblogs.com/AlanLee/p/5329555.html
+     * https://blog.csdn.net/chenssy/article/details/26961957
     */
    @Override
-    public Map<String, WordMapEntry> getWordMap(Collection<String> collection) {
-        Map<String, WordMapEntry> resultMap = new HashMap<>(collection.size());
+    @SuppressWarnings("unchecked")
+    public void initWordMap(Collection<String> collection) {
+        // Skip loading when the map has already been populated
+        if (MapUtil.isNotEmpty(sensitiveWordMap)) {
+            return;
+        }
+
+        long startTime = System.currentTimeMillis();
+        // Pre-size with the word count, meant to cut down on resizing cost
+        sensitiveWordMap = new HashMap(collection.size());

        for (String key : collection) {
+            if (StringUtil.isEmpty(key)) {
+                continue;
+            }
+
+            // Split the word into characters; each one becomes a key in the nested maps
            char[] chars = key.toCharArray();
            final int size = chars.length;

-            for (int i = 0; i < size; i++) {
-                String charStr = String.valueOf(chars[i]);
+            // Every word starts again from the root map; changes made through currentMap land in the shared structure
+            Map currentMap = sensitiveWordMap;

-                // 直接获取对应的 map
-                WordMapEntry wordMapEntry = resultMap.get(charStr);
+            for (int i = 0; i < size; i++) {
+                // The current character of the word is used as the HashMap key
+                char charKey = chars[i];
+                // Look up the child node for this character
+                Object wordMap = currentMap.get(charKey);

                // A child node already exists
-                if(ObjectUtil.isNotNull(wordMapEntry)) {
-
+                if (ObjectUtil.isNotNull(wordMap)) {
+                    // Descend into the existing child map and continue from there
+                    currentMap = (Map) wordMap;
                } else {
-//                    // 如果集合不存在，直接新建一个 map
-//                    wordMap = new HashMap<>(size);
-//                    // 判断是否为最后一个，如果是则设置为1
-//                    boolean isEnd = i == size - 1;
-//                    // 设置最后的结果
-//                    wordMap.put(charStr, isEnd);
+                    // No child yet: build a new node with isEnd=false, as it is not yet known to end a word
+                    Map<String, Boolean> newWordMap = new HashMap<>();
+                    newWordMap.put(AppConst.IS_END, false);
+
+                    // Attach the new node to the current map
+                    currentMap.put(charKey, newWordMap);
+
+                    // Move to the new node for the next character
+                    currentMap = newWordMap;
+                }
+
+                // The last character of the word gets the end-of-word flag
+                if (i == size - 1) {
+                    currentMap.put(AppConst.IS_END, true);
+                }
            }
        }

-        }
-        return resultMap;
+        long endTime = System.currentTimeMillis();
+        System.out.println("Init sensitive word map end! Cost time " + (endTime-startTime) + "ms");
    }

-    public static void main(String[] args) {
-        System.out.println("s".toCharArray()[0]+"");
+    /**
+     * Reports whether the string contains a sensitive word.
+     * (1) Every start position is tried in turn.
+     * (2) As soon as one position matches, {@code true} is returned.
+     *
+     * @param string the text to check
+     * @return {@code true} if a sensitive word is contained; {@code false} for empty input
+     * @since 0.0.1
+     */
+    @Override
+    public boolean contains(String string) {
+        if (StringUtil.isEmpty(string)) {
+            return false;
+        }
+
+        for (int i = 0; i < string.length(); i++) {
+            int checkResult = checkSensitiveWord(string, i);
+            // Return on the first hit
+            if (checkResult > 0) {
+                return true;
+            }
+        }
+        return false;
+    }
+
+    /**
+     * Returns all sensitive words found in the string, using the FAIL_OVER mode.
+     * (1) The result keeps the order in which the words are first found.
+     * (2) The result is de-duplicated by default.
+     *
+     * @param string the original text
+     * @return the sensitive words found
+     * @since 0.0.1
+     */
+    @Override
+    public List<String> findAll(String string) {
+        return getSensitiveWords(string, ValidModeEnum.FAIL_OVER);
+    }
+
+    @Override
+    public String findFirst(String string) {
+        List<String> stringList = getSensitiveWords(string, ValidModeEnum.FAIL_FAST);
+
+        if (CollectionUtil.isEmpty(stringList)) {
+            return null;
+        }
+
+        return stringList.get(0);
+    }
+
+    /**
+     * Collects the sensitive words found in the text, scanning from left to right.
+     *
+     * @param text     the text to scan
+     * @param modeEnum the mode; FAIL_FAST stops after the first hit
+     * @return the list of hits; empty when the text is empty or nothing matches
+     * @since 0.0.1
+     */
+    private List<String> getSensitiveWords(final String text, final ValidModeEnum modeEnum) {
+        //1. Empty text cannot contain a sensitive word: return an empty list
+        if (StringUtil.isEmpty(text)) {
+            return Guavas.newArrayList();
+        }
+
+        List<String> resultList = Guavas.newArrayList();
+        for (int i = 0; i < text.length(); i++) {
+            int wordLength = checkSensitiveWord(text, i);
+
+            // Hit
+            if (wordLength > 0) {
+                // Extract the matched sensitive word
+                String sensitiveWord = text.substring(i, i+wordLength);
+
+                // Add only if not already present (de-duplication)
+                if(!resultList.contains(sensitiveWord)) {
+                    resultList.add(sensitiveWord);
+                }
+
+                // Fail-fast mode: stop after the first hit
+                if (ValidModeEnum.FAIL_FAST.equals(modeEnum)) {
+                    break;
+                }
+
+                // Skip past the matched word
+                // Minus 1 because the loop itself increments i by 1
+                i += wordLength - 1;
+            }
+        }
+
+        return resultList;
+    }
+
+    /**
+     * Checks for a sensitive word starting at the given index and returns its length.
+     * <p>
+     * (1) If no sensitive word starts there, 0 is returned.
+     * (2) On a hit, the length up to the first end-of-word flag reached is returned.
+     *
+     * @param txt        the text
+     * @param beginIndex the start index
+     * @return the length of the matched sensitive word, or 0
+     * @since 0.0.1
+     */
+    private int checkSensitiveWord(String txt, int beginIndex) {
+        Map nowMap = sensitiveWordMap;
+
+        boolean flag = false;
+        // Number of characters matched so far
+        int sensitiveWordLength = 0;
+
+        for (int i = beginIndex; i < txt.length(); i++) {
+            char charKey = txt.charAt(i);
+            // Look the character up at the current level
+            // and replace nowMap with the child map to descend one level.
+            nowMap = (Map) nowMap.get(charKey);
+            if (ObjectUtil.isNotNull(nowMap)) {
+                sensitiveWordLength++;
+
+                // Check whether this character ends a sensitive word
+                boolean isEnd = (boolean) nowMap.get(AppConst.IS_END);
+                if (isEnd) {
+                    flag = true;
+
+                    // Fail-fast by default: stop at the first end-of-word flag.
+                    break;
+                }
+            } else {
+                // No child for this character: leave the loop
+                break;
+            }
+        }
+
+        if (!flag) {
+            sensitiveWordLength = 0;
+        }
+        return sensitiveWordLength;
    }

 }
--- a/src/main/resources/dict.txt
+++ b/src/main/resources/dict.txt
@@ -164386,7 +164386,6 @@ z以留吧以其以武
 百花故事
 百花盛放
 百行教师贱
-的
 的同修
 的妹 子 都 很 急 约
 的阿斗
--- a/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsTest.java
+++ b/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsTest.java
@@ -0,0 +1,40 @@
+package com.github.houbb.sensitive.word.bs;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.util.List;
+
+/**
+ * <p> Tests for the SensitiveWordBs singleton: contains, findAll and findFirst. </p>
+ * <p> create on 2020/1/7 23:43 </p>
+ *
+ * @author Administrator
+ * @since 0.0.1
+ */
+public class SensitiveWordBsTest {
+
+    @Test
+    public void containsTest() {
+        final String text = "五星红旗迎风飘扬，毛主席的画像屹立在天安门前。。";
+
+        Assert.assertTrue(SensitiveWordBs.getInstance().contains(text));
+    }
+
+    @Test
+    public void findAllTest() {
+        final String text = "五星红旗迎风飘扬，毛主席的画像屹立在天安门前。";
+
+        List<String> wordList = SensitiveWordBs.getInstance().findAll(text);
+        Assert.assertEquals("[五星红旗, 毛主席, 天安门]", wordList.toString());
+    }
+
+    @Test
+    public void findFirstTest() {
+        final String text = "五星红旗迎风飘扬，毛主席的画像屹立在天安门前。";
+
+        String word = SensitiveWordBs.getInstance().findFirst(text);
+        Assert.assertEquals("五星红旗", word);
+    }
+
+}
--- a/src/test/java/com/github/houbb/sensitive/word/data/DataInitTest.java
+++ b/src/test/java/com/github/houbb/sensitive/word/data/DataInitTest.java
@@ -102,4 +102,18 @@ public class DataInitTest {
        FileUtil.write(target, disctinct);
    }

+    @Test
+    @Ignore
+    public void oneWordTest() {
+        final String source = "D:\\_github\\sensitive-word\\src\\main\\resources\\dict.txt";
+
+        List<String> lines = FileUtil.readAllLines(source);
+        for(int i = 0; i < lines.size(); i++) {
+            String line = lines.get(i);
+            if(line.trim().length() == 1) {
+                System.out.println(i + " === " + line);
+            }
+        }
+    }
+
 }