[Feature] add Char2CharMap and boxing-free number/English formatters (release 0.29.2)

2026-03-22 08:27:36 +08:00 · 2025-09-05 17:16:46 +08:00
parent a46f43024d
commit 8378e202bb
20 changed files with 672 additions and 622 deletions
--- a/CHANGE_LOG.md
+++ b/CHANGE_LOG.md
@@ -471,3 +471,9 @@
 |:---|:-----|-----------------------------|:------------------|:--------------------|
 | 1  | O    | 改进 check、format 的 chains 方法 | 2025-9-5 16:22:24 | 优化性能                |
 | 2  | O    | InnerWordFormatUtils#format | 2025-9-5 16:22:24 | 优化性能+内存 toCharArray |
+
+# release_0.29.2
+
+| 序号 | 变更类型 | 说明                  | 时间                | 备注                  |
+|:---|:-----|---------------------|:------------------|:--------------------|
+| 1  | O    | 拆箱、装箱优化；优化数字、英文的格式化 | 2025-9-5 16:22:24 | 优化性能                |
--- a/pom.xml
+++ b/pom.xml
@@ -6,7 +6,7 @@

    <groupId>com.github.houbb</groupId>
    <artifactId>sensitive-word</artifactId>
-    <version>0.29.1</version>
+    <version>0.29.2</version>

    <properties>
        <!--============================== All Plugins START ==============================-->
@@ -115,7 +115,6 @@
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-core</artifactId>
        </dependency>
-
    </dependencies>

    <build>
@@ -182,6 +181,20 @@
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-javadoc-plugin</artifactId>
                <version>${plugin.maven-javadoc-plugin.version}</version>
+                <configuration>
+                    <!-- 指定源码编码 -->
+                    <encoding>UTF-8</encoding>
+                    <!-- 指定文档编码 -->
+                    <docencoding>UTF-8</docencoding>
+                    <!-- 输出 HTML 的 charset -->
+                    <charset>UTF-8</charset>
+                    <!-- 强制生成，不因警告/错误中断 -->
+                    <failOnError>false</failOnError>
+                    <!-- 可以跳过 doclint -->
+                    <additionalJOptions>
+                        <additionalJOption>-Xdoclint:none</additionalJOption>
+                    </additionalJOptions>
+                </configuration>
            </plugin>

        </plugins>
@@ -252,6 +265,17 @@
                        <groupId>org.apache.maven.plugins</groupId>
                        <artifactId>maven-javadoc-plugin</artifactId>
                        <version>${plugin.maven-javadoc-plugin.version}</version>
+                        <configuration>
+                            <encoding>UTF-8</encoding>
+                            <charset>UTF-8</charset>
+                            <docencoding>UTF-8</docencoding>
+                            <additionalJOptions>
+                                <additionalJOption>-Xdoclint:none</additionalJOption>
+                                <additionalJOption>-charset UTF-8</additionalJOption>
+                                <additionalJOption>-encoding UTF-8</additionalJOption>
+                                <additionalJOption>-docencoding UTF-8</additionalJOption>
+                            </additionalJOptions>
+                        </configuration>
                        <executions>
                            <execution>
                                <phase>package</phase>
--- a/src/main/java/com/github/houbb/sensitive/word/collection/Char2CharMap.java
+++ b/src/main/java/com/github/houbb/sensitive/word/collection/Char2CharMap.java
@@ -0,0 +1,106 @@
+package com.github.houbb.sensitive.word.collection;
+
+/**
+ * 原生无装箱、拆箱的实现
+ *
+ * @since 0.29.2
+ */
+public final class Char2CharMap {
+
+    private static final char EMPTY_KEY = '\0'; // sentinel marking an unused slot; never a valid key
+    private static final float LOAD_FACTOR = 0.5f;
+
+    private char[] keys;
+    private char[] values;
+    private int size;
+    private int mask;   // capacity - 1; capacity is a power of two, so (x & mask) wraps an index into the table
+    private int maxSize; // entry count at which the table is doubled
+
+    /** Creates a map whose table is sized from {@code expectedSize} divided by the load factor. */
+    public Char2CharMap(int expectedSize) {
+        int capacity = tableSizeFor((int) (expectedSize / LOAD_FACTOR) + 1);
+        this.keys = new char[capacity];
+        this.values = new char[capacity];
+        this.mask = capacity - 1;
+        this.maxSize = (int) (capacity * LOAD_FACTOR); // size starts at the field default of 0
+    }
+
+    /** Rounds {@code cap} up to a power of two, clamped to the range [2, 2^30]. */
+    private static int tableSizeFor(int cap) {
+        int n = cap - 1;
+        n |= n >>> 1;
+        n |= n >>> 2;
+        n |= n >>> 4;
+        n |= n >>> 8;
+        n |= n >>> 16;
+        return (n < 2) ? 2 : (n >= (1 << 30) ? (1 << 30) : n + 1);
+    }
+
+    private int hash(char k) { // multiplicative hash, masked down to a table index
+        return (k * 0x9E3779B9) & mask;
+    }
+
+    /** Inserts or overwrites the mapping for {@code key}; the key {@code '\0'} is rejected with IllegalArgumentException. */
+    public void put(char key, char value) {
+        if (key == EMPTY_KEY) {
+            throw new IllegalArgumentException("Key '\\0' is reserved as EMPTY_KEY.");
+        }
+        int idx = hash(key);
+        while (true) {
+            if (keys[idx] == EMPTY_KEY) {
+                keys[idx] = key;
+                values[idx] = value;
+                if (++size >= maxSize) {
+                    resize();
+                }
+                return;
+            } else if (keys[idx] == key) {
+                values[idx] = value;
+                return;
+            }
+            idx = (idx + 1) & mask; // occupied by another key: probe the next slot, wrapping at the end
+        }
+    }
+
+    /** Looks up {@code key}, returning {@code defaultValue} when no mapping exists (always the case for {@code '\0'}). */
+    public char get(char key, char defaultValue) {
+        if (key == EMPTY_KEY) return defaultValue;
+        int idx = hash(key);
+        while (true) {
+            char k = keys[idx];
+            if (k == EMPTY_KEY) return defaultValue;
+            if (k == key) return values[idx];
+            idx = (idx + 1) & mask;
+        }
+    }
+
+    /** Looks up {@code key}, returning {@code '\0'} when no mapping exists. */
+    public char get(char key) {
+        return get(key, '\0');
+    }
+
+    /** Doubles the table and re-inserts every live entry, since slot positions depend on the mask. */
+    private void resize() {
+        int newCap = keys.length << 1;
+        char[] oldKeys = keys;
+        char[] oldVals = values;
+
+        keys = new char[newCap];
+        values = new char[newCap];
+        mask = newCap - 1;
+        maxSize = (int) (newCap * LOAD_FACTOR);
+        size = 0; // put() below recounts each entry
+        for (int i = 0; i < oldKeys.length; i++) {
+            char k = oldKeys[i];
+            if (k != EMPTY_KEY) {
+                put(k, oldVals[i]);
+            }
+        }
+    }
+
+    public int size() { // number of mappings currently stored
+        return size;
+    }
+}
+
+
--- a/src/main/java/com/github/houbb/sensitive/word/support/data/WordDataHashMap.java
+++ b/src/main/java/com/github/houbb/sensitive/word/support/data/WordDataHashMap.java
@@ -1,203 +0,0 @@
-package com.github.houbb.sensitive.word.support.data;
-
-import com.github.houbb.heaven.util.lang.ObjectUtil;
-import com.github.houbb.heaven.util.lang.StringUtil;
-import com.github.houbb.sensitive.word.api.IWordContext;
-import com.github.houbb.sensitive.word.api.context.InnerSensitiveWordContext;
-import com.github.houbb.sensitive.word.constant.WordConst;
-import com.github.houbb.sensitive.word.constant.enums.WordContainsTypeEnum;
-
-import java.util.Collection;
-import java.util.HashMap;
-import java.util.Map;
-
-/**
- * 敏感词 map
- *
- * 不再维护，降低维护成本
- * 
- * @author binbin.hou
- * @since 0.0.1
- */
-@Deprecated
-public class WordDataHashMap extends AbstractWordData {
-
-    /**
-     * 脱敏单词 map
-     *
-     * @since 0.0.1
-     */
-    private Map innerWordMap;
-
-    /**
-     * 读取敏感词库，将敏感词放入HashSet中，构建一个DFA算法模型：
-     *
-     * @param collection 敏感词库集合
-     * @since 0.0.1
-     * <p>
-     * 使用对象代码 map 的这种一直递归。
-     * 参考资料：https://www.cnblogs.com/AlanLee/p/5329555.html
-     * https://blog.csdn.net/chenssy/article/details/26961957
-     */
-    @Override
-    @SuppressWarnings("unchecked")
-    public synchronized void doInitWordData(Collection<String> collection) {
-        // 避免扩容带来的消耗
-        Map newInnerWordMap = new HashMap(collection.size());
-
-        for (String key : collection) {
-            if (StringUtil.isEmpty(key)) {
-                continue;
-            }
-
-            // 用来按照相应的格式保存敏感词库数据
-            final int size = key.length();
-
-            // 每一个新词的循环，直接将结果设置为当前 map，所有变化都会体现在结果的 map 中
-            Map currentMap = newInnerWordMap;
-
-            for (int i = 0; i < size; i++) {
-                // 截取敏感词当中的字，在敏感词库中字为HashMap对象的Key键值
-                char charKey = key.charAt(i);
-                // 如果集合存在
-                Object wordMap = currentMap.get(charKey);
-
-                // 如果集合存在
-                if (ObjectUtil.isNotNull(wordMap)) {
-                    // 直接将获取到的 map 当前当前 map 进行继续的操作
-                    currentMap = (Map) wordMap;
-                } else {
-                    //不存在则，则构建一个新的map，同时将isEnd设置为0，因为他不是最后一
-                    Map<String, Boolean> newWordMap = new HashMap<>(8);
-                    newWordMap.put(WordConst.IS_END, false);
-
-                    // 将新的节点放入当前 map 中
-                    currentMap.put(charKey, newWordMap);
-
-                    // 将新节点设置为当前节点，方便下一次节点的循环。
-                    currentMap = newWordMap;
-                }
-            }
-
-            // 判断是否为最后一个，添加是否结束的标识。
-            currentMap.put(WordConst.IS_END, true);
-        }
-
-        // 最后更新为新的 map，保证更新过程中旧的数据可用
-        this.innerWordMap = newInnerWordMap;
-    }
-
-    @Override
-    protected void doRemoveWord(Collection<String> collection) {
-
-    }
-
-    @Override
-    protected void doAddWord(Collection<String> collection) {
-
-    }
-
-    /**
-     * 是否包含
-     * （1）直接遍历所有
-     * （2）如果遇到，则直接返回 true
-     *
-     * @param stringBuilder 字符串
-     * @param innerContext 内部上下文
-     * @return 是否包含
-     * @since 0.0.1
-     */
-    @Override
-    public WordContainsTypeEnum doContains(final StringBuilder stringBuilder,
-                                         final InnerSensitiveWordContext innerContext) {
-        return innerContainsSensitive(stringBuilder, innerContext);
-    }
-
-    private WordContainsTypeEnum innerContainsSensitive(StringBuilder stringBuilder,
-                                                        final InnerSensitiveWordContext innerContext) {
-        // 初始化为当前的 map
-        Map nowMap = this.innerWordMap;
-
-        // 记录敏感词的长度
-        final int len = stringBuilder.length();
-        for (int i = 0; i < len; i++) {
-            // 获取当前的 map 信息
-            nowMap = getNowMap(nowMap, i, stringBuilder, innerContext);
-
-            // 如果不为空，则判断是否为结尾。
-            if (ObjectUtil.isNull(nowMap)) {
-                return WordContainsTypeEnum.NOT_FOUND;
-            }
-        }
-
-        // 是否为结尾，便于快速失败
-        boolean isEnd =  isEnd(nowMap);
-        if(isEnd) {
-            return WordContainsTypeEnum.CONTAINS_END;
-        }
-
-        return WordContainsTypeEnum.CONTAINS_PREFIX;
-    }
-
-    /**
-     * 判断是否结束
-     * BUG-FIX: 避免出现敏感词库中没有的文字。
-     * @param map map 信息
-     * @return 是否结束
-     * @since 0.0.9
-     */
-    private static boolean isEnd(final Map map) {
-        if(ObjectUtil.isNull(map)) {
-            return false;
-        }
-
-        Object value = map.get(WordConst.IS_END);
-        if(ObjectUtil.isNull(value)) {
-            return false;
-        }
-
-        return (boolean)value;
-    }
-    /**
-     * 获取当前的 Map
-     * @param nowMap 原始的当前 map
-     * @param index 下标
-     * @param stringBuilder 文本缓存
-     * @param sensitiveContext 上下文
-     * @return 实际的当前 map
-     * @since 0.0.7
-     */
-    private Map getNowMap(Map nowMap,
-                          final int index,
-                          final StringBuilder stringBuilder,
-                          final InnerSensitiveWordContext sensitiveContext) {
-        final IWordContext context = sensitiveContext.wordContext();
-
-        // 这里的 char 已经是统一格式化之后的，所以可以不用再次格式化。
-        char mappingChar = stringBuilder.charAt(index);
-
-        // 这里做一次重复词的处理
-        //TODO: 这里可以优化，是否获取一次。
-        Map currentMap = (Map) nowMap.get(mappingChar);
-        // 启用忽略重复&当前下标不是第一个
-        if(context.ignoreRepeat()
-                && index > 0) {
-            char preMappingChar = stringBuilder.charAt(index-1);
-
-            // 直接赋值为上一个 map
-            if(preMappingChar == mappingChar) {
-                currentMap = nowMap;
-            }
-        }
-
-        return currentMap;
-    }
-
-    @Override
-    public synchronized void destroy() {
-        if(innerWordMap != null) {
-            innerWordMap.clear();
-        }
-    }
-
-}
--- a/src/main/java/com/github/houbb/sensitive/word/support/format/WordFormatIgnoreEnglishStyle.java
+++ b/src/main/java/com/github/houbb/sensitive/word/support/format/WordFormatIgnoreEnglishStyle.java
@@ -1,15 +1,18 @@
 package com.github.houbb.sensitive.word.support.format;

 import com.github.houbb.heaven.annotation.ThreadSafe;
-import com.github.houbb.sensitive.word.api.IWordFormat;
 import com.github.houbb.sensitive.word.api.IWordContext;
-import com.github.houbb.sensitive.word.utils.InnerWordCharUtils;
+import com.github.houbb.sensitive.word.api.IWordFormat;
+
+import java.util.HashMap;
+import java.util.Map;

 /**
 * 忽略英文的各种格式
 * @author binbin.hou
 * @since 0.0.6
 */
+@Deprecated
@ThreadSafe
 public class WordFormatIgnoreEnglishStyle implements IWordFormat {

@@ -19,9 +22,52 @@ public class WordFormatIgnoreEnglishStyle implements IWordFormat {
        return INSTANCE;
    }

+    /**
+     * 英文字母1
+     * @since 0.0.4
+     */
+    private static final String LETTERS_ONE =
+            "ⒶⒷⒸⒹⒺⒻⒼⒽⒾⒿⓀⓁⓂⓃⓄⓅⓆⓇⓈⓉⓊⓋⓌⓍⓎⓏ" +
+                    "ⓐⓑⓒⓓⓔⓕⓖⓗⓘⓙⓚⓛⓜⓝⓞⓟⓠⓡⓢⓣⓤⓥⓦⓧⓨⓩ" +
+                    "⒜⒝⒞⒟⒠⒡⒢⒣⒤⒥⒦⒧⒨⒩⒪⒫⒬⒭⒮⒯⒰⒱⒲⒳⒴⒵";
+
+    /**
+     * 英文字母2
+     * @since 0.0.4
+     */
+    private static final String LETTERS_TWO =
+            "ABCDEFGHIJKLMNOPQRSTUVWXYZ" +
+                    "abcdefghijklmnopqrstuvwxyz" +
+                    "abcdefghijklmnopqrstuvwxyz";
+
+
+    /**
+     * 字母映射表
+     */
+    private static final Map<Character,Character> LETTER_MAP = new HashMap<>(LETTERS_ONE.length());
+
+    static {
+        final char[] styled = LETTERS_ONE.toCharArray();
+        for (int idx = 0; idx < styled.length; idx++) {
+            LETTER_MAP.put(styled[idx], LETTERS_TWO.charAt(idx)); // same index in both strings forms one pair
+        }
+    }
+
+    /**
+     * Resolves the plain letter for a possibly styled character.
+     * @param c character to convert
+     * @return the mapped letter, or {@code c} unchanged when LETTER_MAP has no entry for it
+     * @since 0.29.2
+     */
+    private char getMappingChar(final char c) {
+        final Character mapped = LETTER_MAP.get(c);
+        return mapped != null ? mapped : c;
+    }
+
+
    @Override
    public char format(char original, IWordContext context) {
-        return InnerWordCharUtils.getMappingChar(original);
+        return getMappingChar(original);
    }

 }
--- a/src/main/java/com/github/houbb/sensitive/word/support/format/WordFormatIgnoreEnglishStyleC2C.java
+++ b/src/main/java/com/github/houbb/sensitive/word/support/format/WordFormatIgnoreEnglishStyleC2C.java
@@ -0,0 +1,69 @@
+package com.github.houbb.sensitive.word.support.format;
+
+import com.github.houbb.heaven.annotation.ThreadSafe;
+import com.github.houbb.sensitive.word.api.IWordContext;
+import com.github.houbb.sensitive.word.api.IWordFormat;
+import com.github.houbb.sensitive.word.collection.Char2CharMap;
+
+/**
+ * Maps circled and parenthesized English letters to plain ASCII letters via a primitive {@link Char2CharMap}.
+ * @author binbin.hou
+ * @since 0.29.2
+ */
+@ThreadSafe
+public class WordFormatIgnoreEnglishStyleC2C implements IWordFormat {
+
+    private static final IWordFormat INSTANCE = new WordFormatIgnoreEnglishStyleC2C();
+
+    public static IWordFormat getInstance() {
+        return INSTANCE;
+    }
+
+    /**
+     * Styled source letters: circled capitals, circled small letters, parenthesized small letters.
+     * @since 0.0.4
+     */
+    private static final String LETTERS_ONE =
+            "ⒶⒷⒸⒹⒺⒻⒼⒽⒾⒿⓀⓁⓂⓃⓄⓅⓆⓇⓈⓉⓊⓋⓌⓍⓎⓏ" +
+                    "ⓐⓑⓒⓓⓔⓕⓖⓗⓘⓙⓚⓛⓜⓝⓞⓟⓠⓡⓢⓣⓤⓥⓦⓧⓨⓩ" +
+                    "⒜⒝⒞⒟⒠⒡⒢⒣⒤⒥⒦⒧⒨⒩⒪⒫⒬⒭⒮⒯⒰⒱⒲⒳⒴⒵";
+
+    /**
+     * Plain replacements; the character at index i replaces LETTERS_ONE.charAt(i).
+     * @since 0.0.4
+     */
+    private static final String LETTERS_TWO =
+            "ABCDEFGHIJKLMNOPQRSTUVWXYZ" +
+                    "abcdefghijklmnopqrstuvwxyz" +
+                    "abcdefghijklmnopqrstuvwxyz";
+
+    /**
+     * Lookup table from styled letter to plain letter, filled once by the static initializer.
+     */
+    private static final Char2CharMap LETTER_MAP = new Char2CharMap(LETTERS_ONE.length());
+
+    static {
+        final char[] styled = LETTERS_ONE.toCharArray();
+        for (int idx = 0; idx < styled.length; idx++) {
+            LETTER_MAP.put(styled[idx], LETTERS_TWO.charAt(idx));
+        }
+    }
+
+    /**
+     * Resolves the plain letter for a possibly styled character.
+     * @param c character to convert
+     * @return the mapped letter, or {@code c} unchanged when the table has no entry for it
+     * @since 0.29.x
+     */
+    private char getMappingChar(final char c) {
+        final char plain = LETTER_MAP.get(c);
+        // get(char) yields 0 for a missing key; no table value is 0, so 0 means "not mapped"
+        return plain != 0 ? plain : c;
+    }
+
+    @Override
+    public char format(char original, IWordContext context) {
+        return getMappingChar(original);
+    }
+
+}
--- a/src/main/java/com/github/houbb/sensitive/word/support/format/WordFormatIgnoreNumStyle.java
+++ b/src/main/java/com/github/houbb/sensitive/word/support/format/WordFormatIgnoreNumStyle.java
@@ -3,13 +3,16 @@ package com.github.houbb.sensitive.word.support.format;
 import com.github.houbb.heaven.annotation.ThreadSafe;
 import com.github.houbb.sensitive.word.api.IWordContext;
 import com.github.houbb.sensitive.word.api.IWordFormat;
-import com.github.houbb.sensitive.word.utils.InnerWordNumUtils;
+
+import java.util.HashMap;
+import java.util.Map;

 /**
 * 忽略数字的样式
 * @author binbin.hou
 * @since 0.0.5
 */
+@Deprecated
@ThreadSafe
 public class WordFormatIgnoreNumStyle implements IWordFormat {

@@ -19,9 +22,65 @@ public class WordFormatIgnoreNumStyle implements IWordFormat {
        return INSTANCE;
    }

+    private static final String NUM_ONE = "⓪０零º₀⓿○" +
+            "１２３４５６７８９" +
+            "一二三四五六七八九" +
+            "壹贰叁肆伍陆柒捌玖" +
+            "¹²³⁴⁵⁶⁷⁸⁹" +
+            "₁₂₃₄₅₆₇₈₉" +
+            "①②③④⑤⑥⑦⑧⑨" +
+            "⑴⑵⑶⑷⑸⑹⑺⑻⑼" +
+            "⒈⒉⒊⒋⒌⒍⒎⒏⒐" +
+            "❶❷❸❹❺❻❼❽❾" +
+            "➀➁➂➃➄➅➆➇➈" +
+            "➊➋➌➍➎➏➐➑➒" +
+            "㈠㈡㈢㈣㈤㈥㈦㈧㈨" +
+            "⓵⓶⓷⓸⓹⓺⓻⓼⓽" +
+            "㊀㊁㊂㊃㊄㊅㊆㊇㊈" +
+            "ⅰⅱⅲⅳⅴⅵⅶⅷⅸ" +
+            "ⅠⅡⅢⅣⅤⅥⅦⅧⅨ";
+
+    private static final String NUM_TWO = "0000000"+
+            "123456789" +
+            "123456789" +
+            "123456789" +
+            "123456789" +
+            "123456789" +
+            "123456789" +
+            "123456789" +
+            "123456789" +
+            "123456789" +
+            "123456789" +
+            "123456789" +
+            "123456789" +
+            "123456789" +
+            "123456789" +
+            "123456789" +
+            "123456789";
+
+    private static final Map<Character,Character> NUMBER_MAP = new HashMap<>(NUM_ONE.length());
+
+    static {
+        final char[] styled = NUM_ONE.toCharArray();
+        for (int idx = 0; idx < styled.length; idx++) {
+            NUMBER_MAP.put(styled[idx], NUM_TWO.charAt(idx)); // same index in both strings forms one pair
+        }
+    }
+
+    /**
+     * Resolves the plain digit for a possibly styled character.
+     * @param c character to convert
+     * @return the mapped digit, or {@code c} unchanged when NUMBER_MAP has no entry for it
+     * @since 0.0.4
+     */
+    private char getMappingChar(final char c) {
+        final Character mapped = NUMBER_MAP.get(c);
+        return mapped != null ? mapped : c;
+    }
+
    @Override
    public char format(char original, IWordContext context) {
-        return InnerWordNumUtils.getMappingChar(original);
+        return getMappingChar(original);
    }

 }
--- a/src/main/java/com/github/houbb/sensitive/word/support/format/WordFormatIgnoreNumStyleC2C.java
+++ b/src/main/java/com/github/houbb/sensitive/word/support/format/WordFormatIgnoreNumStyleC2C.java
@@ -0,0 +1,86 @@
+package com.github.houbb.sensitive.word.support.format;
+
+import com.github.houbb.heaven.annotation.ThreadSafe;
+import com.github.houbb.sensitive.word.api.IWordContext;
+import com.github.houbb.sensitive.word.api.IWordFormat;
+import com.github.houbb.sensitive.word.collection.Char2CharMap;
+
+import java.util.HashMap;
+import java.util.Map;
+
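+/**
+ * Maps styled digit characters (full-width, Chinese numerals, circled, superscript, Roman, ...) to ASCII digits via a primitive {@link Char2CharMap}.
+ * @author binbin.hou
+ * @since 0.29.2
+ */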
+@ThreadSafe
+public class WordFormatIgnoreNumStyleC2C implements IWordFormat {
+
+    private static final IWordFormat INSTANCE = new WordFormatIgnoreNumStyleC2C();
+
+    public static IWordFormat getInstance() {
+        return INSTANCE;
+    }
+
+    private static final String NUM_ONE = "⓪０零º₀⓿○" + // seven styled zeros, then sixteen styled runs of 1-9
+            "１２３４５６７８９" +
+            "一二三四五六七八九" +
+            "壹贰叁肆伍陆柒捌玖" +
+            "¹²³⁴⁵⁶⁷⁸⁹" +
+            "₁₂₃₄₅₆₇₈₉" +
+            "①②③④⑤⑥⑦⑧⑨" +
+            "⑴⑵⑶⑷⑸⑹⑺⑻⑼" +
+            "⒈⒉⒊⒋⒌⒍⒎⒏⒐" +
+            "❶❷❸❹❺❻❼❽❾" +
+            "➀➁➂➃➄➅➆➇➈" +
+            "➊➋➌➍➎➏➐➑➒" +
+            "㈠㈡㈢㈣㈤㈥㈦㈧㈨" +
+            "⓵⓶⓷⓸⓹⓺⓻⓼⓽" +
+            "㊀㊁㊂㊃㊄㊅㊆㊇㊈" +
+            "ⅰⅱⅲⅳⅴⅵⅶⅷⅸ" +
+            "ⅠⅡⅢⅣⅤⅥⅦⅧⅨ";
+
+    private static final String NUM_TWO = "0000000" + // ASCII digit for the character at the same index in NUM_ONE
+            "123456789" +
+            "123456789" +
+            "123456789" +
+            "123456789" +
+            "123456789" +
+            "123456789" +
+            "123456789" +
+            "123456789" +
+            "123456789" +
+            "123456789" +
+            "123456789" +
+            "123456789" +
+            "123456789" +
+            "123456789" +
+            "123456789" +
+            "123456789";
+
+    private static final Char2CharMap NUMBER_MAP = new Char2CharMap(NUM_ONE.length()); // filled by the static block
+
+    static {
+        final char[] styled = NUM_ONE.toCharArray();
+        for (int idx = 0; idx < styled.length; idx++) {
+            NUMBER_MAP.put(styled[idx], NUM_TWO.charAt(idx));
+        }
+    }
+
+    /**
+     * Resolves the plain digit for a possibly styled character.
+     * @param c character to convert
+     * @return the mapped digit, or {@code c} unchanged when the lookup yields 0 (no table value is 0)
+     * @since 0.0.4
+     */
+    private char getMappingChar(final char c) {
+        final char plain = NUMBER_MAP.get(c);
+        return plain != 0 ? plain : c;
+    }
+
+    @Override
+    public char format(char original, IWordContext context) {
+        return getMappingChar(original);
+    }
+
+}
--- a/src/main/java/com/github/houbb/sensitive/word/support/format/WordFormatIgnoreWidth.java
+++ b/src/main/java/com/github/houbb/sensitive/word/support/format/WordFormatIgnoreWidth.java
@@ -1,9 +1,9 @@
 package com.github.houbb.sensitive.word.support.format;

 import com.github.houbb.heaven.annotation.ThreadSafe;
-import com.github.houbb.heaven.util.lang.CharUtil;
 import com.github.houbb.sensitive.word.api.IWordContext;
 import com.github.houbb.sensitive.word.api.IWordFormat;
+import com.github.houbb.sensitive.word.utils.InnerCharUtils;

 /**
 * 格式化字宽度
@@ -21,7 +21,7 @@ public class WordFormatIgnoreWidth implements IWordFormat {

    @Override
    public char format(char original, IWordContext context) {
-        return CharUtil.toHalfWidth(original);
+        return InnerCharUtils.toHalfWidth(original);
    }

 }
--- a/src/main/java/com/github/houbb/sensitive/word/support/format/WordFormatInit.java
+++ b/src/main/java/com/github/houbb/sensitive/word/support/format/WordFormatInit.java
@@ -1,44 +0,0 @@
-package com.github.houbb.sensitive.word.support.format;
-
-import com.github.houbb.heaven.annotation.ThreadSafe;
-import com.github.houbb.heaven.support.pipeline.Pipeline;
-import com.github.houbb.heaven.support.pipeline.impl.DefaultPipeline;
-import com.github.houbb.sensitive.word.api.IWordFormat;
-import com.github.houbb.sensitive.word.api.IWordContext;
-
-import java.util.List;
-
-/**
- * 格式化责任链
- * @author binbin.hou
- * @since 0.0.5
- */
-@ThreadSafe
-@Deprecated
-public abstract class WordFormatInit implements IWordFormat {
-
-    /**
-     * 初始化列表
-     *
-     * @param pipeline 当前列表泳道
-     * @since 0.0.13
-     */
-    protected abstract void init(final Pipeline<IWordFormat> pipeline);
-
-    @Override
-    public char format(char original, IWordContext context) {
-        Pipeline<IWordFormat> pipeline = new DefaultPipeline<>();
-        init(pipeline);
-
-        char result = original;
-
-        // 循环执行
-        List<IWordFormat> charFormats = pipeline.list();
-        for(IWordFormat charFormat : charFormats) {
-            result = charFormat.format(result, context);
-        }
-
-        return result;
-    }
-
-}
--- a/src/main/java/com/github/houbb/sensitive/word/support/format/WordFormats.java
+++ b/src/main/java/com/github/houbb/sensitive/word/support/format/WordFormats.java
@@ -51,7 +51,7 @@ public final class WordFormats {
    }

    public static IWordFormat ignoreEnglishStyle() {
-        return WordFormatIgnoreEnglishStyle.getInstance();
+        return WordFormatIgnoreEnglishStyleC2C.getInstance();
    }

    public static IWordFormat ignoreChineseStyle() {
@@ -59,7 +59,7 @@ public final class WordFormats {
    }

    public static IWordFormat ignoreNumStyle() {
-        return WordFormatIgnoreNumStyle.getInstance();
+        return WordFormatIgnoreNumStyleC2C.getInstance();
    }

    public static IWordFormat ignoreWidth() {
--- a/src/main/java/com/github/houbb/sensitive/word/support/format/mapping/WordFormatTextDefault.java
+++ b/src/main/java/com/github/houbb/sensitive/word/support/format/mapping/WordFormatTextDefault.java
@@ -2,7 +2,6 @@ package com.github.houbb.sensitive.word.support.format.mapping;

 import com.github.houbb.sensitive.word.api.IWordContext;
 import com.github.houbb.sensitive.word.api.IWordFormat;
-import com.github.houbb.sensitive.word.support.check.WordCheckNone;
 import com.github.houbb.sensitive.word.support.format.WordFormatNone;

 import java.util.Collections;
@@ -26,6 +25,7 @@ public class WordFormatTextDefault extends AbstractWordFormatText {
            return Collections.emptyMap();
        }

+        //v0.29.2
        Map<Character, Character> map = new HashMap<>();
        for(int i = 0; i < text.length(); i++) {
            char c = text.charAt(i);
--- a/src/main/java/com/github/houbb/sensitive/word/utils/InnerCharUtils.java
+++ b/src/main/java/com/github/houbb/sensitive/word/utils/InnerCharUtils.java
@@ -5,6 +5,24 @@ package com.github.houbb.sensitive.word.utils;
 */
 public class InnerCharUtils {

+    /**
+     * Converts a full-width character to its half-width counterpart.
+     * @param original character to convert
+     * @return the half-width equivalent, or {@code original} itself when it is not a full-width form
+     * @since 0.29.2
+     */
+    public static char toHalfWidth(char original) {
+        final boolean fullWidthSpace = original == '\u3000';
+        final boolean fullWidthAscii = original >= '\uFF01' && original <= '\uFF5E';
+        if (fullWidthSpace) {
+            // U+3000 is outside the fixed-offset range, so it maps straight to the ASCII space
+            return ' ';
+        }
+        // U+FF01..U+FF5E sit exactly 0xFEE0 above '!'..'~'; anything else is returned untouched
+        return fullWidthAscii ? (char) (original - 0xFEE0) : original;
+    }
+
+
    /**
     * 转换为整数
     * @param text 文本
--- a/src/main/java/com/github/houbb/sensitive/word/utils/InnerWordCharUtils.java
+++ b/src/main/java/com/github/houbb/sensitive/word/utils/InnerWordCharUtils.java
@@ -1,11 +1,7 @@
 package com.github.houbb.sensitive.word.utils;

-import com.github.houbb.heaven.util.guava.Guavas;
-import com.github.houbb.heaven.util.lang.ObjectUtil;
 import com.github.houbb.sensitive.word.api.IWordResult;

-import java.util.Map;
-
 /**
 * <p> project: sensitive-word-NumUtils </p>
 * <p> create on 2020/1/8 22:18 </p>
@@ -18,84 +14,6 @@ public final class InnerWordCharUtils {
    private InnerWordCharUtils() {
    }

-    /**
-     * 英文字母1
-     * @since 0.0.4
-     */
-    private static final String LETTERS_ONE =
-            "ⒶⒷⒸⒹⒺⒻⒼⒽⒾⒿⓀⓁⓂⓃⓄⓅⓆⓇⓈⓉⓊⓋⓌⓍⓎⓏ" +
-                    "ⓐⓑⓒⓓⓔⓕⓖⓗⓘⓙⓚⓛⓜⓝⓞⓟⓠⓡⓢⓣⓤⓥⓦⓧⓨⓩ" +
-                    "⒜⒝⒞⒟⒠⒡⒢⒣⒤⒥⒦⒧⒨⒩⒪⒫⒬⒭⒮⒯⒰⒱⒲⒳⒴⒵";
-
-    /**
-     * 英文字母2
-     * @since 0.0.4
-     */
-    private static final String LETTERS_TWO =
-            "ABCDEFGHIJKLMNOPQRSTUVWXYZ" +
-                    "abcdefghijklmnopqrstuvwxyz" +
-                    "abcdefghijklmnopqrstuvwxyz";
-
-
-    /**
-     * 英文字母 map
-     * @since 0.0.4
-     */
-    private static final Map<Character, Character> LETTER_MAP = Guavas.newHashMap(LETTERS_ONE.length());
-
-    static {
-        final int size = LETTERS_ONE.length();
-
-        for(int i = 0; i < size; i++) {
-            LETTER_MAP.put(LETTERS_ONE.charAt(i), LETTERS_TWO.charAt(i));
-        }
-    }
-
-    /**
-     * 映射后的 char
-     * @param character 待转换的 char
-     * @return 结果
-     * @since 0.0.4
-     */
-    public static Character getMappingChar(final Character character) {
-        final Character mapChar = LETTER_MAP.get(character);
-        if(ObjectUtil.isNotNull(mapChar)) {
-            return mapChar;
-        }
-
-        return character;
-    }
-
-    /**
-     * 构建字符串
-     * @param chars 字符数组
-     * @param startIndex 开始位置
-     * @param endIndex 结束位置
-     * @return 结果
-     * @since 0.5.0
-     */
-//    @Deprecated
-//    public static String getString(final char[] chars,
-//                                   final int startIndex,
-//                                   final int endIndex) {
-//        // 截取
-//        int len = endIndex - startIndex;
-//        return new String(chars, startIndex, len);
-//    }
-
-    /**
-     * 构建字符串
-     * @param chars 字符数组
-     * @param wordResult 结果
-     * @return 结果
-     * @since 0.5.0
-     */
-//    @Deprecated
-//    public static String getString(final char[] chars,
-//                                   final IWordResult wordResult) {
-//        return getString(chars, wordResult.startIndex(), wordResult.endIndex());
-//    }
-
    /**
     * 构建字符串
     * @param text 字符串
--- a/src/main/java/com/github/houbb/sensitive/word/utils/InnerWordFormatUtils.java
+++ b/src/main/java/com/github/houbb/sensitive/word/utils/InnerWordFormatUtils.java
@@ -47,17 +47,20 @@ public final class InnerWordFormatUtils {

    /**
     * 字符串统一的格式化处理
+     *
+     * 注意：这个需要 map 的实现是 {@link it.unimi.dsi.fastutil.chars.Char2CharOpenHashMap}
     * @param map 映射集合
     * @param c 原始
     * @return 结果
     * @since 0.28.0
     */
    public static char getMappingChar(final Map<Character, Character> map, char c) {
-        Character mc = map.get(c);
-        if(mc != null) {
-            return mc;
+        //Char2CharOpenHashMap 不存在映射也是返回 null
+        Object mc = map.get(c);
+        if(mc == null) {
+            return c;
        }
-        return c;
+        return (char) mc;
    }

    /**
--- a/src/main/java/com/github/houbb/sensitive/word/utils/InnerWordNumUtils.java
+++ b/src/main/java/com/github/houbb/sensitive/word/utils/InnerWordNumUtils.java
@@ -1,128 +0,0 @@
-package com.github.houbb.sensitive.word.utils;
-
-import com.github.houbb.heaven.util.guava.Guavas;
-import com.github.houbb.heaven.util.lang.ObjectUtil;
-import com.github.houbb.heaven.util.lang.StringUtil;
-import com.github.houbb.sensitive.word.api.IWordContext;
-import com.github.houbb.sensitive.word.constant.enums.WordValidModeEnum;
-
-import java.util.Map;
-
-/**
- * <p> project: sensitive-word-NumUtils </p>
- * <p> create on 2020/1/8 22:18 </p>
- *
- * @author Administrator
- * @since 0.0.4
- */
-public final class InnerWordNumUtils {
-
-    private InnerWordNumUtils(){}
-
-    private static final String NUM_ONE = "⓪０零º₀⓿○" +
-            "１２３４５６７８９" +
-            "一二三四五六七八九" +
-            "壹贰叁肆伍陆柒捌玖" +
-            "¹²³⁴⁵⁶⁷⁸⁹" +
-            "₁₂₃₄₅₆₇₈₉" +
-            "①②③④⑤⑥⑦⑧⑨" +
-            "⑴⑵⑶⑷⑸⑹⑺⑻⑼" +
-            "⒈⒉⒊⒋⒌⒍⒎⒏⒐" +
-            "❶❷❸❹❺❻❼❽❾" +
-            "➀➁➂➃➄➅➆➇➈" +
-            "➊➋➌➍➎➏➐➑➒" +
-            "㈠㈡㈢㈣㈤㈥㈦㈧㈨" +
-            "⓵⓶⓷⓸⓹⓺⓻⓼⓽" +
-            "㊀㊁㊂㊃㊄㊅㊆㊇㊈" +
-            "ⅰⅱⅲⅳⅴⅵⅶⅷⅸ" +
-            "ⅠⅡⅢⅣⅤⅥⅦⅧⅨ";
-
-    private static final String NUM_TWO = "0000000"+
-            "123456789" +
-            "123456789" +
-            "123456789" +
-            "123456789" +
-            "123456789" +
-            "123456789" +
-            "123456789" +
-            "123456789" +
-            "123456789" +
-            "123456789" +
-            "123456789" +
-            "123456789" +
-            "123456789" +
-            "123456789" +
-            "123456789" +
-            "123456789";
-
-    /**
-     * 英文字母 map
-     * @since 0.0.4
-     */
-    private static final Map<Character, Character> NUMBER_MAP = Guavas.newHashMap(NUM_ONE.length());
-
-    static {
-        final int size = NUM_ONE.length();
-
-        for(int i = 0; i < size; i++) {
-            NUMBER_MAP.put(NUM_ONE.charAt(i), NUM_TWO.charAt(i));
-        }
-    }
-
-    /**
-     * 映射后的 char
-     * @param character 待转换的 char
-     * @return 结果
-     * @since 0.0.4
-     */
-    public static Character getMappingChar(final Character character) {
-        final Character mapChar = NUMBER_MAP.get(character);
-        if(ObjectUtil.isNotNull(mapChar)) {
-            return mapChar;
-        }
-
-        return character;
-    }
-
-    public static String getMappingString(final String string) {
-        if(StringUtil.isEmpty(string)) {
-            return string;
-        }
-
-        int length = string.length();
-        StringBuilder stringBuilder = new StringBuilder(length);
-        for(int i = 0; i < length; i++) {
-            char mapChar = getMappingChar(string.charAt(i));
-
-            //TODO: stop word 的处理
-            stringBuilder.append(mapChar);
-        }
-
-        return stringBuilder.toString();
-    }
-
-    /**
-     * 检查敏感词数量
-     * <p>
-     * （1）如果未命中敏感词，直接返回 0
-     * （2）命中敏感词，则返回敏感词的长度。
-     *
-     * ps: 这里结果进行优化，
-     * 1. 是否包含敏感词。
-     * 2. 敏感词的长度
-     * 3. 正常走过字段的长度（便于后期替换优化，避免不必要的循环重复）
-     *
-     * @param txt           文本信息
-     * @param beginIndex    开始下标
-     * @param wordValidModeEnum 验证模式
-     * @param context 执行上下文
-     * @return 敏感数字对应的长度
-     * @since 0.0.5
-     */
-    private int getSensitiveNumber(final String txt, final int beginIndex,
-                                   final WordValidModeEnum wordValidModeEnum,
-                                   final IWordContext context) {
-        return 0;
-    }
-
-}
--- a/src/test/java/com/github/houbb/sensitive/word/benchmark/CharUtilPerfTest.java
+++ b/src/test/java/com/github/houbb/sensitive/word/benchmark/CharUtilPerfTest.java
@@ -0,0 +1,43 @@
+package com.github.houbb.sensitive.word.benchmark;
+
+import com.github.houbb.heaven.util.lang.CharUtil;
+import com.github.houbb.sensitive.word.utils.InnerCharUtils;
+
+public class CharUtilPerfTest {
+    // Ad-hoc timing harness run through main(); compares CharUtil.toHalfWidth with InnerCharUtils.toHalfWidth.
+    // Number of chars in each test array: 10_00_000 == 1,000,000 (the underscore grouping is unusual but legal).
+    private static final int COUNT = 10_00_000;
+    /** Builds the input arrays, then times both toHalfWidth implementations over fullWidthData and prints the elapsed milliseconds. */
+    public static void main(String[] args) {
+        char[] testData = new char[COUNT];
+        for (int i = 0; i < COUNT; i++) {
+            testData[i] = (char) ('A' + (i % 52)); // NOTE(review): yields U+0041..U+0074 ('A'..'t', including [ \ ] ^ _ and backtick), not A-Z plus a-z; testData is never read below
+        }
+
+        // Stale heading ("test new lowercase"): no lowercase timing follows in this method.
+        // Input data for the half-width timings.
+        char[] fullWidthData = new char[COUNT];
+        for (int i = 0; i < COUNT; i++) {
+            fullWidthData[i] = (char) ('\uFF01' + (i % 94)); // common full-width characters, cycling U+FF01..U+FF5E
+        }
+        // Time the original CharUtil.toHalfWidth.
+        long t5 = System.currentTimeMillis();
+        char sum3 = 0; // char accumulator: wraps at 16 bits and is printed below as a character, not a number
+        for (char c : fullWidthData) {
+            sum3 += CharUtil.toHalfWidth(c);
+        }
+        long t6 = System.currentTimeMillis();
+        System.out.println("原始 toHalfWidth 耗时: " + (t6 - t5) + "ms, sum=" + sum3);
+
+        // Time the new InnerCharUtils.toHalfWidth on the same input.
+        long t7 = System.currentTimeMillis();
+        char sum4 = 0; // same char accumulator caveat as sum3
+        for (char c : fullWidthData) {
+            sum4 += InnerCharUtils.toHalfWidth(c);
+        }
+        long t8 = System.currentTimeMillis();
+        System.out.println("优化 toHalfWidth 耗时: " + (t8 - t7) + "ms, sum=" + sum4);
+    }
+
+
+}
--- a/src/test/java/com/github/houbb/sensitive/word/data/DictSlimTest.java
+++ b/src/test/java/com/github/houbb/sensitive/word/data/DictSlimTest.java
@@ -1,149 +1,149 @@
-package com.github.houbb.sensitive.word.data;
-
-import com.github.houbb.heaven.support.filter.IFilter;
-import com.github.houbb.heaven.support.handler.IHandler;
-import com.github.houbb.heaven.util.io.FileUtil;
-import com.github.houbb.heaven.util.lang.StringUtil;
-import com.github.houbb.heaven.util.util.CollectionUtil;
-import com.github.houbb.opencc4j.core.impl.ZhConvertBootstrap;
-import com.github.houbb.opencc4j.support.segment.impl.CharSegment;
-import com.github.houbb.sensitive.word.utils.InnerWordNumUtils;
-import org.junit.Ignore;
-import org.junit.Test;
-
-import java.util.List;
-
-/**
- * 数据初始化
- * @author binbin.hou
- * @since 0.0.3
- */
-@Ignore
-public class DictSlimTest {
-
-    /**
-     * 统一格式
-     *
-     * 1. 将所有的大写字母统一转换为小写
-     * 2. 将所有的全角转换为半角
-     * 3. 移除所有【空格】【符号】(这个就是各种符号的过滤了)
-     * 4. 繁体字统一转换为简体字
-     * @since 0.0.3
-     */
-    @Test
-    @Ignore
-    public void formatTest() {
-        final String sourceFile = "D:\\github\\sensitive-word\\src\\main\\resources\\sensitive_word_dict.txt";
-        final String targetFile = "D:\\github\\sensitive-word\\src\\main\\resources\\sensitive_word_dict.txt";
-
-        List<String> words = FileUtil.readAllLines(sourceFile);
-
-        List<String> formats = CollectionUtil.toList(words, new IHandler<String, String>() {
-            @Override
-            public String handle(String string) {
-                String lower = string.toLowerCase();
-                String half = StringUtil.toHalfWidth(lower);
-                String trim = StringUtil.trimAnyBlank(half);
-                String punc = StringUtil.trimAnyPunctionAndSymbol(trim);
-                return ZhConvertBootstrap.newInstance(new CharSegment()).toSimple(punc);
-            }
-        });
-
-        List<String> resultList = DataUtil.disctinctAndSort(formats);
-        FileUtil.write(targetFile, resultList);
-    }
-
-    /**
-     * 移除测试
-     *
-     * 1. 移除 QQ 号的类似数字
-     * 2. 移除所有网址（.com、cn、.org）
-     * 3. 移除纯英文
-     * 4. 移除乱码 `<60>`
-     * 5. 移除英文+数字的
-     *
-     * @since 0.0.3
-     */
-    @Test
-    @Ignore
-    public void removeTest() {
-        final String sourceFile = "D:\\github\\sensitive-word\\src\\main\\resources\\sensitive_word_dict.txt";
-        final String targetFile = "D:\\github\\sensitive-word\\src\\main\\resources\\sensitive_word_dict.txt";
-
-        List<String> words = FileUtil.readAllLines(sourceFile);
-
-        List<String> formats = CollectionUtil.filterList(words, new IFilter<String>() {
-            @Override
-            public boolean filter(String string) {
-                return StringUtil.isDigitOrLetter(string)
-                        || string.contains("<EFBFBD>")
-                        || string.contains("删掉")
-                        || isUrl(string);
-            }
-        });
-
-        List<String> resultList = DataUtil.disctinctAndSort(formats);
-        FileUtil.write(targetFile, resultList);
-    }
-
-    /**
-     * 数字映射处理
-     * @since 0.0.4
-     */
-    @Test
-    public void removeNumberMappingTest() {
-        final String sourceFile = "D:\\_github\\sensitive-word\\src\\main\\resources\\sensitive_word_dict.txt";
-        final String targetFile = "D:\\_github\\sensitive-word\\src\\main\\resources\\sensitive_word_dict.txt";
-
-        List<String> words = FileUtil.readAllLines(sourceFile);
-        List<String> formats = CollectionUtil.toList(words, new IHandler<String, String>() {
-            @Override
-            public String handle(String s) {
-                return s.replaceAll(" ", "");
-            }
-        });
-        List<String> filters = CollectionUtil.filterList(formats, new IFilter<String>() {
-            @Override
-            public boolean filter(String string) {
-                return isNumber(string);
-            }
-        });
-
-        List<String> resultList = DataUtil.disctinctAndSort(filters);
-        FileUtil.write(targetFile, resultList);
-    }
-
-    /**
-     * 是否为存数字
-     * （1）数字小于4的直接跳过。
-     * @param string 原始字符串
-     * @return 结果
-     * @since 0.0.4
-     */
-    private static boolean isNumber(final String string) {
-        if(string.length() <= 4) {
-            return false;
-        }
-
-        // 停顿词语
-        String trim = string.replaceAll("加|否|与|和", "");
-        String mapString = InnerWordNumUtils.getMappingString(trim);
-        boolean result = StringUtil.isDigit(mapString);
-        if(result) {
-            System.out.println(string);
-        }
-        return result;
-    }
-
-    private static boolean isUrl(final String string) {
-        return string.endsWith(".com")
-                || string.endsWith(".cn")
-                || string.endsWith(".org");
-    }
-
-    public static void main(String[] args) {
-        String trim = "1和2".replaceAll("加|否|与|和", "");
-        System.out.println(trim);
-    }
-
-}
+//package com.github.houbb.sensitive.word.data;
+//
+//import com.github.houbb.heaven.support.filter.IFilter;
+//import com.github.houbb.heaven.support.handler.IHandler;
+//import com.github.houbb.heaven.util.io.FileUtil;
+//import com.github.houbb.heaven.util.lang.StringUtil;
+//import com.github.houbb.heaven.util.util.CollectionUtil;
+//import com.github.houbb.opencc4j.core.impl.ZhConvertBootstrap;
+//import com.github.houbb.opencc4j.support.segment.impl.CharSegment;
+//import com.github.houbb.sensitive.word.utils.InnerWordNumUtils;
+//import org.junit.Ignore;
+//import org.junit.Test;
+//
+//import java.util.List;
+//
+///**
+// * 数据初始化
+// * @author binbin.hou
+// * @since 0.0.3
+// */
+//@Ignore
+//public class DictSlimTest {
+//
+//    /**
+//     * 统一格式
+//     *
+//     * 1. 将所有的大写字母统一转换为小写
+//     * 2. 将所有的全角转换为半角
+//     * 3. 移除所有【空格】【符号】(这个就是各种符号的过滤了)
+//     * 4. 繁体字统一转换为简体字
+//     * @since 0.0.3
+//     */
+//    @Test
+//    @Ignore
+//    public void formatTest() {
+//        final String sourceFile = "D:\\github\\sensitive-word\\src\\main\\resources\\sensitive_word_dict.txt";
+//        final String targetFile = "D:\\github\\sensitive-word\\src\\main\\resources\\sensitive_word_dict.txt";
+//
+//        List<String> words = FileUtil.readAllLines(sourceFile);
+//
+//        List<String> formats = CollectionUtil.toList(words, new IHandler<String, String>() {
+//            @Override
+//            public String handle(String string) {
+//                String lower = string.toLowerCase();
+//                String half = StringUtil.toHalfWidth(lower);
+//                String trim = StringUtil.trimAnyBlank(half);
+//                String punc = StringUtil.trimAnyPunctionAndSymbol(trim);
+//                return ZhConvertBootstrap.newInstance(new CharSegment()).toSimple(punc);
+//            }
+//        });
+//
+//        List<String> resultList = DataUtil.disctinctAndSort(formats);
+//        FileUtil.write(targetFile, resultList);
+//    }
+//
+//    /**
+//     * 移除测试
+//     *
+//     * 1. 移除 QQ 号的类似数字
+//     * 2. 移除所有网址（.com、cn、.org）
+//     * 3. 移除纯英文
+//     * 4. 移除乱码 `<60>`
+//     * 5. 移除英文+数字的
+//     *
+//     * @since 0.0.3
+//     */
+//    @Test
+//    @Ignore
+//    public void removeTest() {
+//        final String sourceFile = "D:\\github\\sensitive-word\\src\\main\\resources\\sensitive_word_dict.txt";
+//        final String targetFile = "D:\\github\\sensitive-word\\src\\main\\resources\\sensitive_word_dict.txt";
+//
+//        List<String> words = FileUtil.readAllLines(sourceFile);
+//
+//        List<String> formats = CollectionUtil.filterList(words, new IFilter<String>() {
+//            @Override
+//            public boolean filter(String string) {
+//                return StringUtil.isDigitOrLetter(string)
+//                        || string.contains("<22>")
+//                        || string.contains("删掉")
+//                        || isUrl(string);
+//            }
+//        });
+//
+//        List<String> resultList = DataUtil.disctinctAndSort(formats);
+//        FileUtil.write(targetFile, resultList);
+//    }
+//
+//    /**
+//     * 数字映射处理
+//     * @since 0.0.4
+//     */
+//    @Test
+//    public void removeNumberMappingTest() {
+//        final String sourceFile = "D:\\_github\\sensitive-word\\src\\main\\resources\\sensitive_word_dict.txt";
+//        final String targetFile = "D:\\_github\\sensitive-word\\src\\main\\resources\\sensitive_word_dict.txt";
+//
+//        List<String> words = FileUtil.readAllLines(sourceFile);
+//        List<String> formats = CollectionUtil.toList(words, new IHandler<String, String>() {
+//            @Override
+//            public String handle(String s) {
+//                return s.replaceAll(" ", "");
+//            }
+//        });
+//        List<String> filters = CollectionUtil.filterList(formats, new IFilter<String>() {
+//            @Override
+//            public boolean filter(String string) {
+//                return isNumber(string);
+//            }
+//        });
+//
+//        List<String> resultList = DataUtil.disctinctAndSort(filters);
+//        FileUtil.write(targetFile, resultList);
+//    }
+//
+//    /**
+//     * 是否为存数字
+//     * （1）数字小于4的直接跳过。
+//     * @param string 原始字符串
+//     * @return 结果
+//     * @since 0.0.4
+//     */
+//    private static boolean isNumber(final String string) {
+//        if(string.length() <= 4) {
+//            return false;
+//        }
+//
+//        // 停顿词语
+//        String trim = string.replaceAll("加|否|与|和", "");
+////        String mapString = InnerWordNumUtils.getMappingString(trim);
+////        boolean result = StringUtil.isDigit(mapString);
+////        if(result) {
+////            System.out.println(string);
+////        }
+////        return result;
+//    }
+//
+//    private static boolean isUrl(final String string) {
+//        return string.endsWith(".com")
+//                || string.endsWith(".cn")
+//                || string.endsWith(".org");
+//    }
+//
+//    public static void main(String[] args) {
+//        String trim = "1和2".replaceAll("加|否|与|和", "");
+//        System.out.println(trim);
+//    }
+//
+//}
--- a/src/test/java/com/github/houbb/sensitive/word/support/format/EnglishStylePerfTest.java
+++ b/src/test/java/com/github/houbb/sensitive/word/support/format/EnglishStylePerfTest.java
@@ -0,0 +1,46 @@
+package com.github.houbb.sensitive.word.support.format;
+
+import com.github.houbb.sensitive.word.api.IWordContext;
+import com.github.houbb.sensitive.word.api.IWordFormat;
+
+public class EnglishStylePerfTest {
+    /** Runs cost1 and then cost2 with the same sample text, iteration count and (null) context; each prints its elapsed milliseconds. */
+    public static void main(String[] args) {
+        final int times = 200000;
+
+
+        // Not involved here: a null context is handed to format(...) — presumably unused by these formatters; confirm.
+        IWordContext context = null;
+
+        // Open question from the author: pick a character at random on each iteration?
+        String demo1 = "产品尺寸参数§60mn§50mm§210枚/包§160枚/包§名称A4银色不干胶§规格60mm*40mm 送配套模板§规格70mm*50mm 送配套模板§数量每大张21枚一包10张总计210枚§数量每大张16枚一包10张总计160枚§适用激光打印机打印油性笔书写§95mm§100mn§55mm§100枚/包§80枚/包§名称 A4银色不干胶§规格95mm*55mm 送配套模板§规格100mm*70mm 送配套模板§数量每大张10枚一包10张总计100枚§数量 每大张8枚一包10张 总计80枚§100mm§120枚/包§140枚/包§规格80mm*50mm 送配套模板§规格100mm*40mm 送配套模板§数量每大张12枚一包10张总计120枚§数量§每大张14枚包10张总计140枚§适用 激光打印机打印油性笔书写§40mm§65mm§70mm§35mm§200枚/包§240枚/包§规格70mm*40mm送配套模板§规格§65mm*35mm 送配套模板§数量 每大张20枚一包10张总计200枚§每大张24枚包10张总计240枚§适 激光打印机打印油性笔书写§适用§激光打印机打印油性笔书写§40mn§280枚/包§360枚/包§规格50mm*40mm 送配套模板§规格40mm*30mm 送配套模板§数量每大张28枚一包10张总计280枚§数量每大张36枚一包10张总计360枚§45.7mm§38.1mm§400枚/包§650枚/包§45.7mm*25.4mm送配套模板§38.1mm*21.2mm 送配套模板§每大张40枚一包10张总计400枚§数量每大张65枚一包10张总计650枚§30mm§25mr§20mm§840枚/包§1260枚/包§规格 30mm*20mm 送配套模板§规格25mm*13mm 送配套模板§数量每张84枚包10张总计840枚§数量每大张126枚一包10张总计1260枚§46mm§意制§任§1000枚/包§定§名称定制A4内割银不胶§规格46mm*11.1mm送配套模板§任意规格定制§每大张100枚包10张总计1000枚§包10张满5包送专属模板§适激光打印机打印油性笔书写§产品实拍§8格打印实拍展示(100mm*70mm)§上海荠骞文化用品固定资产标识卡§资产编号：§规格型号：§资产名称：§使用状态：§资产类别：§资产原值§存放地点§生产厂家：§使用人§备§注：§*请爱护公司财产，不要随意撕毁此标签§16格全内容打印实拍展示§固定资产标识卡§资产名称§四层货架（平板）§资产编号§3F跑菜区§规格型号§1800×500×1500§使用部门§财务部§使用时间§2019-04-26§李强§21格手写款打印展示 (60mm*40mm)§固定资标识卡§36格打印实拍展示(40mm*30mm)§固定资产标签§名称:§编号:§部门:§40格打印实拍展示(45.7mm*25.4mm)§固定资§名称：电脑§编号：20210§部门：财务部§20210201§使用人：我最强§八：找最强§编号：20210201§65格打印实拍展示(38mm*21mm)§名称：§编号：§数量：§数量:§100格打印实拍展示(46mm*11.1mm)§客服电话：159 9569 3815§: 159 9569 3815§.§客服电话：159 9569§客服电话：1599§客服电话§服电话：159 9569 3815§话：159 9569 3815§客服电话：1599569 3815§电话：159 9569 3815§9569 3815§159 9569 3815§客服电话：§低值易耗品标识牌(70mm*50mm)§购买日期§保管部门§责任人§生产厂家§不要随意撕毁此标牌*§*请爱护公司财产，不要随意撕导§品标识牌§低值易耗品标识牌§随意撕毁此标牌*§*请爱护公司财产，不要随意撕毁此标牌*§三人沙发§行政酒廊§2200*860*900§2018-07-23§应用范围§多用于产品信息固有资产登记航空仓库管理 医疗政府机构等§Mainly used for product information inherent assets registration, aviation warehouse management, medi§cal government institutions, etc§政府单位§企业办公§仓储行业§医疗器械§教育单位§耐用品§电子产品包装§商城卖场";
+        // "hash" in the author's note — presumably the map-based formatter timed by cost1 (confirm); cost2 then times the C2C variant.
+        cost1(demo1, times, context);
+        cost2(demo1, times, context);
+    }
+    /** Times {@code times} calls of WordFormatIgnoreEnglishStyle.format over characters cycled from {@code text} and prints the elapsed milliseconds. */
+    private static void cost1(String text, int times, IWordContext context) {
+        IWordFormat hashMap = new WordFormatIgnoreEnglishStyle();
+        // NOTE: the local is named hashMap but holds an IWordFormat; i % text.length() below throws ArithmeticException for empty text.
+        long s1 = System.currentTimeMillis();
+        for(int i = 0; i < times; i++) {
+            char c = text.charAt(i % text.length());
+            hashMap.format(c, context);
+        }
+        long cost = System.currentTimeMillis() - s1;
+        System.out.println(cost);
+    }
+    /** Times {@code times} calls of WordFormatIgnoreEnglishStyleC2C.format over characters cycled from {@code text} and prints the elapsed milliseconds. */
+    private static void cost2(String text, int times, IWordContext context) {
+        IWordFormat hashMap = new WordFormatIgnoreEnglishStyleC2C();
+        // NOTE: same loop as cost1; the format(...) result is discarded and only the elapsed time is reported.
+        long s1 = System.currentTimeMillis();
+        for(int i = 0; i < times; i++) {
+            char c = text.charAt(i % text.length());
+            hashMap.format(c, context);
+        }
+        long cost = System.currentTimeMillis() - s1;
+        System.out.println(cost);
+    }
+
+}
--- a/src/test/java/com/github/houbb/sensitive/word/support/format/package-info.java
+++ b/src/test/java/com/github/houbb/sensitive/word/support/format/package-info.java
@@ -0,0 +1 @@
+package com.github.houbb.sensitive.word.support.format;
				`@@ -0,0 +1 @@`
				`package com.github.houbb.sensitive.word.support.format;`