diff --git a/CHANGE_LOG.md b/CHANGE_LOG.md index 297127e..c79b2fb 100644 --- a/CHANGE_LOG.md +++ b/CHANGE_LOG.md @@ -471,3 +471,9 @@ |:---|:-----|-----------------------------|:------------------|:--------------------| | 1 | O | 改进 check、format 的 chains 方法 | 2025-9-5 16:22:24 | 优化性能 | | 2 | O | InnerWordFormatUtils#format | 2025-9-5 16:22:24 | 优化性能+内存 toCharArray | + +# release_0.29.2 + +| 序号 | 变更类型 | 说明 | 时间 | 备注 | +|:---|:-----|---------------------|:------------------|:--------------------| +| 1 | O | 拆箱、装箱优化。优化数字。英文的格式化 | 2025-9-5 16:22:24 | 优化性能 | diff --git a/pom.xml b/pom.xml index 115dfd7..de6838b 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ com.github.houbb sensitive-word - 0.29.1 + 0.29.2 @@ -115,7 +115,6 @@ org.apache.lucene lucene-core - @@ -182,6 +181,20 @@ org.apache.maven.plugins maven-javadoc-plugin ${plugin.maven-javadoc-plugin.version} + + + UTF-8 + + UTF-8 + + UTF-8 + + false + + + -Xdoclint:none + + @@ -252,6 +265,17 @@ org.apache.maven.plugins maven-javadoc-plugin ${plugin.maven-javadoc-plugin.version} + + UTF-8 + UTF-8 + UTF-8 + + -Xdoclint:none + -charset UTF-8 + -encoding UTF-8 + -docencoding UTF-8 + + package diff --git a/src/main/java/com/github/houbb/sensitive/word/collection/Char2CharMap.java b/src/main/java/com/github/houbb/sensitive/word/collection/Char2CharMap.java new file mode 100644 index 0000000..cc326f8 --- /dev/null +++ b/src/main/java/com/github/houbb/sensitive/word/collection/Char2CharMap.java @@ -0,0 +1,106 @@ +package com.github.houbb.sensitive.word.collection; + +/** + * 原生无装箱、拆箱的实现 + * + * @since 0.29.2 + */ +public final class Char2CharMap { + + private static final char EMPTY_KEY = '\0'; // 特殊标记,表示空槽 + private static final float LOAD_FACTOR = 0.5f; + + private char[] keys; + private char[] values; + private int size; + private int mask; // capacity-1,用于快速取模 + private int maxSize; + + public Char2CharMap(int expectedSize) { + int capacity = tableSizeFor((int) (expectedSize / LOAD_FACTOR) + 1); + this.keys = new char[capacity]; + this.values = new char[capacity]; + this.mask = capacity - 1; + this.maxSize = (int) (capacity * LOAD_FACTOR); + this.size = 0; + } + + /** 2 的幂次方容量 */ + private static int tableSizeFor(int cap) { + int n = cap - 1; + n |= n >>> 1; + n |= n >>> 2; + n |= n >>> 4; + n |= n >>> 8; + n |= n >>> 16; + return (n < 2) ? 2 : (n >= (1 << 30) ? (1 << 30) : n + 1); + } + + private int hash(char k) { + return (k * 0x9E3779B9) & mask; // 乘法哈希 + mask + } + + /** 插入或覆盖 */ + public void put(char key, char value) { + if (key == EMPTY_KEY) { + throw new IllegalArgumentException("Key '\0' is reserved as EMPTY_KEY."); + } + int idx = hash(key); + while (true) { + if (keys[idx] == EMPTY_KEY) { + keys[idx] = key; + values[idx] = value; + if (++size >= maxSize) { + resize(); + } + return; + } else if (keys[idx] == key) { + values[idx] = value; + return; + } + idx = (idx + 1) & mask; + } + } + + /** 查询,不存在时返回 defaultValue */ + public char get(char key, char defaultValue) { + if (key == EMPTY_KEY) return defaultValue; + int idx = hash(key); + while (true) { + char k = keys[idx]; + if (k == EMPTY_KEY) return defaultValue; + if (k == key) return values[idx]; + idx = (idx + 1) & mask; + } + } + + public char get(char key) { + char defaultVal = 0; + return get(key, defaultVal); + } + + private void resize() { + int newCap = keys.length << 1; + char[] oldKeys = keys; + char[] oldVals = values; + + keys = new char[newCap]; + values = new char[newCap]; + mask = newCap - 1; + maxSize = (int) (newCap * LOAD_FACTOR); + size = 0; + + for (int i = 0; i < oldKeys.length; i++) { + char k = oldKeys[i]; + if (k != EMPTY_KEY) { + put(k, oldVals[i]); + } + } + } + + public int size() { + return size; + } +} + + diff --git a/src/main/java/com/github/houbb/sensitive/word/support/data/WordDataHashMap.java b/src/main/java/com/github/houbb/sensitive/word/support/data/WordDataHashMap.java deleted file mode 100644 index a4d4c6d..0000000 --- a/src/main/java/com/github/houbb/sensitive/word/support/data/WordDataHashMap.java +++ /dev/null @@ -1,203 +0,0 @@ -package com.github.houbb.sensitive.word.support.data; - -import com.github.houbb.heaven.util.lang.ObjectUtil; -import com.github.houbb.heaven.util.lang.StringUtil; -import com.github.houbb.sensitive.word.api.IWordContext; -import com.github.houbb.sensitive.word.api.context.InnerSensitiveWordContext; -import com.github.houbb.sensitive.word.constant.WordConst; -import com.github.houbb.sensitive.word.constant.enums.WordContainsTypeEnum; - -import java.util.Collection; -import java.util.HashMap; -import java.util.Map; - -/** - * 敏感词 map - * - * 不再维护,降低维护成本 - * - * @author binbin.hou - * @since 0.0.1 - */ -@Deprecated -public class WordDataHashMap extends AbstractWordData { - - /** - * 脱敏单词 map - * - * @since 0.0.1 - */ - private Map innerWordMap; - - /** - * 读取敏感词库,将敏感词放入HashSet中,构建一个DFA算法模型: - * - * @param collection 敏感词库集合 - * @since 0.0.1 - *

- * 使用对象代码 map 的这种一直递归。 - * 参考资料:https://www.cnblogs.com/AlanLee/p/5329555.html - * https://blog.csdn.net/chenssy/article/details/26961957 - */ - @Override - @SuppressWarnings("unchecked") - public synchronized void doInitWordData(Collection collection) { - // 避免扩容带来的消耗 - Map newInnerWordMap = new HashMap(collection.size()); - - for (String key : collection) { - if (StringUtil.isEmpty(key)) { - continue; - } - - // 用来按照相应的格式保存敏感词库数据 - final int size = key.length(); - - // 每一个新词的循环,直接将结果设置为当前 map,所有变化都会体现在结果的 map 中 - Map currentMap = newInnerWordMap; - - for (int i = 0; i < size; i++) { - // 截取敏感词当中的字,在敏感词库中字为HashMap对象的Key键值 - char charKey = key.charAt(i); - // 如果集合存在 - Object wordMap = currentMap.get(charKey); - - // 如果集合存在 - if (ObjectUtil.isNotNull(wordMap)) { - // 直接将获取到的 map 当前当前 map 进行继续的操作 - currentMap = (Map) wordMap; - } else { - //不存在则,则构建一个新的map,同时将isEnd设置为0,因为他不是最后一 - Map newWordMap = new HashMap<>(8); - newWordMap.put(WordConst.IS_END, false); - - // 将新的节点放入当前 map 中 - currentMap.put(charKey, newWordMap); - - // 将新节点设置为当前节点,方便下一次节点的循环。 - currentMap = newWordMap; - } - } - - // 判断是否为最后一个,添加是否结束的标识。 - currentMap.put(WordConst.IS_END, true); - } - - // 最后更新为新的 map,保证更新过程中旧的数据可用 - this.innerWordMap = newInnerWordMap; - } - - @Override - protected void doRemoveWord(Collection collection) { - - } - - @Override - protected void doAddWord(Collection collection) { - - } - - /** - * 是否包含 - * (1)直接遍历所有 - * (2)如果遇到,则直接返回 true - * - * @param stringBuilder 字符串 - * @param innerContext 内部上下文 - * @return 是否包含 - * @since 0.0.1 - */ - @Override - public WordContainsTypeEnum doContains(final StringBuilder stringBuilder, - final InnerSensitiveWordContext innerContext) { - return innerContainsSensitive(stringBuilder, innerContext); - } - - private WordContainsTypeEnum innerContainsSensitive(StringBuilder stringBuilder, - final InnerSensitiveWordContext innerContext) { - // 初始化为当前的 map - Map nowMap = this.innerWordMap; - - // 记录敏感词的长度 - final int len = stringBuilder.length(); - for (int i = 0; i < len; i++) { - // 获取当前的 map 信息 - nowMap = getNowMap(nowMap, i, stringBuilder, innerContext); - - // 如果不为空,则判断是否为结尾。 - if (ObjectUtil.isNull(nowMap)) { - return WordContainsTypeEnum.NOT_FOUND; - } - } - - // 是否为结尾,便于快速失败 - boolean isEnd = isEnd(nowMap); - if(isEnd) { - return WordContainsTypeEnum.CONTAINS_END; - } - - return WordContainsTypeEnum.CONTAINS_PREFIX; - } - - /** - * 判断是否结束 - * BUG-FIX: 避免出现敏感词库中没有的文字。 - * @param map map 信息 - * @return 是否结束 - * @since 0.0.9 - */ - private static boolean isEnd(final Map map) { - if(ObjectUtil.isNull(map)) { - return false; - } - - Object value = map.get(WordConst.IS_END); - if(ObjectUtil.isNull(value)) { - return false; - } - - return (boolean)value; - } - /** - * 获取当前的 Map - * @param nowMap 原始的当前 map - * @param index 下标 - * @param stringBuilder 文本缓存 - * @param sensitiveContext 上下文 - * @return 实际的当前 map - * @since 0.0.7 - */ - private Map getNowMap(Map nowMap, - final int index, - final StringBuilder stringBuilder, - final InnerSensitiveWordContext sensitiveContext) { - final IWordContext context = sensitiveContext.wordContext(); - - // 这里的 char 已经是统一格式化之后的,所以可以不用再次格式化。 - char mappingChar = stringBuilder.charAt(index); - - // 这里做一次重复词的处理 - //TODO: 这里可以优化,是否获取一次。 - Map currentMap = (Map) nowMap.get(mappingChar); - // 启用忽略重复&当前下标不是第一个 - if(context.ignoreRepeat() - && index > 0) { - char preMappingChar = stringBuilder.charAt(index-1); - - // 直接赋值为上一个 map - if(preMappingChar == mappingChar) { - currentMap = nowMap; - } - } - - return currentMap; - } - - @Override - public synchronized void destroy() { - if(innerWordMap != null) { - innerWordMap.clear(); - } - } - -} diff --git a/src/main/java/com/github/houbb/sensitive/word/support/format/WordFormatIgnoreEnglishStyle.java b/src/main/java/com/github/houbb/sensitive/word/support/format/WordFormatIgnoreEnglishStyle.java index eb81f50..a3d5745 100644 --- a/src/main/java/com/github/houbb/sensitive/word/support/format/WordFormatIgnoreEnglishStyle.java +++ b/src/main/java/com/github/houbb/sensitive/word/support/format/WordFormatIgnoreEnglishStyle.java @@ -1,15 +1,18 @@ package com.github.houbb.sensitive.word.support.format; import com.github.houbb.heaven.annotation.ThreadSafe; -import com.github.houbb.sensitive.word.api.IWordFormat; import com.github.houbb.sensitive.word.api.IWordContext; -import com.github.houbb.sensitive.word.utils.InnerWordCharUtils; +import com.github.houbb.sensitive.word.api.IWordFormat; + +import java.util.HashMap; +import java.util.Map; /** * 忽略英文的各种格式 * @author binbin.hou * @since 0.0.6 */ +@Deprecated @ThreadSafe public class WordFormatIgnoreEnglishStyle implements IWordFormat { @@ -19,9 +22,52 @@ public class WordFormatIgnoreEnglishStyle implements IWordFormat { return INSTANCE; } + /** + * 英文字母1 + * @since 0.0.4 + */ + private static final String LETTERS_ONE = + "ⒶⒷⒸⒹⒺⒻⒼⒽⒾⒿⓀⓁⓂⓃⓄⓅⓆⓇⓈⓉⓊⓋⓌⓍⓎⓏ" + + "ⓐⓑⓒⓓⓔⓕⓖⓗⓘⓙⓚⓛⓜⓝⓞⓟⓠⓡⓢⓣⓤⓥⓦⓧⓨⓩ" + + "⒜⒝⒞⒟⒠⒡⒢⒣⒤⒥⒦⒧⒨⒩⒪⒫⒬⒭⒮⒯⒰⒱⒲⒳⒴⒵"; + + /** + * 英文字母2 + * @since 0.0.4 + */ + private static final String LETTERS_TWO = + "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + + "abcdefghijklmnopqrstuvwxyz" + + "abcdefghijklmnopqrstuvwxyz"; + + + /** + * 字母映射表 + */ + private static final Map LETTER_MAP = new HashMap<>(LETTERS_ONE.length()); + + static { + final int size = LETTERS_ONE.length(); + for(int i = 0; i < size; i++) { + LETTER_MAP.put(LETTERS_ONE.charAt(i), LETTERS_TWO.charAt(i)); + } + } + + /** + * 映射后的 char + * @param c 待转换的 char + * @return 转换结果 + * @since 0.29.x + */ + private char getMappingChar(final char c) { + Character mapChar = LETTER_MAP.get(c); + return mapChar == null ? c : mapChar; + } + + @Override public char format(char original, IWordContext context) { - return InnerWordCharUtils.getMappingChar(original); + return getMappingChar(original); } } diff --git a/src/main/java/com/github/houbb/sensitive/word/support/format/WordFormatIgnoreEnglishStyleC2C.java b/src/main/java/com/github/houbb/sensitive/word/support/format/WordFormatIgnoreEnglishStyleC2C.java new file mode 100644 index 0000000..4383dd0 --- /dev/null +++ b/src/main/java/com/github/houbb/sensitive/word/support/format/WordFormatIgnoreEnglishStyleC2C.java @@ -0,0 +1,69 @@ +package com.github.houbb.sensitive.word.support.format; + +import com.github.houbb.heaven.annotation.ThreadSafe; +import com.github.houbb.sensitive.word.api.IWordContext; +import com.github.houbb.sensitive.word.api.IWordFormat; +import com.github.houbb.sensitive.word.collection.Char2CharMap; + +/** + * 忽略英文的各种格式 + * @author binbin.hou + * @since 0.0.6 + */ +@ThreadSafe +public class WordFormatIgnoreEnglishStyleC2C implements IWordFormat { + + private static final IWordFormat INSTANCE = new WordFormatIgnoreEnglishStyleC2C(); + + public static IWordFormat getInstance() { + return INSTANCE; + } + + /** + * 英文字母1 + * @since 0.0.4 + */ + private static final String LETTERS_ONE = + "ⒶⒷⒸⒹⒺⒻⒼⒽⒾⒿⓀⓁⓂⓃⓄⓅⓆⓇⓈⓉⓊⓋⓌⓍⓎⓏ" + + "ⓐⓑⓒⓓⓔⓕⓖⓗⓘⓙⓚⓛⓜⓝⓞⓟⓠⓡⓢⓣⓤⓥⓦⓧⓨⓩ" + + "⒜⒝⒞⒟⒠⒡⒢⒣⒤⒥⒦⒧⒨⒩⒪⒫⒬⒭⒮⒯⒰⒱⒲⒳⒴⒵"; + + /** + * 英文字母2 + * @since 0.0.4 + */ + private static final String LETTERS_TWO = + "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + + "abcdefghijklmnopqrstuvwxyz" + + "abcdefghijklmnopqrstuvwxyz"; + + + /** + * 字母映射表 + */ + private static final Char2CharMap LETTER_MAP = new Char2CharMap(LETTERS_ONE.length()); + + static { + final int size = LETTERS_ONE.length(); + for(int i = 0; i < size; i++) { + LETTER_MAP.put(LETTERS_ONE.charAt(i), LETTERS_TWO.charAt(i)); + } + } + + /** + * 映射后的 char + * @param c 待转换的 char + * @return 转换结果 + * @since 0.29.x + */ + private char getMappingChar(final char c) { + char mc = LETTER_MAP.get(c); + return mc == 0 ? c : mc; + } + + @Override + public char format(char original, IWordContext context) { + return getMappingChar(original); + } + +} diff --git a/src/main/java/com/github/houbb/sensitive/word/support/format/WordFormatIgnoreNumStyle.java b/src/main/java/com/github/houbb/sensitive/word/support/format/WordFormatIgnoreNumStyle.java index 89c85a2..0d65e46 100644 --- a/src/main/java/com/github/houbb/sensitive/word/support/format/WordFormatIgnoreNumStyle.java +++ b/src/main/java/com/github/houbb/sensitive/word/support/format/WordFormatIgnoreNumStyle.java @@ -3,13 +3,16 @@ package com.github.houbb.sensitive.word.support.format; import com.github.houbb.heaven.annotation.ThreadSafe; import com.github.houbb.sensitive.word.api.IWordContext; import com.github.houbb.sensitive.word.api.IWordFormat; -import com.github.houbb.sensitive.word.utils.InnerWordNumUtils; + +import java.util.HashMap; +import java.util.Map; /** * 忽略数字的样式 * @author binbin.hou * @since 0.0.5 */ +@Deprecated @ThreadSafe public class WordFormatIgnoreNumStyle implements IWordFormat { @@ -19,9 +22,65 @@ public class WordFormatIgnoreNumStyle implements IWordFormat { return INSTANCE; } + private static final String NUM_ONE = "⓪0零º₀⓿○" + + "123456789" + + "一二三四五六七八九" + + "壹贰叁肆伍陆柒捌玖" + + "¹²³⁴⁵⁶⁷⁸⁹" + + "₁₂₃₄₅₆₇₈₉" + + "①②③④⑤⑥⑦⑧⑨" + + "⑴⑵⑶⑷⑸⑹⑺⑻⑼" + + "⒈⒉⒊⒋⒌⒍⒎⒏⒐" + + "❶❷❸❹❺❻❼❽❾" + + "➀➁➂➃➄➅➆➇➈" + + "➊➋➌➍➎➏➐➑➒" + + "㈠㈡㈢㈣㈤㈥㈦㈧㈨" + + "⓵⓶⓷⓸⓹⓺⓻⓼⓽" + + "㊀㊁㊂㊃㊄㊅㊆㊇㊈" + + "ⅰⅱⅲⅳⅴⅵⅶⅷⅸ" + + "ⅠⅡⅢⅣⅤⅥⅦⅧⅨ"; + + private static final String NUM_TWO = "0000000"+ + "123456789" + + "123456789" + + "123456789" + + "123456789" + + "123456789" + + "123456789" + + "123456789" + + "123456789" + + "123456789" + + "123456789" + + "123456789" + + "123456789" + + "123456789" + + "123456789" + + "123456789" + + "123456789"; + + private static final Map NUMBER_MAP = new HashMap<>(NUM_ONE.length()); + + static { + final int size = NUM_ONE.length(); + for(int i = 0; i < size; i++) { + NUMBER_MAP.put(NUM_ONE.charAt(i), NUM_TWO.charAt(i)); + } + } + + /** + * 映射后的 char + * @param c 待转换的 char + * @return 结果 + * @since 0.0.4 + */ + private char getMappingChar(final char c) { + Character mapChar = NUMBER_MAP.get(c); + return mapChar == null ? c : mapChar; + } + @Override public char format(char original, IWordContext context) { - return InnerWordNumUtils.getMappingChar(original); + return getMappingChar(original); } } diff --git a/src/main/java/com/github/houbb/sensitive/word/support/format/WordFormatIgnoreNumStyleC2C.java b/src/main/java/com/github/houbb/sensitive/word/support/format/WordFormatIgnoreNumStyleC2C.java new file mode 100644 index 0000000..a92efe9 --- /dev/null +++ b/src/main/java/com/github/houbb/sensitive/word/support/format/WordFormatIgnoreNumStyleC2C.java @@ -0,0 +1,86 @@ +package com.github.houbb.sensitive.word.support.format; + +import com.github.houbb.heaven.annotation.ThreadSafe; +import com.github.houbb.sensitive.word.api.IWordContext; +import com.github.houbb.sensitive.word.api.IWordFormat; +import com.github.houbb.sensitive.word.collection.Char2CharMap; + +import java.util.HashMap; +import java.util.Map; + +/** + * 忽略数字的样式 + * @author binbin.hou + * @since 0.0.5 + */ +@ThreadSafe +public class WordFormatIgnoreNumStyleC2C implements IWordFormat { + + private static final IWordFormat INSTANCE = new WordFormatIgnoreNumStyleC2C(); + + public static IWordFormat getInstance() { + return INSTANCE; + } + + private static final String NUM_ONE = "⓪0零º₀⓿○" + + "123456789" + + "一二三四五六七八九" + + "壹贰叁肆伍陆柒捌玖" + + "¹²³⁴⁵⁶⁷⁸⁹" + + "₁₂₃₄₅₆₇₈₉" + + "①②③④⑤⑥⑦⑧⑨" + + "⑴⑵⑶⑷⑸⑹⑺⑻⑼" + + "⒈⒉⒊⒋⒌⒍⒎⒏⒐" + + "❶❷❸❹❺❻❼❽❾" + + "➀➁➂➃➄➅➆➇➈" + + "➊➋➌➍➎➏➐➑➒" + + "㈠㈡㈢㈣㈤㈥㈦㈧㈨" + + "⓵⓶⓷⓸⓹⓺⓻⓼⓽" + + "㊀㊁㊂㊃㊄㊅㊆㊇㊈" + + "ⅰⅱⅲⅳⅴⅵⅶⅷⅸ" + + "ⅠⅡⅢⅣⅤⅥⅦⅧⅨ"; + + private static final String NUM_TWO = "0000000"+ + "123456789" + + "123456789" + + "123456789" + + "123456789" + + "123456789" + + "123456789" + + "123456789" + + "123456789" + + "123456789" + + "123456789" + + "123456789" + + "123456789" + + "123456789" + + "123456789" + + "123456789" + + "123456789"; + + private static final Char2CharMap NUMBER_MAP = new Char2CharMap(NUM_ONE.length()); + + static { + final int size = NUM_ONE.length(); + for(int i = 0; i < size; i++) { + NUMBER_MAP.put(NUM_ONE.charAt(i), NUM_TWO.charAt(i)); + } + } + + /** + * 映射后的 char + * @param c 待转换的 char + * @return 结果 + * @since 0.0.4 + */ + private char getMappingChar(final char c) { + char mc = NUMBER_MAP.get(c); + return mc == 0 ? c : mc; + } + + @Override + public char format(char original, IWordContext context) { + return getMappingChar(original); + } + +} diff --git a/src/main/java/com/github/houbb/sensitive/word/support/format/WordFormatIgnoreWidth.java b/src/main/java/com/github/houbb/sensitive/word/support/format/WordFormatIgnoreWidth.java index 4b700bd..2191310 100644 --- a/src/main/java/com/github/houbb/sensitive/word/support/format/WordFormatIgnoreWidth.java +++ b/src/main/java/com/github/houbb/sensitive/word/support/format/WordFormatIgnoreWidth.java @@ -1,9 +1,9 @@ package com.github.houbb.sensitive.word.support.format; import com.github.houbb.heaven.annotation.ThreadSafe; -import com.github.houbb.heaven.util.lang.CharUtil; import com.github.houbb.sensitive.word.api.IWordContext; import com.github.houbb.sensitive.word.api.IWordFormat; +import com.github.houbb.sensitive.word.utils.InnerCharUtils; /** * 格式化字宽度 @@ -21,7 +21,7 @@ public class WordFormatIgnoreWidth implements IWordFormat { @Override public char format(char original, IWordContext context) { - return CharUtil.toHalfWidth(original); + return InnerCharUtils.toHalfWidth(original); } } diff --git a/src/main/java/com/github/houbb/sensitive/word/support/format/WordFormatInit.java b/src/main/java/com/github/houbb/sensitive/word/support/format/WordFormatInit.java deleted file mode 100644 index c047d59..0000000 --- a/src/main/java/com/github/houbb/sensitive/word/support/format/WordFormatInit.java +++ /dev/null @@ -1,44 +0,0 @@ -package com.github.houbb.sensitive.word.support.format; - -import com.github.houbb.heaven.annotation.ThreadSafe; -import com.github.houbb.heaven.support.pipeline.Pipeline; -import com.github.houbb.heaven.support.pipeline.impl.DefaultPipeline; -import com.github.houbb.sensitive.word.api.IWordFormat; -import com.github.houbb.sensitive.word.api.IWordContext; - -import java.util.List; - -/** - * 格式化责任链 - * @author binbin.hou - * @since 0.0.5 - */ -@ThreadSafe -@Deprecated -public abstract class WordFormatInit implements IWordFormat { - - /** - * 初始化列表 - * - * @param pipeline 当前列表泳道 - * @since 0.0.13 - */ - protected abstract void init(final Pipeline pipeline); - - @Override - public char format(char original, IWordContext context) { - Pipeline pipeline = new DefaultPipeline<>(); - init(pipeline); - - char result = original; - - // 循环执行 - List charFormats = pipeline.list(); - for(IWordFormat charFormat : charFormats) { - result = charFormat.format(result, context); - } - - return result; - } - -} diff --git a/src/main/java/com/github/houbb/sensitive/word/support/format/WordFormats.java b/src/main/java/com/github/houbb/sensitive/word/support/format/WordFormats.java index 505ea0d..1d67652 100644 --- a/src/main/java/com/github/houbb/sensitive/word/support/format/WordFormats.java +++ b/src/main/java/com/github/houbb/sensitive/word/support/format/WordFormats.java @@ -51,7 +51,7 @@ public final class WordFormats { } public static IWordFormat ignoreEnglishStyle() { - return WordFormatIgnoreEnglishStyle.getInstance(); + return WordFormatIgnoreEnglishStyleC2C.getInstance(); } public static IWordFormat ignoreChineseStyle() { @@ -59,7 +59,7 @@ public final class WordFormats { } public static IWordFormat ignoreNumStyle() { - return WordFormatIgnoreNumStyle.getInstance(); + return WordFormatIgnoreNumStyleC2C.getInstance(); } public static IWordFormat ignoreWidth() { diff --git a/src/main/java/com/github/houbb/sensitive/word/support/format/mapping/WordFormatTextDefault.java b/src/main/java/com/github/houbb/sensitive/word/support/format/mapping/WordFormatTextDefault.java index b41b20d..b36fd9f 100644 --- a/src/main/java/com/github/houbb/sensitive/word/support/format/mapping/WordFormatTextDefault.java +++ b/src/main/java/com/github/houbb/sensitive/word/support/format/mapping/WordFormatTextDefault.java @@ -2,7 +2,6 @@ package com.github.houbb.sensitive.word.support.format.mapping; import com.github.houbb.sensitive.word.api.IWordContext; import com.github.houbb.sensitive.word.api.IWordFormat; -import com.github.houbb.sensitive.word.support.check.WordCheckNone; import com.github.houbb.sensitive.word.support.format.WordFormatNone; import java.util.Collections; @@ -26,6 +25,7 @@ public class WordFormatTextDefault extends AbstractWordFormatText { return Collections.emptyMap(); } + //v0.29.2 Map map = new HashMap<>(); for(int i = 0; i < text.length(); i++) { char c = text.charAt(i); diff --git a/src/main/java/com/github/houbb/sensitive/word/utils/InnerCharUtils.java b/src/main/java/com/github/houbb/sensitive/word/utils/InnerCharUtils.java index 55b1c2a..0a2ea9f 100644 --- a/src/main/java/com/github/houbb/sensitive/word/utils/InnerCharUtils.java +++ b/src/main/java/com/github/houbb/sensitive/word/utils/InnerCharUtils.java @@ -5,6 +5,24 @@ package com.github.houbb.sensitive.word.utils; */ public class InnerCharUtils { + /** + * 转换为半角 + * @param original 原始 + * @return 半角 + * @since 0.29.2 + */ + public static char toHalfWidth(char original) { + // 全角空格 + if (original == '\u3000') return ' '; + // 其他可转换全角字符 + if (original >= '\uFF01' && original <= '\uFF5E') { + return (char) (original - 0xFEE0); + } + // 其他字符保持不变 + return original; + } + + /** * 转换为整数 * @param text 文本 diff --git a/src/main/java/com/github/houbb/sensitive/word/utils/InnerWordCharUtils.java b/src/main/java/com/github/houbb/sensitive/word/utils/InnerWordCharUtils.java index ca61060..a15b62c 100644 --- a/src/main/java/com/github/houbb/sensitive/word/utils/InnerWordCharUtils.java +++ b/src/main/java/com/github/houbb/sensitive/word/utils/InnerWordCharUtils.java @@ -1,11 +1,7 @@ package com.github.houbb.sensitive.word.utils; -import com.github.houbb.heaven.util.guava.Guavas; -import com.github.houbb.heaven.util.lang.ObjectUtil; import com.github.houbb.sensitive.word.api.IWordResult; -import java.util.Map; - /** *

project: sensitive-word-NumUtils

*

create on 2020/1/8 22:18

@@ -18,84 +14,6 @@ public final class InnerWordCharUtils { private InnerWordCharUtils() { } - /** - * 英文字母1 - * @since 0.0.4 - */ - private static final String LETTERS_ONE = - "ⒶⒷⒸⒹⒺⒻⒼⒽⒾⒿⓀⓁⓂⓃⓄⓅⓆⓇⓈⓉⓊⓋⓌⓍⓎⓏ" + - "ⓐⓑⓒⓓⓔⓕⓖⓗⓘⓙⓚⓛⓜⓝⓞⓟⓠⓡⓢⓣⓤⓥⓦⓧⓨⓩ" + - "⒜⒝⒞⒟⒠⒡⒢⒣⒤⒥⒦⒧⒨⒩⒪⒫⒬⒭⒮⒯⒰⒱⒲⒳⒴⒵"; - - /** - * 英文字母2 - * @since 0.0.4 - */ - private static final String LETTERS_TWO = - "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + - "abcdefghijklmnopqrstuvwxyz" + - "abcdefghijklmnopqrstuvwxyz"; - - - /** - * 英文字母 map - * @since 0.0.4 - */ - private static final Map LETTER_MAP = Guavas.newHashMap(LETTERS_ONE.length()); - - static { - final int size = LETTERS_ONE.length(); - - for(int i = 0; i < size; i++) { - LETTER_MAP.put(LETTERS_ONE.charAt(i), LETTERS_TWO.charAt(i)); - } - } - - /** - * 映射后的 char - * @param character 待转换的 char - * @return 结果 - * @since 0.0.4 - */ - public static Character getMappingChar(final Character character) { - final Character mapChar = LETTER_MAP.get(character); - if(ObjectUtil.isNotNull(mapChar)) { - return mapChar; - } - - return character; - } - - /** - * 构建字符串 - * @param chars 字符数组 - * @param startIndex 开始位置 - * @param endIndex 结束位置 - * @return 结果 - * @since 0.5.0 - */ -// @Deprecated -// public static String getString(final char[] chars, -// final int startIndex, -// final int endIndex) { -// // 截取 -// int len = endIndex - startIndex; -// return new String(chars, startIndex, len); -// } - - /** - * 构建字符串 - * @param chars 字符数组 - * @param wordResult 结果 - * @return 结果 - * @since 0.5.0 - */ -// @Deprecated -// public static String getString(final char[] chars, -// final IWordResult wordResult) { -// return getString(chars, wordResult.startIndex(), wordResult.endIndex()); -// } - /** * 构建字符串 * @param text 字符串 diff --git a/src/main/java/com/github/houbb/sensitive/word/utils/InnerWordFormatUtils.java b/src/main/java/com/github/houbb/sensitive/word/utils/InnerWordFormatUtils.java index a03775f..e0ea24b 100644 --- a/src/main/java/com/github/houbb/sensitive/word/utils/InnerWordFormatUtils.java +++ b/src/main/java/com/github/houbb/sensitive/word/utils/InnerWordFormatUtils.java @@ -47,17 +47,20 @@ public final class InnerWordFormatUtils { /** * 字符串统一的格式化处理 + * + * 注意:这个需要 map 的实现是 {@link it.unimi.dsi.fastutil.chars.Char2CharOpenHashMap} * @param map 映射集合 * @param c 原始 * @return 结果 * @since 0.28.0 */ public static char getMappingChar(final Map map, char c) { - Character mc = map.get(c); - if(mc != null) { - return mc; + //Char2CharOpenHashMap 不存在映射也是返回 null + Object mc = map.get(c); + if(mc == null) { + return c; } - return c; + return (char) mc; } /** diff --git a/src/main/java/com/github/houbb/sensitive/word/utils/InnerWordNumUtils.java b/src/main/java/com/github/houbb/sensitive/word/utils/InnerWordNumUtils.java deleted file mode 100644 index ce9d8a3..0000000 --- a/src/main/java/com/github/houbb/sensitive/word/utils/InnerWordNumUtils.java +++ /dev/null @@ -1,128 +0,0 @@ -package com.github.houbb.sensitive.word.utils; - -import com.github.houbb.heaven.util.guava.Guavas; -import com.github.houbb.heaven.util.lang.ObjectUtil; -import com.github.houbb.heaven.util.lang.StringUtil; -import com.github.houbb.sensitive.word.api.IWordContext; -import com.github.houbb.sensitive.word.constant.enums.WordValidModeEnum; - -import java.util.Map; - -/** - *

project: sensitive-word-NumUtils

- *

create on 2020/1/8 22:18

- * - * @author Administrator - * @since 0.0.4 - */ -public final class InnerWordNumUtils { - - private InnerWordNumUtils(){} - - private static final String NUM_ONE = "⓪0零º₀⓿○" + - "123456789" + - "一二三四五六七八九" + - "壹贰叁肆伍陆柒捌玖" + - "¹²³⁴⁵⁶⁷⁸⁹" + - "₁₂₃₄₅₆₇₈₉" + - "①②③④⑤⑥⑦⑧⑨" + - "⑴⑵⑶⑷⑸⑹⑺⑻⑼" + - "⒈⒉⒊⒋⒌⒍⒎⒏⒐" + - "❶❷❸❹❺❻❼❽❾" + - "➀➁➂➃➄➅➆➇➈" + - "➊➋➌➍➎➏➐➑➒" + - "㈠㈡㈢㈣㈤㈥㈦㈧㈨" + - "⓵⓶⓷⓸⓹⓺⓻⓼⓽" + - "㊀㊁㊂㊃㊄㊅㊆㊇㊈" + - "ⅰⅱⅲⅳⅴⅵⅶⅷⅸ" + - "ⅠⅡⅢⅣⅤⅥⅦⅧⅨ"; - - private static final String NUM_TWO = "0000000"+ - "123456789" + - "123456789" + - "123456789" + - "123456789" + - "123456789" + - "123456789" + - "123456789" + - "123456789" + - "123456789" + - "123456789" + - "123456789" + - "123456789" + - "123456789" + - "123456789" + - "123456789" + - "123456789"; - - /** - * 英文字母 map - * @since 0.0.4 - */ - private static final Map NUMBER_MAP = Guavas.newHashMap(NUM_ONE.length()); - - static { - final int size = NUM_ONE.length(); - - for(int i = 0; i < size; i++) { - NUMBER_MAP.put(NUM_ONE.charAt(i), NUM_TWO.charAt(i)); - } - } - - /** - * 映射后的 char - * @param character 待转换的 char - * @return 结果 - * @since 0.0.4 - */ - public static Character getMappingChar(final Character character) { - final Character mapChar = NUMBER_MAP.get(character); - if(ObjectUtil.isNotNull(mapChar)) { - return mapChar; - } - - return character; - } - - public static String getMappingString(final String string) { - if(StringUtil.isEmpty(string)) { - return string; - } - - int length = string.length(); - StringBuilder stringBuilder = new StringBuilder(length); - for(int i = 0; i < length; i++) { - char mapChar = getMappingChar(string.charAt(i)); - - //TODO: stop word 的处理 - stringBuilder.append(mapChar); - } - - return stringBuilder.toString(); - } - - /** - * 检查敏感词数量 - *

- * (1)如果未命中敏感词,直接返回 0 - * (2)命中敏感词,则返回敏感词的长度。 - * - * ps: 这里结果进行优化, - * 1. 是否包含敏感词。 - * 2. 敏感词的长度 - * 3. 正常走过字段的长度(便于后期替换优化,避免不必要的循环重复) - * - * @param txt 文本信息 - * @param beginIndex 开始下标 - * @param wordValidModeEnum 验证模式 - * @param context 执行上下文 - * @return 敏感数字对应的长度 - * @since 0.0.5 - */ - private int getSensitiveNumber(final String txt, final int beginIndex, - final WordValidModeEnum wordValidModeEnum, - final IWordContext context) { - return 0; - } - -} diff --git a/src/test/java/com/github/houbb/sensitive/word/benchmark/CharUtilPerfTest.java b/src/test/java/com/github/houbb/sensitive/word/benchmark/CharUtilPerfTest.java new file mode 100644 index 0000000..000d3ee --- /dev/null +++ b/src/test/java/com/github/houbb/sensitive/word/benchmark/CharUtilPerfTest.java @@ -0,0 +1,43 @@ +package com.github.houbb.sensitive.word.benchmark; + +import com.github.houbb.heaven.util.lang.CharUtil; +import com.github.houbb.sensitive.word.utils.InnerCharUtils; + +public class CharUtilPerfTest { + + + private static final int COUNT = 10_00_000; + + public static void main(String[] args) { + char[] testData = new char[COUNT]; + for (int i = 0; i < COUNT; i++) { + testData[i] = (char) ('A' + (i % 52)); // A-Z a-z + } + + // 测试新小写 + // 测试原始半角 + char[] fullWidthData = new char[COUNT]; + for (int i = 0; i < COUNT; i++) { + fullWidthData[i] = (char) ('\uFF01' + (i % 94)); // 常见全角字符 + } + + long t5 = System.currentTimeMillis(); + char sum3 = 0; + for (char c : fullWidthData) { + sum3 += CharUtil.toHalfWidth(c); + } + long t6 = System.currentTimeMillis(); + System.out.println("原始 toHalfWidth 耗时: " + (t6 - t5) + "ms, sum=" + sum3); + + // 测试新半角 + long t7 = System.currentTimeMillis(); + char sum4 = 0; + for (char c : fullWidthData) { + sum4 += InnerCharUtils.toHalfWidth(c); + } + long t8 = System.currentTimeMillis(); + System.out.println("优化 toHalfWidth 耗时: " + (t8 - t7) + "ms, sum=" + sum4); + } + + +} diff --git a/src/test/java/com/github/houbb/sensitive/word/data/DictSlimTest.java b/src/test/java/com/github/houbb/sensitive/word/data/DictSlimTest.java index 535f121..afc86eb 100644 --- a/src/test/java/com/github/houbb/sensitive/word/data/DictSlimTest.java +++ b/src/test/java/com/github/houbb/sensitive/word/data/DictSlimTest.java @@ -1,149 +1,149 @@ -package com.github.houbb.sensitive.word.data; - -import com.github.houbb.heaven.support.filter.IFilter; -import com.github.houbb.heaven.support.handler.IHandler; -import com.github.houbb.heaven.util.io.FileUtil; -import com.github.houbb.heaven.util.lang.StringUtil; -import com.github.houbb.heaven.util.util.CollectionUtil; -import com.github.houbb.opencc4j.core.impl.ZhConvertBootstrap; -import com.github.houbb.opencc4j.support.segment.impl.CharSegment; -import com.github.houbb.sensitive.word.utils.InnerWordNumUtils; -import org.junit.Ignore; -import org.junit.Test; - -import java.util.List; - -/** - * 数据初始化 - * @author binbin.hou - * @since 0.0.3 - */ -@Ignore -public class DictSlimTest { - - /** - * 统一格式 - * - * 1. 将所有的大写字母统一转换为小写 - * 2. 将所有的全角转换为半角 - * 3. 移除所有【空格】【符号】(这个就是各种符号的过滤了) - * 4. 繁体字统一转换为简体字 - * @since 0.0.3 - */ - @Test - @Ignore - public void formatTest() { - final String sourceFile = "D:\\github\\sensitive-word\\src\\main\\resources\\sensitive_word_dict.txt"; - final String targetFile = "D:\\github\\sensitive-word\\src\\main\\resources\\sensitive_word_dict.txt"; - - List words = FileUtil.readAllLines(sourceFile); - - List formats = CollectionUtil.toList(words, new IHandler() { - @Override - public String handle(String string) { - String lower = string.toLowerCase(); - String half = StringUtil.toHalfWidth(lower); - String trim = StringUtil.trimAnyBlank(half); - String punc = StringUtil.trimAnyPunctionAndSymbol(trim); - return ZhConvertBootstrap.newInstance(new CharSegment()).toSimple(punc); - } - }); - - List resultList = DataUtil.disctinctAndSort(formats); - FileUtil.write(targetFile, resultList); - } - - /** - * 移除测试 - * - * 1. 移除 QQ 号的类似数字 - * 2. 移除所有网址(.com、cn、.org) - * 3. 移除纯英文 - * 4. 移除乱码 `�` - * 5. 移除英文+数字的 - * - * @since 0.0.3 - */ - @Test - @Ignore - public void removeTest() { - final String sourceFile = "D:\\github\\sensitive-word\\src\\main\\resources\\sensitive_word_dict.txt"; - final String targetFile = "D:\\github\\sensitive-word\\src\\main\\resources\\sensitive_word_dict.txt"; - - List words = FileUtil.readAllLines(sourceFile); - - List formats = CollectionUtil.filterList(words, new IFilter() { - @Override - public boolean filter(String string) { - return StringUtil.isDigitOrLetter(string) - || string.contains("�") - || string.contains("删掉") - || isUrl(string); - } - }); - - List resultList = DataUtil.disctinctAndSort(formats); - FileUtil.write(targetFile, resultList); - } - - /** - * 数字映射处理 - * @since 0.0.4 - */ - @Test - public void removeNumberMappingTest() { - final String sourceFile = "D:\\_github\\sensitive-word\\src\\main\\resources\\sensitive_word_dict.txt"; - final String targetFile = "D:\\_github\\sensitive-word\\src\\main\\resources\\sensitive_word_dict.txt"; - - List words = FileUtil.readAllLines(sourceFile); - List formats = CollectionUtil.toList(words, new IHandler() { - @Override - public String handle(String s) { - return s.replaceAll(" ", ""); - } - }); - List filters = CollectionUtil.filterList(formats, new IFilter() { - @Override - public boolean filter(String string) { - return isNumber(string); - } - }); - - List resultList = DataUtil.disctinctAndSort(filters); - FileUtil.write(targetFile, resultList); - } - - /** - * 是否为存数字 - * (1)数字小于4的直接跳过。 - * @param string 原始字符串 - * @return 结果 - * @since 0.0.4 - */ - private static boolean isNumber(final String string) { - if(string.length() <= 4) { - return false; - } - - // 停顿词语 - String trim = string.replaceAll("加|否|与|和", ""); - String mapString = InnerWordNumUtils.getMappingString(trim); - boolean result = StringUtil.isDigit(mapString); - if(result) { - System.out.println(string); - } - return result; - } - - private static boolean isUrl(final String string) { - return string.endsWith(".com") - || string.endsWith(".cn") - || string.endsWith(".org"); - } - - public static void main(String[] args) { - String trim = "1和2".replaceAll("加|否|与|和", ""); - System.out.println(trim); - } - -} +//package com.github.houbb.sensitive.word.data; +// +//import com.github.houbb.heaven.support.filter.IFilter; +//import com.github.houbb.heaven.support.handler.IHandler; +//import com.github.houbb.heaven.util.io.FileUtil; +//import com.github.houbb.heaven.util.lang.StringUtil; +//import com.github.houbb.heaven.util.util.CollectionUtil; +//import com.github.houbb.opencc4j.core.impl.ZhConvertBootstrap; +//import com.github.houbb.opencc4j.support.segment.impl.CharSegment; +//import com.github.houbb.sensitive.word.utils.InnerWordNumUtils; +//import org.junit.Ignore; +//import org.junit.Test; +// +//import java.util.List; +// +///** +// * 数据初始化 +// * @author binbin.hou +// * @since 0.0.3 +// */ +//@Ignore +//public class DictSlimTest { +// +// /** +// * 统一格式 +// * +// * 1. 将所有的大写字母统一转换为小写 +// * 2. 将所有的全角转换为半角 +// * 3. 移除所有【空格】【符号】(这个就是各种符号的过滤了) +// * 4. 繁体字统一转换为简体字 +// * @since 0.0.3 +// */ +// @Test +// @Ignore +// public void formatTest() { +// final String sourceFile = "D:\\github\\sensitive-word\\src\\main\\resources\\sensitive_word_dict.txt"; +// final String targetFile = "D:\\github\\sensitive-word\\src\\main\\resources\\sensitive_word_dict.txt"; +// +// List words = FileUtil.readAllLines(sourceFile); +// +// List formats = CollectionUtil.toList(words, new IHandler() { +// @Override +// public String handle(String string) { +// String lower = string.toLowerCase(); +// String half = StringUtil.toHalfWidth(lower); +// String trim = StringUtil.trimAnyBlank(half); +// String punc = StringUtil.trimAnyPunctionAndSymbol(trim); +// return ZhConvertBootstrap.newInstance(new CharSegment()).toSimple(punc); +// } +// }); +// +// List resultList = DataUtil.disctinctAndSort(formats); +// FileUtil.write(targetFile, resultList); +// } +// +// /** +// * 移除测试 +// * +// * 1. 移除 QQ 号的类似数字 +// * 2. 移除所有网址(.com、cn、.org) +// * 3. 移除纯英文 +// * 4. 移除乱码 `�` +// * 5. 移除英文+数字的 +// * +// * @since 0.0.3 +// */ +// @Test +// @Ignore +// public void removeTest() { +// final String sourceFile = "D:\\github\\sensitive-word\\src\\main\\resources\\sensitive_word_dict.txt"; +// final String targetFile = "D:\\github\\sensitive-word\\src\\main\\resources\\sensitive_word_dict.txt"; +// +// List words = FileUtil.readAllLines(sourceFile); +// +// List formats = CollectionUtil.filterList(words, new IFilter() { +// @Override +// public boolean filter(String string) { +// return StringUtil.isDigitOrLetter(string) +// || string.contains("�") +// || string.contains("删掉") +// || isUrl(string); +// } +// }); +// +// List resultList = DataUtil.disctinctAndSort(formats); +// FileUtil.write(targetFile, resultList); +// } +// +// /** +// * 数字映射处理 +// * @since 0.0.4 +// */ +// @Test +// public void removeNumberMappingTest() { +// final String sourceFile = "D:\\_github\\sensitive-word\\src\\main\\resources\\sensitive_word_dict.txt"; +// final String targetFile = "D:\\_github\\sensitive-word\\src\\main\\resources\\sensitive_word_dict.txt"; +// +// List words = FileUtil.readAllLines(sourceFile); +// List formats = CollectionUtil.toList(words, new IHandler() { +// @Override +// public String handle(String s) { +// return s.replaceAll(" ", ""); +// } +// }); +// List filters = CollectionUtil.filterList(formats, new IFilter() { +// @Override +// public boolean filter(String string) { +// return isNumber(string); +// } +// }); +// +// List resultList = DataUtil.disctinctAndSort(filters); +// FileUtil.write(targetFile, resultList); +// } +// +// /** +// * 是否为存数字 +// * (1)数字小于4的直接跳过。 +// * @param string 原始字符串 +// * @return 结果 +// * @since 0.0.4 +// */ +// private static boolean isNumber(final String string) { +// if(string.length() <= 4) { +// return false; +// } +// +// // 停顿词语 +// String trim = string.replaceAll("加|否|与|和", ""); +//// String mapString = InnerWordNumUtils.getMappingString(trim); +//// boolean result = StringUtil.isDigit(mapString); +//// if(result) { +//// System.out.println(string); +//// } +//// return result; +// } +// +// private static boolean isUrl(final String string) { +// return string.endsWith(".com") +// || string.endsWith(".cn") +// || string.endsWith(".org"); +// } +// +// public static void main(String[] args) { +// String trim = "1和2".replaceAll("加|否|与|和", ""); +// System.out.println(trim); +// } +// +//} diff --git a/src/test/java/com/github/houbb/sensitive/word/support/format/EnglishStylePerfTest.java b/src/test/java/com/github/houbb/sensitive/word/support/format/EnglishStylePerfTest.java new file mode 100644 index 0000000..3ec8580 --- /dev/null +++ b/src/test/java/com/github/houbb/sensitive/word/support/format/EnglishStylePerfTest.java @@ -0,0 +1,46 @@ +package com.github.houbb.sensitive.word.support.format; + +import com.github.houbb.sensitive.word.api.IWordContext; +import com.github.houbb.sensitive.word.api.IWordFormat; + +public class EnglishStylePerfTest { + + public static void main(String[] args) { + final int times = 200000; + + + // 不涉及 + IWordContext context = null; + + // 每次随机选择? + String demo1 = "产品尺寸参数§60mn§50mm§210枚/包§160枚/包§名称A4银色不干胶§规格60mm*40mm 送配套模板§规格70mm*50mm 送配套模板§数量每大张21枚一包10张总计210枚§数量每大张16枚一包10张总计160枚§适用激光打印机打印油性笔书写§95mm§100mn§55mm§100枚/包§80枚/包§名称 A4银色不干胶§规格95mm*55mm 送配套模板§规格100mm*70mm 送配套模板§数量每大张10枚一包10张总计100枚§数量 每大张8枚一包10张 总计80枚§100mm§120枚/包§140枚/包§规格80mm*50mm 送配套模板§规格100mm*40mm 送配套模板§数量每大张12枚一包10张总计120枚§数量§每大张14枚包10张总计140枚§适用 激光打印机打印油性笔书写§40mm§65mm§70mm§35mm§200枚/包§240枚/包§规格70mm*40mm送配套模板§规格§65mm*35mm 送配套模板§数量 每大张20枚一包10张总计200枚§每大张24枚包10张总计240枚§适 激光打印机打印油性笔书写§适用§激光打印机打印油性笔书写§40mn§280枚/包§360枚/包§规格50mm*40mm 送配套模板§规格40mm*30mm 送配套模板§数量每大张28枚一包10张总计280枚§数量每大张36枚一包10张总计360枚§45.7mm§38.1mm§400枚/包§650枚/包§45.7mm*25.4mm送配套模板§38.1mm*21.2mm 送配套模板§每大张40枚一包10张总计400枚§数量每大张65枚一包10张总计650枚§30mm§25mr§20mm§840枚/包§1260枚/包§规格 30mm*20mm 送配套模板§规格25mm*13mm 送配套模板§数量每张84枚包10张总计840枚§数量每大张126枚一包10张总计1260枚§46mm§意制§任§1000枚/包§定§名称定制A4内割银不胶§规格46mm*11.1mm送配套模板§任意规格定制§每大张100枚包10张总计1000枚§包10张满5包送专属模板§适激光打印机打印油性笔书写§产品实拍§8格打印实拍展示(100mm*70mm)§上海荠骞文化用品固定资产标识卡§资产编号:§规格型号:§资产名称:§使用状态:§资产类别:§资产原值§存放地点§生产厂家:§使用人§备§注:§*请爱护公司财产,不要随意撕毁此标签§16格全内容打印实拍展示§固定资产标识卡§资产名称§四层货架(平板)§资产编号§3F跑菜区§规格型号§1800×500×1500§使用部门§财务部§使用时间§2019-04-26§李强§21格手写款打印展示 (60mm*40mm)§固定资标识卡§36格打印实拍展示(40mm*30mm)§固定资产标签§名称:§编号:§部门:§40格打印实拍展示(45.7mm*25.4mm)§固定资§名称:电脑§编号:20210§部门:财务部§20210201§使用人:我最强§八:找最强§编号:20210201§65格打印实拍展示(38mm*21mm)§名称:§编号:§数量:§数量:§100格打印实拍展示(46mm*11.1mm)§客服电话:159 9569 3815§: 159 9569 3815§.§客服电话:159 9569§客服电话:1599§客服电话§服电话:159 9569 3815§话:159 9569 3815§客服电话:1599569 3815§电话:159 9569 3815§9569 3815§159 9569 3815§客服电话:§低值易耗品标识牌(70mm*50mm)§购买日期§保管部门§责任人§生产厂家§不要随意撕毁此标牌*§*请爱护公司财产,不要随意撕导§品标识牌§低值易耗品标识牌§随意撕毁此标牌*§*请爱护公司财产,不要随意撕毁此标牌*§三人沙发§行政酒廊§2200*860*900§2018-07-23§应用范围§多用于产品信息固有资产登记航空仓库管理 医疗政府机构等§Mainly used for product information inherent assets registration, aviation warehouse management, medi§cal government institutions, etc§政府单位§企业办公§仓储行业§医疗器械§教育单位§耐用品§电子产品包装§商城卖场"; + // hash + cost1(demo1, times, context); + cost2(demo1, times, context); + } + + private static void cost1(String text, int times, IWordContext context) { + IWordFormat hashMap = new WordFormatIgnoreEnglishStyle(); + + long s1 = System.currentTimeMillis(); + for(int i = 0; i < times; i++) { + char c = text.charAt(i % text.length()); + hashMap.format(c, context); + } + long cost = System.currentTimeMillis() - s1; + System.out.println(cost); + } + + private static void cost2(String text, int times, IWordContext context) { + IWordFormat hashMap = new WordFormatIgnoreEnglishStyleC2C(); + + long s1 = System.currentTimeMillis(); + for(int i = 0; i < times; i++) { + char c = text.charAt(i % text.length()); + hashMap.format(c, context); + } + long cost = System.currentTimeMillis() - s1; + System.out.println(cost); + } + +} diff --git a/src/test/java/com/github/houbb/sensitive/word/support/format/package-info.java b/src/test/java/com/github/houbb/sensitive/word/support/format/package-info.java new file mode 100644 index 0000000..4e67d83 --- /dev/null +++ b/src/test/java/com/github/houbb/sensitive/word/support/format/package-info.java @@ -0,0 +1 @@ +package com.github.houbb.sensitive.word.support.format; \ No newline at end of file