diff --git a/README.md b/README.md index cc3c767..d435551 100644 --- a/README.md +++ b/README.md @@ -40,7 +40,7 @@ com.github.houbb sensitive-word - 0.0.1 + 0.0.2 ``` @@ -51,7 +51,7 @@ ### 判断是否包含敏感词 ```java -final String text = "五星红旗迎风飘扬,毛主席的画像屹立在天安门前。。"; +final String text = "五星红旗迎风飘扬,毛主席的画像屹立在天安门前。"; Assert.assertTrue(SensitiveWordBs.getInstance().contains(text)); ``` diff --git a/doc/CHANGE_LOG.md b/doc/CHANGE_LOG.md index 565693a..0bd9ef4 100644 --- a/doc/CHANGE_LOG.md +++ b/doc/CHANGE_LOG.md @@ -14,3 +14,12 @@ | 序号 | 变更类型 | 说明 | 时间 | 备注 | |:---|:---|:---|:---|:--| | 1 | A | 基本功能的实现 | 2020-1-7 21:46:32 | | + +# release_0.0.2 + +| 序号 | 变更类型 | 说明 | 时间 | 备注 | |:---|:---|:---|:---|:--| | 1 | O | 优化最大长度匹配模式 | 2020-1-8 09:34:35 | | | 2 | A | 新增替换实现 | 2020-1-8 09:34:35 | 性能优于各种博客的直接正则替换。| | 3 | O | 优化公共代码到 heaven 项目 | 2020-1-8 09:34:35 | 便于后期统一维护整理。| | 4 | O | 初步优化 DFA 对应 map 的大小 | 2020-1-8 09:34:35 | | \ No newline at end of file diff --git a/doc/issues/关联框架.md b/doc/issues/关联框架.md index 2389f0d..3fa6900 100644 --- a/doc/issues/关联框架.md +++ b/doc/issues/关联框架.md @@ -8,4 +8,10 @@ 中文英文转换 -手写 Regex \ No newline at end of file +手写 Regex + +## 核心原理 + +DFA 算法 + +根据有穷状态机去处理。 \ No newline at end of file diff --git a/pom.xml b/pom.xml index 0324728..41daf20 100644 --- a/pom.xml +++ b/pom.xml @@ -25,7 +25,7 @@ 1.7 - 0.1.66 + 0.1.67-SNAPSHOT 4.12 diff --git a/src/main/java/com/github/houbb/sensitive/word/api/IWordMap.java b/src/main/java/com/github/houbb/sensitive/word/api/IWordMap.java index e004b61..39a6849 100644 --- a/src/main/java/com/github/houbb/sensitive/word/api/IWordMap.java +++ b/src/main/java/com/github/houbb/sensitive/word/api/IWordMap.java @@ -47,4 +47,16 @@ public interface IWordMap { */ String findFirst(final String string); + /** + * 替换所有敏感词内容 + * + * ps: 这里可以添加优化。 + * + * @param target 目标字符串 + * @param replaceChar 替换为的 char + * @return 替换后结果 + * @since 0.0.2 + */ + String replace(final String target, final char replaceChar); + } diff --git
a/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java b/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java index 1cba7a1..95fec06 100644 --- a/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java +++ b/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java @@ -1,9 +1,7 @@ package com.github.houbb.sensitive.word.bs; +import com.github.houbb.heaven.constant.CharConst; import com.github.houbb.heaven.support.instance.impl.Instances; -import com.github.houbb.heaven.util.guava.Guavas; -import com.github.houbb.heaven.util.lang.StringUtil; -import com.github.houbb.heaven.util.util.CollectionUtil; import com.github.houbb.sensitive.word.api.IWordData; import com.github.houbb.sensitive.word.api.IWordMap; import com.github.houbb.sensitive.word.support.data.SensitiveWordData; @@ -59,16 +57,6 @@ public class SensitiveWordBs { return INSTANCE; } - /** - * 是否合法 - * @param target 目标字符串 - * @return 是否 - * @since 0.0.1 - */ - public boolean valid(final String target) { - return !contains(target); - } - /** * 是否包含敏感词 * @param target 目标字符串 @@ -102,4 +90,25 @@ public class SensitiveWordBs { return this.sensitiveWordMap.findFirst(target); } + /** + * 替换所有内容 + * @param target 目标字符串 + * @param replaceChar 替换为的 char + * @return 替换后结果 + * @since 0.0.2 + */ + public String replace(final String target, final char replaceChar) { + return this.sensitiveWordMap.replace(target, replaceChar); + } + + /** + * 替换所有内容 + * @param target 目标字符串 + * @return 替换后结果 + * @since 0.0.2 + */ + public String replace(final String target) { + return this.replace(target, CharConst.STAR); + } + } diff --git a/src/main/java/com/github/houbb/sensitive/word/constant/AppConst.java b/src/main/java/com/github/houbb/sensitive/word/constant/AppConst.java index 8d4d059..f041231 100644 --- a/src/main/java/com/github/houbb/sensitive/word/constant/AppConst.java +++ b/src/main/java/com/github/houbb/sensitive/word/constant/AppConst.java @@ -9,11 +9,20 @@ package 
com.github.houbb.sensitive.word.constant; */ public final class AppConst { + private AppConst(){} + /** * 是否为结束标识 * ps: 某种角度而言,我不是很喜欢这种风格。 + * (1)正常的 char 只會占用一個字符,这里直接给定两个字符即可,降低 Map 的容量。 * @since 0.0.1 */ - public static final String IS_END = "isEnd"; + public static final String IS_END = "ED"; + + /** + * 字典的大小 + * @since 0.0.1 + */ + public static final int DICT_SIZE = 183836; } diff --git a/src/main/java/com/github/houbb/sensitive/word/model/CheckSensitiveWordResult.java b/src/main/java/com/github/houbb/sensitive/word/model/CheckSensitiveWordResult.java new file mode 100644 index 0000000..9c4d030 --- /dev/null +++ b/src/main/java/com/github/houbb/sensitive/word/model/CheckSensitiveWordResult.java @@ -0,0 +1,69 @@ +package com.github.houbb.sensitive.word.model; + +/** + * 检测敏感词结果 + * + * TODO: 这里需要结合 KMP 和 暴力匹配算法。 + * + * 暂时不使用,后期会使用到。 + * @author binbin.hou + * @since 0.0.2 + */ +@Deprecated +public class CheckSensitiveWordResult { + + /** + * 是否匹配到了敏感词 + * @since 0.0.2 + */ + private boolean hasMatch; + + /** + * 敏感词长度 + * @since 0.0.2 + */ + private int sensitiveWordSize; + + /** + * 普通单词的长度 + * @since 0.0.2 + */ + private int commonWordSize; + + public boolean hasMatch() { + return hasMatch; + } + + public CheckSensitiveWordResult hasMatch(boolean hasMatch) { + this.hasMatch = hasMatch; + return this; + } + + public int sentiveWordSize() { + return sensitiveWordSize; + } + + public CheckSensitiveWordResult sentiveWordSize(int sentiveWordSize) { + this.sensitiveWordSize = sentiveWordSize; + return this; + } + + public int commonWordSize() { + return commonWordSize; + } + + public CheckSensitiveWordResult commonWordSize(int commonWordSize) { + this.commonWordSize = commonWordSize; + return this; + } + + @Override + public String toString() { + return "CheckSensitiveWordResult{" + + "hasMatch=" + hasMatch + + ", sensitiveWordSize=" + sensitiveWordSize + + ", commonWordSize=" + commonWordSize + + '}'; + } + +} diff --git 
a/src/main/java/com/github/houbb/sensitive/word/support/data/SensitiveWordData.java b/src/main/java/com/github/houbb/sensitive/word/support/data/SensitiveWordData.java index bae6890..b35e1b7 100644 --- a/src/main/java/com/github/houbb/sensitive/word/support/data/SensitiveWordData.java +++ b/src/main/java/com/github/houbb/sensitive/word/support/data/SensitiveWordData.java @@ -1,10 +1,11 @@ package com.github.houbb.sensitive.word.support.data; import com.github.houbb.heaven.annotation.ThreadSafe; +import com.github.houbb.heaven.util.guava.Guavas; +import com.github.houbb.heaven.util.io.StreamUtil; import com.github.houbb.sensitive.word.api.IWordData; -import com.github.houbb.sensitive.word.util.StreamUtils; +import com.github.houbb.sensitive.word.constant.AppConst; -import java.util.ArrayList; import java.util.List; /** @@ -26,8 +27,8 @@ public class SensitiveWordData implements IWordData { static { synchronized (SensitiveWordData.class) { long start = System.currentTimeMillis(); - defaultLines = new ArrayList<>(183836); - defaultLines = StreamUtils.readAllLines("/dict.txt"); + defaultLines = Guavas.newArrayList(AppConst.DICT_SIZE); + defaultLines = StreamUtil.readAllLines("/dict.txt"); long end = System.currentTimeMillis(); System.out.println("Sensitive data loaded!, cost time: " + (end - start) + " ms"); } diff --git a/src/main/java/com/github/houbb/sensitive/word/support/map/SensitiveWordMap.java b/src/main/java/com/github/houbb/sensitive/word/support/map/SensitiveWordMap.java index b440f90..870acb0 100644 --- a/src/main/java/com/github/houbb/sensitive/word/support/map/SensitiveWordMap.java +++ b/src/main/java/com/github/houbb/sensitive/word/support/map/SensitiveWordMap.java @@ -2,6 +2,7 @@ package com.github.houbb.sensitive.word.support.map; import com.github.houbb.heaven.annotation.ThreadSafe; import com.github.houbb.heaven.util.guava.Guavas; +import com.github.houbb.heaven.util.lang.CharUtil; import com.github.houbb.heaven.util.lang.ObjectUtil; import 
com.github.houbb.heaven.util.lang.StringUtil; import com.github.houbb.heaven.util.util.CollectionUtil; @@ -95,7 +96,7 @@ public class SensitiveWordMap implements IWordMap { } long endTime = System.currentTimeMillis(); - System.out.println("Init sensitive word map end! Cost time " + (endTime-startTime) + "ms"); + System.out.println("Init sensitive word map end! Cost time " + (endTime - startTime) + "ms"); } /** @@ -114,7 +115,7 @@ public class SensitiveWordMap implements IWordMap { } for (int i = 0; i < string.length(); i++) { - int checkResult = checkSensitiveWord(string, i); + int checkResult = checkSensitiveWord(string, i, ValidModeEnum.FAIL_FAST); // 快速返回 if (checkResult > 0) { return true; @@ -148,6 +149,15 @@ public class SensitiveWordMap implements IWordMap { return stringList.get(0); } + @Override + public String replace(String target, char replaceChar) { + if(StringUtil.isEmpty(target)) { + return target; + } + + return this.replaceSensitiveWord(target, ValidModeEnum.FAIL_OVER, replaceChar); + } + /** * 获取敏感词列表 * @@ -164,15 +174,15 @@ public class SensitiveWordMap implements IWordMap { List resultList = Guavas.newArrayList(); for (int i = 0; i < text.length(); i++) { - int wordLength = checkSensitiveWord(text, i); + int wordLength = checkSensitiveWord(text, i, ValidModeEnum.FAIL_OVER); // 命中 if (wordLength > 0) { // 保存敏感词 - String sensitiveWord = text.substring(i, i+wordLength); + String sensitiveWord = text.substring(i, i + wordLength); // 添加去重 - if(!resultList.contains(sensitiveWord)) { + if (!resultList.contains(sensitiveWord)) { resultList.add(sensitiveWord); } @@ -183,6 +193,7 @@ public class SensitiveWordMap implements IWordMap { // 增加 i 的步长 // 为什么要-1,因为默认就会自增1 + // TODO: 这里可以根据字符串匹配算法优化。 i += wordLength - 1; } } @@ -196,17 +207,24 @@ public class SensitiveWordMap implements IWordMap { * (1)如果未命中敏感词,直接返回 0 * (2)命中敏感词,则返回敏感词的长度。 * - * @param txt 文本信息 - * @param beginIndex 开始下标 + * ps: 这里结果进行优化, + * 1. 是否包含敏感词。 + * 2. 敏感词的长度 + * 3. 
正常走过字段的长度(便于后期替换优化,避免不必要的循环重复) + * + * @param txt 文本信息 + * @param beginIndex 开始下标 + * @param validModeEnum 验证模式 * @return 敏感词对应的长度 * @since 0.0.1 */ - private int checkSensitiveWord(String txt, int beginIndex) { + private int checkSensitiveWord(final String txt, final int beginIndex, + final ValidModeEnum validModeEnum) { Map nowMap = sensitiveWordMap; - boolean flag = false; // 记录敏感词的长度 - int sensitiveWordLength = 0; + int lengthCount = 0; + int actualLength = 0; for (int i = beginIndex; i < txt.length(); i++) { char charKey = txt.charAt(i); @@ -214,15 +232,20 @@ public class SensitiveWordMap implements IWordMap { // 并且将 nowMap 替换为新的 map,进入下一层的循环。 nowMap = (Map) nowMap.get(charKey); if (ObjectUtil.isNotNull(nowMap)) { - sensitiveWordLength++; + lengthCount++; // 判断是否是敏感词的结尾字,如果是结尾字则判断是否继续检测 boolean isEnd = (boolean) nowMap.get(AppConst.IS_END); if (isEnd) { - flag = true; + // 只在匹配到结束的时候才记录长度,避免不完全匹配导致的问题。 + // eg: 敏感词 敏感词xxx + // 如果是 【敏感词x】也会被匹配。 + actualLength = lengthCount; - // 这里直接默认 fail-fast 即可。 - break; + // 这里确实需要一种验证模式,主要是为了最大匹配从而达到最佳匹配的效果。 + if (ValidModeEnum.FAIL_FAST.equals(validModeEnum)) { + break; + } } } else { // 直接跳出循环 @@ -230,10 +253,44 @@ public class SensitiveWordMap implements IWordMap { } } - if (!flag) { - sensitiveWordLength = 0; + return actualLength; + } + + /** + * 直接替换敏感词,返回替换后的结果 + * @param target 文本信息 + * @param validModeEnum 验证模式 + * @return 脱敏后的字符串 + * @since 0.0.2 + */ + private String replaceSensitiveWord(final String target, + final ValidModeEnum validModeEnum, + final char replaceChar) { + if(StringUtil.isEmpty(target)) { + return target; } - return sensitiveWordLength; + // 用于结果构建 + StringBuilder resultBuilder = new StringBuilder(); + + for (int i = 0; i < target.length(); i++) { + char currentChar = target.charAt(i); + // 内层直接从 i 开始往后遍历,这个算法的,获取第一个匹配的单词 + int wordLength = checkSensitiveWord(target, i, validModeEnum); + + // 敏感词 + if(wordLength > 0) { + String replaceStr = CharUtil.repeat(replaceChar, wordLength); + 
resultBuilder.append(replaceStr); + + // 直接跳过敏感词的长度 + i += wordLength-1; + } else { + // 普通词 + resultBuilder.append(currentChar); + } + } + + return resultBuilder.toString(); } } diff --git a/src/main/java/com/github/houbb/sensitive/word/util/CharsetUtils.java b/src/main/java/com/github/houbb/sensitive/word/util/CharsetUtils.java index e3d0fc3..20ad14f 100644 --- a/src/main/java/com/github/houbb/sensitive/word/util/CharsetUtils.java +++ b/src/main/java/com/github/houbb/sensitive/word/util/CharsetUtils.java @@ -1,70 +1,70 @@ -package com.github.houbb.sensitive.word.util; - -import com.github.houbb.heaven.annotation.CommonEager; -import com.github.houbb.heaven.util.lang.StringUtil; - -/** - * @author binbin.hou - * @since 0.0.1 - */ -@CommonEager -public class CharsetUtils { - - /** - * 是否为中文字符 - * @param c char - * @return 是否 - * @since 0.0.1 - */ - public static boolean isChinese(char c) { - boolean result = false; - // 汉字范围 \u4e00-\u9fa5 (中文) - if (c >= 19968 && c <= 171941) { - result = true; - } - return result; - } - - /** - * 是否包含中文 - * @param string 字符串 - * @return 是否 - * @since 0.0.1 - */ - public static boolean isContainsChinese(String string) { - if(StringUtil.isEmpty(string)) { - return false; - } - - char[] chars = string.toCharArray(); - for(char c : chars) { - if(isChinese(c)) { - return true; - } - } - - return false; - } - - /** - * 是否全是中文 - * @param string 字符串 - * @return 是否 - * @since 0.0.1 - */ - public static boolean isAllChinese(String string) { - if(StringUtil.isEmpty(string)) { - return false; - } - - char[] chars = string.toCharArray(); - for(char c : chars) { - if(!isChinese(c)) { - return false; - } - } - - return true; - } - -} +//package com.github.houbb.sensitive.word.util; +// +//import com.github.houbb.heaven.annotation.CommonEager; +//import com.github.houbb.heaven.util.lang.StringUtil; +// +///** +// * @author binbin.hou +// * @since 0.0.1 +// */ +//@CommonEager +//public class CharsetUtils { +// +// /** +// * 是否为中文字符 +// * @param c 
char +// * @return 是否 +// * @since 0.0.1 +// */ +// public static boolean isChinese(char c) { +// boolean result = false; +// // 汉字范围 \u4e00-\u9fa5 (中文) +// if (c >= 19968 && c <= 171941) { +// result = true; +// } +// return result; +// } +// +// /** +// * 是否包含中文 +// * @param string 字符串 +// * @return 是否 +// * @since 0.0.1 +// */ +// public static boolean isContainsChinese(String string) { +// if(StringUtil.isEmpty(string)) { +// return false; +// } +// +// char[] chars = string.toCharArray(); +// for(char c : chars) { +// if(isChinese(c)) { +// return true; +// } +// } +// +// return false; +// } +// +// /** +// * 是否全是中文 +// * @param string 字符串 +// * @return 是否 +// * @since 0.0.1 +// */ +// public static boolean isAllChinese(String string) { +// if(StringUtil.isEmpty(string)) { +// return false; +// } +// +// char[] chars = string.toCharArray(); +// for(char c : chars) { +// if(!isChinese(c)) { +// return false; +// } +// } +// +// return true; +// } +// +//} diff --git a/src/main/java/com/github/houbb/sensitive/word/util/StreamUtils.java b/src/main/java/com/github/houbb/sensitive/word/util/StreamUtils.java index dbb1014..2d3b624 100644 --- a/src/main/java/com/github/houbb/sensitive/word/util/StreamUtils.java +++ b/src/main/java/com/github/houbb/sensitive/word/util/StreamUtils.java @@ -1,72 +1,72 @@ -package com.github.houbb.sensitive.word.util; - -import com.github.houbb.heaven.annotation.CommonEager; -import com.github.houbb.heaven.constant.CharsetConst; -import com.github.houbb.heaven.util.lang.StringUtil; -import com.github.houbb.sensitive.word.exception.SensitiveWordException; - -import java.io.BufferedReader; -import java.io.IOException; -import java.io.InputStream; -import java.io.InputStreamReader; -import java.nio.charset.Charset; -import java.util.*; - -/** - * 流工具类 - * @author binbin.hou - * @since 0.0.1 - */ -@CommonEager -public final class StreamUtils { - - private StreamUtils(){} - - /** - * 构建数据集合 - * - * 后期考虑:是否允许用户自定义字典? 
- * 目前不支持这些操作。后期如果需要,再把这些限制放开。 - * @param path 文件路径 - * @return 返回数据集合 - * @since 0.0.1 - */ - public static List readAllLines(final String path) { - return readAllLines(path, CharsetConst.UTF8, true); - } - - /** - * 构建数据集合 - * - * 后期考虑:是否允许用户自定义字典? - * 目前不支持这些操作。后期如果需要,再把这些限制放开。 - * @param path 文件路径 - * @param charset 文件编码 - * @param ignoreEmpty 是否忽略空白行 - * @return 返回数据集合 - * @since 0.0.1 - */ - public static List readAllLines(final String path, - final String charset, - final boolean ignoreEmpty) { - try { - List lines = new ArrayList<>(); - InputStream is = StreamUtils.class.getResourceAsStream(path); - BufferedReader e = new BufferedReader(new InputStreamReader(is, - Charset.forName(charset))); - - while (e.ready()) { - String entry = e.readLine(); - if (StringUtil.isEmpty(entry) - && ignoreEmpty) { - continue; - } - lines.add(entry); - } - return lines; - } catch (IOException e) { - throw new SensitiveWordException("dict init failed!", e); - } - } - -} +//package com.github.houbb.sensitive.word.util; +// +//import com.github.houbb.heaven.annotation.CommonEager; +//import com.github.houbb.heaven.constant.CharsetConst; +//import com.github.houbb.heaven.util.lang.StringUtil; +//import com.github.houbb.sensitive.word.exception.SensitiveWordException; +// +//import java.io.BufferedReader; +//import java.io.IOException; +//import java.io.InputStream; +//import java.io.InputStreamReader; +//import java.nio.charset.Charset; +//import java.util.*; +// +///** +// * 流工具类 +// * @author binbin.hou +// * @since 0.0.1 +// */ +//@CommonEager +//public final class StreamUtils { +// +// private StreamUtils(){} +// +// /** +// * 构建数据集合 +// * +// * 后期考虑:是否允许用户自定义字典? +// * 目前不支持这些操作。后期如果需要,再把这些限制放开。 +// * @param path 文件路径 +// * @return 返回数据集合 +// * @since 0.0.1 +// */ +// public static List readAllLines(final String path) { +// return readAllLines(path, CharsetConst.UTF8, true); +// } +// +// /** +// * 构建数据集合 +// * +// * 后期考虑:是否允许用户自定义字典? 
+// * 目前不支持这些操作。后期如果需要,再把这些限制放开。 +// * @param path 文件路径 +// * @param charset 文件编码 +// * @param ignoreEmpty 是否忽略空白行 +// * @return 返回数据集合 +// * @since 0.0.1 +// */ +// public static List readAllLines(final String path, +// final String charset, +// final boolean ignoreEmpty) { +// try { +// List lines = new ArrayList<>(); +// InputStream is = StreamUtils.class.getResourceAsStream(path); +// BufferedReader e = new BufferedReader(new InputStreamReader(is, +// Charset.forName(charset))); +// +// while (e.ready()) { +// String entry = e.readLine(); +// if (StringUtil.isEmpty(entry) +// && ignoreEmpty) { +// continue; +// } +// lines.add(entry); +// } +// return lines; +// } catch (IOException e) { +// throw new SensitiveWordException("dict init failed!", e); +// } +// } +// +//} diff --git a/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsTest.java b/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsTest.java index d48d364..bfc3ecd 100644 --- a/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsTest.java +++ b/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsTest.java @@ -14,13 +14,21 @@ import java.util.List; */ public class SensitiveWordBsTest { + /** + * 是否包含 + * @since 0.0.1 + */ @Test public void containsTest() { - final String text = "五星红旗迎风飘扬,毛主席的画像屹立在天安门前。。"; + final String text = "五星红旗迎风飘扬,毛主席的画像屹立在天安门前。"; Assert.assertTrue(SensitiveWordBs.getInstance().contains(text)); } + /** + * 返回所有敏感词 + * @since 0.0.1 + */ @Test public void findAllTest() { final String text = "五星红旗迎风飘扬,毛主席的画像屹立在天安门前。"; @@ -29,6 +37,10 @@ public class SensitiveWordBsTest { Assert.assertEquals("[五星红旗, 毛主席, 天安门]", wordList.toString()); } + /** + * 返回所有第一个匹配的敏感词 + * @since 0.0.1 + */ @Test public void findFirstTest() { final String text = "五星红旗迎风飘扬,毛主席的画像屹立在天安门前。"; @@ -37,4 +49,28 @@ public class SensitiveWordBsTest { Assert.assertEquals("五星红旗", word); } + /** + * 默认的替换策略 + * @since 0.0.2 + */ + @Test + public void replaceTest() { + 
final String text = "五星红旗迎风飘扬,毛主席的画像屹立在天安门前。"; + + String result = SensitiveWordBs.getInstance().replace(text); + Assert.assertEquals("****迎风飘扬,***的画像屹立在***前。", result); + } + + /** + * 自定义字符的替换策略 + * @since 0.0.2 + */ + @Test + public void replaceCharTest() { + final String text = "五星红旗迎风飘扬,毛主席的画像屹立在天安门前。"; + + String result = SensitiveWordBs.getInstance().replace(text, '0'); + Assert.assertEquals("0000迎风飘扬,000的画像屹立在000前。", result); + } + } diff --git a/src/test/java/com/github/houbb/sensitive/word/data/DataInitTest.java b/src/test/java/com/github/houbb/sensitive/word/data/DataInitTest.java index 8a76d4d..fd0ab4b 100644 --- a/src/test/java/com/github/houbb/sensitive/word/data/DataInitTest.java +++ b/src/test/java/com/github/houbb/sensitive/word/data/DataInitTest.java @@ -3,8 +3,8 @@ package com.github.houbb.sensitive.word.data; import com.github.houbb.heaven.support.filter.IFilter; import com.github.houbb.heaven.util.io.FileUtil; import com.github.houbb.heaven.util.lang.StringUtil; +import com.github.houbb.heaven.util.util.CharsetUtil; import com.github.houbb.heaven.util.util.CollectionUtil; -import com.github.houbb.sensitive.word.util.CharsetUtils; import org.junit.Ignore; import org.junit.Test; @@ -38,7 +38,6 @@ public class DataInitTest { List trimLines = CollectionUtil.distinct(CollectionUtil.trimCollection(lines)); final String target = "D:\\github\\sensitive-word\\src\\main\\resources\\dict.txt"; - FileUtil.write(target, trimLines); } /** @@ -65,7 +64,7 @@ public class DataInitTest { List resultList = CollectionUtil.distinct(CollectionUtil.filterList(lines, new IFilter() { @Override public boolean filter(String s) { - return CharsetUtils.isContainsChinese(s); + return CharsetUtil.isContainsChinese(s); } })); Collections.sort(resultList); diff --git a/src/test/java/com/github/houbb/sensitive/word/util/StreamUtilsTest.java b/src/test/java/com/github/houbb/sensitive/word/util/StreamUtilsTest.java deleted file mode 100644 index 1b5e8d9..0000000 --- 
a/src/test/java/com/github/houbb/sensitive/word/util/StreamUtilsTest.java +++ /dev/null @@ -1,22 +0,0 @@ -package com.github.houbb.sensitive.word.util; - -import org.junit.Assert; -import org.junit.Test; - -import java.util.List; - -/** - * @author binbin.hou - * @since 0.0.1 - */ -public class StreamUtilsTest { - - @Test - public void sizeTest() { - final String dictPath = "/dict.txt"; - - List stringList = StreamUtils.readAllLines(dictPath); - Assert.assertEquals(183836, stringList.size()); - } - -}