subNodeMap;
+
+ public boolean end() {
+ return end;
+ }
+
+ public WordDataTreeNode end(boolean end) {
+ this.end = end;
+ return this;
+ }
+
+ public WordDataTreeNode getSubNode(final char c) {
+ if(subNodeMap == null) {
+ return null;
+ }
+
+ return subNodeMap.get(c);
+ }
+
+ public WordDataTreeNode addSubNode(char c, WordDataTreeNode subNode) {
+ if(this.subNodeMap == null) {
+ subNodeMap = new HashMap<>();
+ }
+
+ subNodeMap.put(c, subNode);
+ return this;
+ }
+
+}
diff --git a/src/main/java/com/github/houbb/sensitive/word/support/data/WordDatas.java b/src/main/java/com/github/houbb/sensitive/word/support/data/WordDatas.java
new file mode 100644
index 0000000..28333aa
--- /dev/null
+++ b/src/main/java/com/github/houbb/sensitive/word/support/data/WordDatas.java
@@ -0,0 +1,43 @@
+package com.github.houbb.sensitive.word.support.data;
+
+import com.github.houbb.sensitive.word.api.IWordData;
+
+/**
+ * 敏感词 map
+ *
+ * @author binbin.hou
+ * @since 0.3.0
+ */
+public final class WordDatas {
+
+ private WordDatas(){}
+
+ /**
+ * 默认策略
+ * @return 策略
+ * @since 0.3.0
+ */
+ public static IWordData defaults() {
+ return tree();
+ }
+
+ /**
+ * 树模式
+ * @return 树
+ * @since 0.7.0
+ */
+ public static IWordData tree() {
+ return new WordDataTree();
+ }
+
+ /**
+ * 哈希 map 模式
+ * @return 哈希 map 实现
+ * @since 0.7.0
+ */
+ public static IWordData hashMap() {
+ return new WordDataHashMap();
+ }
+
+
+}
diff --git a/src/main/java/com/github/houbb/sensitive/word/support/deny/WordDenys.java b/src/main/java/com/github/houbb/sensitive/word/support/deny/WordDenys.java
index e80be0e..dfcd524 100644
--- a/src/main/java/com/github/houbb/sensitive/word/support/deny/WordDenys.java
+++ b/src/main/java/com/github/houbb/sensitive/word/support/deny/WordDenys.java
@@ -41,7 +41,7 @@ public final class WordDenys {
* @return 结果
* @since 0.0.13
*/
- public static IWordDeny system() {
+ public static IWordDeny defaults() {
return WordDenySystem.getInstance();
}
diff --git a/src/main/java/com/github/houbb/sensitive/word/support/map/SensitiveWordMap.java b/src/main/java/com/github/houbb/sensitive/word/support/map/SensitiveWordMap.java
deleted file mode 100644
index b98c064..0000000
--- a/src/main/java/com/github/houbb/sensitive/word/support/map/SensitiveWordMap.java
+++ /dev/null
@@ -1,265 +0,0 @@
-//package com.github.houbb.sensitive.word.support.map;
-//
-//import com.github.houbb.heaven.annotation.ThreadSafe;
-//import com.github.houbb.heaven.util.guava.Guavas;
-//import com.github.houbb.heaven.util.io.FileUtil;
-//import com.github.houbb.heaven.util.lang.ObjectUtil;
-//import com.github.houbb.heaven.util.lang.StringUtil;
-//import com.github.houbb.heaven.util.util.CollectionUtil;
-//import com.github.houbb.sensitive.word.api.*;
-//import com.github.houbb.sensitive.word.constant.AppConst;
-//import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum;
-//import com.github.houbb.sensitive.word.support.check.SensitiveCheckResult;
-//import com.github.houbb.sensitive.word.support.check.impl.SensitiveCheckUrl;
-//import com.github.houbb.sensitive.word.support.replace.SensitiveWordReplaceContext;
-//import com.github.houbb.sensitive.word.support.result.WordResult;
-//
-//import java.util.Collection;
-//import java.util.HashMap;
-//import java.util.List;
-//import java.util.Map;
-//
-///**
-// * 敏感词 map
-// *
-// * @author binbin.hou
-// * @since 0.0.1
-// */
-//@ThreadSafe
-//public class SensitiveWordMap implements IWordMap {
-//
-// /**
-// * 脱敏单词 map
-// *
-// * @since 0.0.1
-// */
-// private Map innerWordMap;
-//
-// /**
-// * 读取敏感词库,将敏感词放入HashSet中,构建一个DFA算法模型:
-// *
-// * @param collection 敏感词库集合
-// * @since 0.0.1
-// *
-// * 使用对象代码 map 的这种一直递归。
-// * 参考资料:https://www.cnblogs.com/AlanLee/p/5329555.html
-// * https://blog.csdn.net/chenssy/article/details/26961957
-// */
-// @Override
-// @SuppressWarnings("unchecked")
-// public synchronized void initWordMap(Collection collection) {
-// // 避免扩容带来的消耗
-// Map newInnerWordMap = new HashMap(collection.size());
-//
-// for (String key : collection) {
-// if (StringUtil.isEmpty(key)) {
-// continue;
-// }
-//
-// // 用来按照相应的格式保存敏感词库数据
-// char[] chars = key.toCharArray();
-// final int size = chars.length;
-//
-// // 每一个新词的循环,直接将结果设置为当前 map,所有变化都会体现在结果的 map 中
-// Map currentMap = newInnerWordMap;
-//
-// for (int i = 0; i < size; i++) {
-// // 截取敏感词当中的字,在敏感词库中字为HashMap对象的Key键值
-// char charKey = chars[i];
-// // 如果集合存在
-// Object wordMap = currentMap.get(charKey);
-//
-// // 如果集合存在
-// if (ObjectUtil.isNotNull(wordMap)) {
-// // 直接将获取到的 map 当前当前 map 进行继续的操作
-// currentMap = (Map) wordMap;
-// } else {
-// //不存在则,则构建一个新的map,同时将isEnd设置为0,因为他不是最后一
-// Map newWordMap = new HashMap<>(8);
-// newWordMap.put(AppConst.IS_END, false);
-//
-// // 将新的节点放入当前 map 中
-// currentMap.put(charKey, newWordMap);
-//
-// // 将新节点设置为当前节点,方便下一次节点的循环。
-// currentMap = newWordMap;
-// }
-//
-// // 判断是否为最后一个,添加是否结束的标识。
-// if (i == size - 1) {
-// currentMap.put(AppConst.IS_END, true);
-// }
-// }
-// }
-//
-// // 最后更新为新的 map,保证更新过程中旧的数据可用
-// this.innerWordMap = newInnerWordMap;
-// }
-//
-// /**
-// * 是否包含
-// * (1)直接遍历所有
-// * (2)如果遇到,则直接返回 true
-// *
-// * @param string 字符串
-// * @return 是否包含
-// * @since 0.0.1
-// */
-// @Override
-// public boolean contains(String string, final IWordContext context) {
-// if (StringUtil.isEmpty(string)) {
-// return false;
-// }
-//
-// for (int i = 0; i < string.length(); i++) {
-// SensitiveCheckResult checkResult = sensitiveCheck(string, i, ValidModeEnum.FAIL_FAST, context);
-// // 快速返回
-// if (checkResult.index() > 0) {
-// return true;
-// }
-// }
-// return false;
-// }
-//
-// /**
-// * 返回所有对应的敏感词
-// * (1)结果是有序的
-// * (2)为了保留所有的下标,结果从 v0.1.0 之后不再去重。
-// *
-// * @param string 原始字符串
-// * @return 结果
-// * @since 0.0.1
-// */
-// @Override
-// public List findAll(String string, final IWordContext context) {
-// return getSensitiveWords(string, ValidModeEnum.FAIL_OVER, context);
-// }
-//
-// @Override
-// public IWordResult findFirst(String string, final IWordContext context) {
-// List stringList = getSensitiveWords(string, ValidModeEnum.FAIL_FAST, context);
-//
-// if (CollectionUtil.isEmpty(stringList)) {
-// return null;
-// }
-//
-// return stringList.get(0);
-// }
-//
-// @Override
-// public String replace(String target, final IWordContext context) {
-// if(StringUtil.isEmpty(target)) {
-// return target;
-// }
-//
-// return this.replaceSensitiveWord(target, context);
-// }
-//
-// /**
-// * 获取敏感词列表
-// *
-// * @param text 文本
-// * @param modeEnum 模式
-// * @return 结果列表
-// * @since 0.0.1
-// */
-// private List getSensitiveWords(final String text, final ValidModeEnum modeEnum,
-// final IWordContext context) {
-// //1. 是否存在敏感词,如果比存在,直接返回空列表
-// if (StringUtil.isEmpty(text)) {
-// return Guavas.newArrayList();
-// }
-//
-// List resultList = Guavas.newArrayList();
-// for (int i = 0; i < text.length(); i++) {
-// SensitiveCheckResult checkResult = sensitiveCheck(text, i, ValidModeEnum.FAIL_OVER, context);
-// // 命中
-// int wordLength = checkResult.index();
-// if (wordLength > 0) {
-// // 保存敏感词
-// String sensitiveWord = text.substring(i, i + wordLength);
-//
-// // 添加去重
-// WordResult wordResult = WordResult.newInstance()
-// .startIndex(i)
-// .endIndex(i+wordLength)
-// .word(sensitiveWord);
-// resultList.add(wordResult);
-//
-// // 快速返回
-// if (ValidModeEnum.FAIL_FAST.equals(modeEnum)) {
-// break;
-// }
-//
-// // 增加 i 的步长
-// // 为什么要-1,因为默认就会自增1
-// // TODO: 这里可以根据字符串匹配算法优化。
-// i += wordLength - 1;
-// }
-// }
-//
-// return resultList;
-// }
-//
-// /**
-// * 直接替换敏感词,返回替换后的结果
-// * @param target 文本信息
-// * @param context 上下文
-// * @return 脱敏后的字符串
-// * @since 0.0.2
-// */
-// private String replaceSensitiveWord(final String target,
-// final IWordContext context) {
-// if(StringUtil.isEmpty(target)) {
-// return target;
-// }
-// // 用于结果构建
-// StringBuilder resultBuilder = new StringBuilder(target.length());
-//
-// for (int i = 0; i < target.length(); i++) {
-// char currentChar = target.charAt(i);
-// // 内层直接从 i 开始往后遍历,这个算法的,获取第一个匹配的单词
-// SensitiveCheckResult checkResult = sensitiveCheck(target, i, ValidModeEnum.FAIL_OVER, context);
-//
-// // 敏感词
-// int wordLength = checkResult.index();
-// if(wordLength > 0) {
-// // 是否执行替换
-// Class checkClass = checkResult.checkClass();
-// String string = target.substring(i, i+wordLength);
-// if(SensitiveCheckUrl.class.equals(checkClass)
-// && FileUtil.isImage(string)) {
-// // 直接使用原始内容,避免 markdown 图片转换失败
-// resultBuilder.append(string);
-// } else {
-// // 创建上下文
-// ISensitiveWordReplaceContext replaceContext = SensitiveWordReplaceContext.newInstance()
-// .sensitiveWord(string)
-// .wordLength(wordLength);
-// String replaceStr = context.sensitiveWordReplace().replace(replaceContext);
-//
-// resultBuilder.append(replaceStr);
-// }
-//
-// // 直接跳过敏感词的长度
-// i += wordLength-1;
-// } else {
-// // 普通词
-// resultBuilder.append(currentChar);
-// }
-// }
-//
-// return resultBuilder.toString();
-// }
-//
-// @Override
-// public SensitiveCheckResult sensitiveCheck(String txt, int beginIndex, ValidModeEnum validModeEnum, IWordContext context) {
-// // 默认执行敏感词操作
-// context.sensitiveWordMap(innerWordMap);
-//
-// // 责任链模式调用
-// return context.sensitiveCheck()
-// .sensitiveCheck(txt, beginIndex, validModeEnum, context);
-// }
-//
-//}
diff --git a/src/main/java/com/github/houbb/sensitive/word/support/map/WordMaps.java b/src/main/java/com/github/houbb/sensitive/word/support/map/WordMaps.java
deleted file mode 100644
index f759bc2..0000000
--- a/src/main/java/com/github/houbb/sensitive/word/support/map/WordMaps.java
+++ /dev/null
@@ -1,24 +0,0 @@
-package com.github.houbb.sensitive.word.support.map;
-
-import com.github.houbb.sensitive.word.api.IWordMap;
-
-/**
- * 敏感词 map
- *
- * @author binbin.hou
- * @since 0.3.0
- */
-public final class WordMaps {
-
- private WordMaps(){}
-
- /**
- * 默认策略
- * @return 策略
- * @since 0.3.0
- */
- public static IWordMap defaults() {
- return new WordMap();
- }
-
-}
diff --git a/src/main/java/com/github/houbb/sensitive/word/support/replace/SensitiveWordReplaces.java b/src/main/java/com/github/houbb/sensitive/word/support/replace/SensitiveWordReplaces.java
index 552547e..a2f8034 100644
--- a/src/main/java/com/github/houbb/sensitive/word/support/replace/SensitiveWordReplaces.java
+++ b/src/main/java/com/github/houbb/sensitive/word/support/replace/SensitiveWordReplaces.java
@@ -31,4 +31,13 @@ public final class SensitiveWordReplaces {
return new SensitiveWordReplaceChar();
}
+ /**
+ * 默认替换策略,当前等价于 chars(),使用 * 字符替换
+ * @return 结果
+ * @since 0.7.0
+ */
+ public static ISensitiveWordReplace defaults() {
+ return chars();
+ }
+
}
diff --git a/src/test/java/com/github/houbb/sensitive/word/benchmark/BenchmarkTimesTest.java b/src/test/java/com/github/houbb/sensitive/word/benchmark/BenchmarkTimesTest.java
index c0aa035..d820e3c 100644
--- a/src/test/java/com/github/houbb/sensitive/word/benchmark/BenchmarkTimesTest.java
+++ b/src/test/java/com/github/houbb/sensitive/word/benchmark/BenchmarkTimesTest.java
@@ -13,6 +13,7 @@ public class BenchmarkTimesTest {
* 测试基准:100+字符串 * 10W次
*
* V0.6.0: 1470ms,接近 7.2W QPS
+ * V0.7.0: 1380ms
*/
@Test
public void onlyWordAndNoReplaceTest() {
@@ -45,6 +46,7 @@ public class BenchmarkTimesTest {
* 测试基准:100+字符串 * 10W次
*
* V0.6.0: 2744ms, 约 3.7W QPS
+ * V0.7.0: 2723ms
*/
@Test
public void onlyWordAndWithReplaceTest() {
diff --git a/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsDataTest.java b/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsDataTest.java
new file mode 100644
index 0000000..104d84b
--- /dev/null
+++ b/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsDataTest.java
@@ -0,0 +1,27 @@
+package com.github.houbb.sensitive.word.bs;
+
+import com.github.houbb.sensitive.word.support.data.WordDatas;
+import org.junit.Assert;
+import org.junit.Test;
+
+/**
+ * project: sensitive-word-SensitiveWordBsDataTest
+ * create on 2020/1/7 23:43
+ *
+ * @author binbin.hou
+ * @since 0.7.0
+ */
+public class SensitiveWordBsDataTest {
+
+ @Test
+ public void wordDataConfigTest() {
+ SensitiveWordBs wordBs = SensitiveWordBs.newInstance()
+ .wordData(WordDatas.tree())
+ .init();
+
+ final String text = "五星红旗迎风飘扬,毛主席的画像屹立在天安门前。";
+ Assert.assertTrue(wordBs.contains(text));
+ Assert.assertEquals("[五星红旗, 毛主席, 天安门]", wordBs.findAll(text).toString());
+ }
+
+}
diff --git a/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsTest.java b/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsTest.java
index 979e25d..72f0a03 100644
--- a/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsTest.java
+++ b/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsTest.java
@@ -106,8 +106,8 @@ public class SensitiveWordBsTest {
@Test
public void configTest() {
SensitiveWordBs wordBs = SensitiveWordBs.newInstance()
- .wordDeny(WordDenys.system())
- .wordAllow(WordAllows.system())
+ .wordDeny(WordDenys.defaults())
+ .wordAllow(WordAllows.defaults())
.init();
final String text = "五星红旗迎风飘扬,毛主席的画像屹立在天安门前。";
diff --git a/src/test/java/com/github/houbb/sensitive/word/bugs/b20211211/MySensitiveTest.java b/src/test/java/com/github/houbb/sensitive/word/bugs/b20211211/MySensitiveTest.java
index 1cb2144..69a75cb 100644
--- a/src/test/java/com/github/houbb/sensitive/word/bugs/b20211211/MySensitiveTest.java
+++ b/src/test/java/com/github/houbb/sensitive/word/bugs/b20211211/MySensitiveTest.java
@@ -12,8 +12,8 @@ public class MySensitiveTest {
@Test
public void test() {
- IWordDeny wordDeny = WordDenys.chains(WordDenys.system(), new MyWordDeny());
- IWordAllow wordAllow = WordAllows.chains(WordAllows.system(), new MyWordAllow());
+ IWordDeny wordDeny = WordDenys.chains(WordDenys.defaults(), new MyWordDeny());
+ IWordAllow wordAllow = WordAllows.chains(WordAllows.defaults(), new MyWordAllow());
SensitiveWordBs sensitiveWordBs = SensitiveWordBs.newInstance()
.wordAllow(wordAllow)
.wordDeny(wordDeny)// 各种其他配置
diff --git a/src/test/java/com/github/houbb/sensitive/word/bugs/b32/MyWordDenyChineseTest.java b/src/test/java/com/github/houbb/sensitive/word/bugs/b32/MyWordDenyChineseTest.java
index 282b4da..bd4c7b9 100644
--- a/src/test/java/com/github/houbb/sensitive/word/bugs/b32/MyWordDenyChineseTest.java
+++ b/src/test/java/com/github/houbb/sensitive/word/bugs/b32/MyWordDenyChineseTest.java
@@ -10,7 +10,7 @@ public class MyWordDenyChineseTest {
@Test
public void test() {
- IWordDeny wordDeny = WordDenys.chains(WordDenys.system(), new MyWordDenyChineseNum());
+ IWordDeny wordDeny = WordDenys.chains(WordDenys.defaults(), new MyWordDenyChineseNum());
SensitiveWordBs sensitiveWordBs = SensitiveWordBs.newInstance()
.wordDeny(wordDeny)// 各种其他配置
.init();// init() 初始化敏感词字典
diff --git a/src/test/java/com/github/houbb/sensitive/word/define/SensitiveWordBsDefineTest.java b/src/test/java/com/github/houbb/sensitive/word/define/SensitiveWordBsDefineTest.java
index 63c982c..88848d3 100644
--- a/src/test/java/com/github/houbb/sensitive/word/define/SensitiveWordBsDefineTest.java
+++ b/src/test/java/com/github/houbb/sensitive/word/define/SensitiveWordBsDefineTest.java
@@ -30,8 +30,8 @@ public class SensitiveWordBsDefineTest {
public void defineChainsTest() {
String text = "这是一个测试。我的自定义敏感词。";
- IWordDeny wordDeny = WordDenys.chains(WordDenys.system(), new MyWordDeny());
- IWordAllow wordAllow = WordAllows.chains(WordAllows.system(), new MyWordAllow());
+ IWordDeny wordDeny = WordDenys.chains(WordDenys.defaults(), new MyWordDeny());
+ IWordAllow wordAllow = WordAllows.chains(WordAllows.defaults(), new MyWordAllow());
SensitiveWordBs wordBs = SensitiveWordBs.newInstance()
.wordDeny(wordDeny)
diff --git a/src/test/java/com/github/houbb/sensitive/word/memory/DataMemoryTest.java b/src/test/java/com/github/houbb/sensitive/word/memory/DataMemoryTest.java
new file mode 100644
index 0000000..6e30cae
--- /dev/null
+++ b/src/test/java/com/github/houbb/sensitive/word/memory/DataMemoryTest.java
@@ -0,0 +1,49 @@
+package com.github.houbb.sensitive.word.memory;
+
+import com.github.houbb.heaven.util.io.StreamUtil;
+import com.github.houbb.sensitive.word.api.IWordData;
+import com.github.houbb.sensitive.word.support.data.WordDatas;
+import org.apache.lucene.util.RamUsageEstimator;
+import org.junit.Ignore;
+import org.junit.Test;
+
+import java.util.List;
+
+/**
+ * 数据内存测试
+ *
+ * @since 0.7.0
+ */
+@Ignore
+public class DataMemoryTest {
+
+ /**
+ * 35.5 MB
+ */
+ @Test
+ public void hashMapTest() {
+ List allLines = StreamUtil.readAllLines("/dict.txt");
+ IWordData wordData = WordDatas.defaults();
+
+ wordData.initWordData(allLines);
+
+ //计算指定对象及其引用树上的所有对象的综合大小,返回可读的结果,如:2KB
+ String humanSize = RamUsageEstimator.humanSizeOf(wordData);
+ System.out.println(humanSize);
+ }
+
+
+ //33.4 MB
+ @Test
+ public void treeTest() {
+ List allLines = StreamUtil.readAllLines("/dict.txt");
+ IWordData wordData = WordDatas.tree();
+
+ wordData.initWordData(allLines);
+
+ //计算指定对象及其引用树上的所有对象的综合大小,返回可读的结果,如:2KB
+ String humanSize = RamUsageEstimator.humanSizeOf(wordData);
+ System.out.println(humanSize);
+ }
+
+}
diff --git a/src/test/java/com/github/houbb/sensitive/word/spring/SpringSensitiveWordConfig.java b/src/test/java/com/github/houbb/sensitive/word/spring/SpringSensitiveWordConfig.java
index 85ca8e4..af7b545 100644
--- a/src/test/java/com/github/houbb/sensitive/word/spring/SpringSensitiveWordConfig.java
+++ b/src/test/java/com/github/houbb/sensitive/word/spring/SpringSensitiveWordConfig.java
@@ -29,7 +29,7 @@ public class SpringSensitiveWordConfig {
@Bean
public SensitiveWordBs sensitiveWordBs() {
SensitiveWordBs sensitiveWordBs = SensitiveWordBs.newInstance()
- .wordAllow(WordAllows.chains(WordAllows.system(), myDdWordAllow))
+ .wordAllow(WordAllows.chains(WordAllows.defaults(), myDdWordAllow))
.wordDeny(myDdWordDeny)
// 各种其他配置
.init();