mirror of
https://github.com/houbb/sensitive-word.git
synced 2026-03-22 16:37:17 +08:00
release branch 0.0.1
This commit is contained in:
@@ -1,9 +1,10 @@
|
||||
package com.github.houbb.sensitive.word.api;
|
||||
|
||||
import com.github.houbb.sensitive.word.model.WordMapEntry;
|
||||
import com.github.houbb.heaven.util.lang.StringUtil;
|
||||
import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum;
|
||||
|
||||
import java.util.Collection;
|
||||
import java.util.Map;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* 敏感词 map
|
||||
@@ -12,12 +13,38 @@ import java.util.Map;
|
||||
*/
|
||||
public interface IWordMap {
|
||||
|
||||
|
||||
/**
|
||||
* 获取单词 map
|
||||
* @param collection 集合
|
||||
* @return 敏感词 map
|
||||
* 初始化敏感词 map
|
||||
* @param collection 集合信息
|
||||
* @since 0.0.1
|
||||
*/
|
||||
Map<String, WordMapEntry> getWordMap(final Collection<String> collection);
|
||||
void initWordMap(Collection<String> collection);
|
||||
|
||||
/**
|
||||
* 是否包含敏感词
|
||||
* @param string 字符串
|
||||
* @return 是否包含
|
||||
* @since 0.0.1
|
||||
* @see ValidModeEnum#FAIL_FAST 建议使用快速返回模式
|
||||
*/
|
||||
boolean contains(final String string);
|
||||
|
||||
/**
|
||||
* 返回所有对应的敏感词
|
||||
* @param string 原始字符串
|
||||
* @return 结果
|
||||
* @since 0.0.1
|
||||
* @see ValidModeEnum#FAIL_OVER 建议使用全部检测返回模式
|
||||
*/
|
||||
List<String> findAll(final String string);
|
||||
|
||||
/**
|
||||
* 返回第一个对应的敏感词
|
||||
* @param string 原始字符串
|
||||
* @return 结果
|
||||
* @since 0.0.1
|
||||
*/
|
||||
String findFirst(final String string);
|
||||
|
||||
}
|
||||
|
||||
@@ -5,7 +5,9 @@ import com.github.houbb.heaven.util.guava.Guavas;
|
||||
import com.github.houbb.heaven.util.lang.StringUtil;
|
||||
import com.github.houbb.heaven.util.util.CollectionUtil;
|
||||
import com.github.houbb.sensitive.word.api.IWordData;
|
||||
import com.github.houbb.sensitive.word.api.IWordMap;
|
||||
import com.github.houbb.sensitive.word.support.data.SensitiveWordData;
|
||||
import com.github.houbb.sensitive.word.support.map.SensitiveWordMap;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
@@ -22,14 +24,6 @@ public class SensitiveWordBs {
|
||||
*/
|
||||
private SensitiveWordBs(){}
|
||||
|
||||
/**
|
||||
* 待验证字符串信息
|
||||
* ps: 可以添加多个辅助类 xxxStringProvider
|
||||
* 如 FileXXX
|
||||
* @since 0.0.1
|
||||
*/
|
||||
private volatile String target;
|
||||
|
||||
/**
|
||||
* 敏感数据信息
|
||||
* @since 0.0.1
|
||||
@@ -37,82 +31,73 @@ public class SensitiveWordBs {
|
||||
private IWordData sensitiveWordData = Instances.singleton(SensitiveWordData.class);
|
||||
|
||||
/**
|
||||
* 新建验证实例
|
||||
* @param string 字符串
|
||||
* @return this
|
||||
* 敏感词 map
|
||||
* @since 0.0.1
|
||||
*/
|
||||
public static SensitiveWordBs newInstance(final String string) {
|
||||
SensitiveWordBs instance = new SensitiveWordBs();
|
||||
instance.target = string;
|
||||
return instance;
|
||||
private IWordMap sensitiveWordMap = Instances.singleton(SensitiveWordMap.class);
|
||||
|
||||
/**
|
||||
* 获取单例信息
|
||||
* @since 0.0.1
|
||||
*/
|
||||
private static final SensitiveWordBs INSTANCE;
|
||||
|
||||
static {
|
||||
synchronized (SensitiveWordBs.class) {
|
||||
INSTANCE = new SensitiveWordBs();
|
||||
List<String> lines = INSTANCE.sensitiveWordData.getWordData();
|
||||
INSTANCE.sensitiveWordMap.initWordMap(lines);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 指定目标字符串信息
|
||||
* @param string 字符串
|
||||
* 新建验证实例
|
||||
* @return this
|
||||
* @since 0.0.1
|
||||
*/
|
||||
public SensitiveWordBs target(final String string) {
|
||||
this.target = string;
|
||||
return this;
|
||||
public static SensitiveWordBs getInstance() {
|
||||
return INSTANCE;
|
||||
}
|
||||
|
||||
/**
|
||||
* 是否合法
|
||||
* @param target 目标字符串
|
||||
* @return 是否
|
||||
* @since 0.0.1
|
||||
* @see #contains() 是否包含
|
||||
*/
|
||||
public boolean valid() {
|
||||
return !contains();
|
||||
public boolean valid(final String target) {
|
||||
return !contains(target);
|
||||
}
|
||||
|
||||
/**
|
||||
* 是否包含敏感词
|
||||
* @param target 目标字符串
|
||||
* @return 是否
|
||||
* @since 0.0.1
|
||||
* @see #findAll() 列表不为空即可
|
||||
*/
|
||||
public boolean contains() {
|
||||
return CollectionUtil.isNotEmpty(findAll());
|
||||
public boolean contains(final String target) {
|
||||
return this.sensitiveWordMap.contains(target);
|
||||
}
|
||||
|
||||
/**
|
||||
* 返回所有的敏感词
|
||||
* 1. 这里是默认去重的。
|
||||
* 1. 这里是默认去重的,且是有序的。
|
||||
* 2. 如果不存在,返回空列表
|
||||
* @return 敏感词列表
|
||||
* @since 0.0.1
|
||||
*/
|
||||
public List<String> findAll() {
|
||||
if(StringUtil.isEmpty(target)) {
|
||||
return Guavas.newArrayList();
|
||||
}
|
||||
|
||||
// 分词
|
||||
return null;
|
||||
public List<String> findAll(final String target) {
|
||||
return this.sensitiveWordMap.findAll(target);
|
||||
}
|
||||
|
||||
/**
|
||||
* 执行过滤
|
||||
* 1. 使用默认策略
|
||||
* 2. 默认策略就是直接移除。
|
||||
* @return 过滤后的结果
|
||||
* 返回第一个敏感词
|
||||
* (1)如果不存在,则返回 {@code null}
|
||||
* @return 敏感词
|
||||
* @since 0.0.1
|
||||
*/
|
||||
private String filter() {
|
||||
return filter(StringUtil.EMPTY);
|
||||
}
|
||||
|
||||
/**
|
||||
* 指定过滤的字符,执行过滤
|
||||
* 1. filter 只是一种特殊的字符串替换策略。
|
||||
* @return 过滤后的结果
|
||||
* @since 0.0.1
|
||||
*/
|
||||
private String filter(final String filter) {
|
||||
return "";
|
||||
public String findFirst(final String target) {
|
||||
return this.sensitiveWordMap.findFirst(target);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -0,0 +1,19 @@
|
||||
package com.github.houbb.sensitive.word.constant;
|
||||
|
||||
/**
 * Application-wide constants.
 *
 * <p> project: sensitive-word-AppConst </p>
 * <p> create on 2020/1/7 23:39 </p>
 *
 * @author Administrator
 * @since 0.0.1
 */
public final class AppConst {

    /**
     * Constants holder: not meant to be instantiated.
     */
    private AppConst() {
    }

    /**
     * Marker for "is this the end" (used as the end-of-word key).
     * PS: in a way, I am not very fond of this style.
     * @since 0.0.1
     */
    public static final String IS_END = "isEnd";

}
|
||||
@@ -0,0 +1,23 @@
|
||||
package com.github.houbb.sensitive.word.constant.enums;
|
||||
|
||||
/**
 * Validation (scan) modes.
 *
 * <p> project: sensitive-word-ValidModeEnum </p>
 * <p> create on 2020/1/7 22:46 </p>
 *
 * @author Administrator
 * @since 0.0.1
 */
public enum ValidModeEnum {

    /**
     * Fail fast.
     * @since 0.0.1
     */
    FAIL_FAST,

    /**
     * Traverse everything.
     * @since 0.0.1
     */
    FAIL_OVER

}
|
||||
@@ -0,0 +1,8 @@
|
||||
/**
|
||||
* <p> project: sensitive-word-package-info </p>
|
||||
* <p> create on 2020/1/7 22:46 </p>
|
||||
*
|
||||
* @author Administrator
|
||||
* @since 0.0.1
|
||||
*/
|
||||
package com.github.houbb.sensitive.word.constant;
|
||||
@@ -1,58 +0,0 @@
|
||||
package com.github.houbb.sensitive.word.model;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
 * For every sensitive word, the first character is the key.
 *
 * @author binbin.hou
 * @since 0.0.1
 */
public class WordMapEntry {

    /**
     * A single word.
     * @since 0.0.1
     */
    private String word;

    /**
     * Whether this entry is an end.
     * @since 0.0.1
     */
    private boolean isEnd;

    /**
     * The entries of the next level.
     * @since 0.0.1
     */
    private List<WordMapEntry> nextEntryList;

    /**
     * @return the word held by this entry
     */
    public String word() {
        return word;
    }

    /**
     * Sets the word.
     *
     * @param word the word to hold
     * @return this entry, for chaining
     */
    public WordMapEntry word(String word) {
        this.word = word;
        return this;
    }

    /**
     * @return the end flag of this entry
     */
    public boolean end() {
        return isEnd;
    }

    /**
     * Sets the end flag.
     *
     * @param end the new flag value
     * @return this entry, for chaining
     */
    public WordMapEntry end(boolean end) {
        this.isEnd = end;
        return this;
    }

    /**
     * @return the next-level entries exactly as stored (not copied);
     *         {@code null} until a list has been set
     */
    public List<WordMapEntry> nextEntryList() {
        return nextEntryList;
    }

    /**
     * Sets the next-level entries; the list is stored as given, not copied.
     *
     * @param nextEntryList the next-level entries
     * @return this entry, for chaining
     */
    public WordMapEntry nextEntryList(List<WordMapEntry> nextEntryList) {
        this.nextEntryList = nextEntryList;
        return this;
    }

}
|
||||
@@ -9,6 +9,7 @@ import java.util.List;
|
||||
|
||||
/**
|
||||
* 数据加载使用单例的模式,只需要加载一次即可。
|
||||
*
|
||||
* @author binbin.hou
|
||||
* @since 0.0.1
|
||||
*/
|
||||
@@ -17,18 +18,22 @@ public class SensitiveWordData implements IWordData {
|
||||
|
||||
/**
|
||||
* 默认的内置行
|
||||
*
|
||||
* @since 0.0.1
|
||||
*/
|
||||
private static List<String> defaultLines;
|
||||
|
||||
static {
|
||||
long start = System.currentTimeMillis();
|
||||
defaultLines = new ArrayList<>(183837);
|
||||
defaultLines = StreamUtils.readAllLines("/dict.txt");
|
||||
long end = System.currentTimeMillis();
|
||||
System.out.println("Sensitive data loaded!, cost time: " + (end-start) + " ms");
|
||||
synchronized (SensitiveWordData.class) {
|
||||
long start = System.currentTimeMillis();
|
||||
defaultLines = new ArrayList<>(183837);
|
||||
defaultLines = StreamUtils.readAllLines("/dict.txt");
|
||||
long end = System.currentTimeMillis();
|
||||
System.out.println("Sensitive data loaded!, cost time: " + (end - start) + " ms");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public List<String> getWordData() {
|
||||
return defaultLines;
|
||||
|
||||
@@ -1,12 +1,18 @@
|
||||
package com.github.houbb.sensitive.word.support.map;
|
||||
|
||||
import com.github.houbb.heaven.annotation.ThreadSafe;
|
||||
import com.github.houbb.heaven.util.guava.Guavas;
|
||||
import com.github.houbb.heaven.util.lang.ObjectUtil;
|
||||
import com.github.houbb.heaven.util.lang.StringUtil;
|
||||
import com.github.houbb.heaven.util.util.CollectionUtil;
|
||||
import com.github.houbb.heaven.util.util.MapUtil;
|
||||
import com.github.houbb.sensitive.word.api.IWordMap;
|
||||
import com.github.houbb.sensitive.word.model.WordMapEntry;
|
||||
import com.github.houbb.sensitive.word.constant.AppConst;
|
||||
import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum;
|
||||
|
||||
import java.util.Collection;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
@@ -18,89 +24,216 @@ import java.util.Map;
|
||||
@ThreadSafe
|
||||
public class SensitiveWordMap implements IWordMap {
|
||||
|
||||
/**
|
||||
* 脱敏单词 map
|
||||
*
|
||||
* @since 0.0.1
|
||||
*/
|
||||
private static Map sensitiveWordMap;
|
||||
|
||||
/**
|
||||
* 读取敏感词库,将敏感词放入HashSet中,构建一个DFA算法模型:
|
||||
*
|
||||
* <pre>
|
||||
* 中 = {
|
||||
* isEnd = 0
|
||||
* 国 = {
|
||||
* isEnd = 1
|
||||
* 人 = {isEnd = 0
|
||||
* 民 = {isEnd = 1}
|
||||
* }
|
||||
* 男 = {
|
||||
* isEnd = 0
|
||||
* 人 = {
|
||||
* isEnd = 1
|
||||
* }
|
||||
* }
|
||||
* }
|
||||
* }
|
||||
*
|
||||
* 五 = {
|
||||
* isEnd = 0
|
||||
* 星 = {
|
||||
* isEnd = 0
|
||||
* 红 = {
|
||||
* isEnd = 0
|
||||
* 旗 = {
|
||||
* isEnd = 1
|
||||
* }
|
||||
* }
|
||||
* }
|
||||
* }
|
||||
* </pre>
|
||||
*
|
||||
* key: 对应的中文
|
||||
* value: 是否为结束。
|
||||
*
|
||||
* 日本人,日本鬼子为例
|
||||
*
|
||||
* 1、在hashMap中查询“日”看其是否在hashMap中存在,如果不存在,则证明已“日”开头的敏感词还不存在,则我们直接构建这样的一棵树。跳至3。
|
||||
* 2、如果在hashMap中查找到了,表明存在以“日”开头的敏感词,设置hashMap = hashMap.get("日"),跳至1,依次匹配“本”、“人”。
|
||||
* 3、判断该字是否为该词中的最后一个字。若是表示敏感词结束,设置标志位isEnd = 1,否则设置标志位isEnd = 0;
|
||||
*
|
||||
* @param collection 敏感词库集合
|
||||
* @since 0.0.1
|
||||
*
|
||||
* <p>
|
||||
* 使用对象代码 map 的这种一直递归。
|
||||
*
|
||||
* 参考资料:https://www.cnblogs.com/AlanLee/p/5329555.html
|
||||
* https://blog.csdn.net/chenssy/article/details/26961957
|
||||
*/
|
||||
@Override
|
||||
public Map<String, WordMapEntry> getWordMap(Collection<String> collection) {
|
||||
Map<String, WordMapEntry> resultMap = new HashMap<>(collection.size());
|
||||
@SuppressWarnings("unchecked")
|
||||
public void initWordMap(Collection<String> collection) {
|
||||
// 避免重复加载
|
||||
if (MapUtil.isNotEmpty(sensitiveWordMap)) {
|
||||
return;
|
||||
}
|
||||
|
||||
long startTime = System.currentTimeMillis();
|
||||
// 避免扩容带来的消耗
|
||||
sensitiveWordMap = new HashMap(collection.size());
|
||||
|
||||
for (String key : collection) {
|
||||
if (StringUtil.isEmpty(key)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// 用来按照相应的格式保存敏感词库数据
|
||||
char[] chars = key.toCharArray();
|
||||
final int size = chars.length;
|
||||
|
||||
for (int i = 0; i < size; i++) {
|
||||
String charStr = String.valueOf(chars[i]);
|
||||
// 每一个新词的循环,直接将结果设置为当前 map,所有变化都会体现在结果的 map 中
|
||||
Map currentMap = sensitiveWordMap;
|
||||
|
||||
// 直接获取对应的 map
|
||||
WordMapEntry wordMapEntry = resultMap.get(charStr);
|
||||
for (int i = 0; i < size; i++) {
|
||||
// 截取敏感词当中的字,在敏感词库中字为HashMap对象的Key键值
|
||||
char charKey = chars[i];
|
||||
// 如果集合存在
|
||||
Object wordMap = currentMap.get(charKey);
|
||||
|
||||
// 如果集合存在
|
||||
if(ObjectUtil.isNotNull(wordMapEntry)) {
|
||||
|
||||
if (ObjectUtil.isNotNull(wordMap)) {
|
||||
// 直接将获取到的 map 当前当前 map 进行继续的操作
|
||||
currentMap = (Map) wordMap;
|
||||
} else {
|
||||
// // 如果集合不存在,直接新建一个 map
|
||||
// wordMap = new HashMap<>(size);
|
||||
// // 判断是否为最后一个,如果是则设置为1
|
||||
// boolean isEnd = i == size - 1;
|
||||
// // 设置最后的结果
|
||||
// wordMap.put(charStr, isEnd);
|
||||
//不存在则,则构建一个新的map,同时将isEnd设置为0,因为他不是最后一
|
||||
Map<String, Boolean> newWordMap = new HashMap<>();
|
||||
newWordMap.put(AppConst.IS_END, false);
|
||||
|
||||
// 将新的节点放入当前 map 中
|
||||
currentMap.put(charKey, newWordMap);
|
||||
|
||||
// 将新节点设置为当前节点,方便下一次节点的循环。
|
||||
currentMap = newWordMap;
|
||||
}
|
||||
|
||||
// 判断是否为最后一个,添加是否结束的标识。
|
||||
if (i == size - 1) {
|
||||
currentMap.put(AppConst.IS_END, true);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
return resultMap;
|
||||
|
||||
long endTime = System.currentTimeMillis();
|
||||
System.out.println("Init sensitive word map end! Cost time " + (endTime-startTime) + "ms");
|
||||
}
|
||||
|
||||
public static void main(String[] args) {
|
||||
System.out.println("s".toCharArray()[0]+"");
|
||||
/**
|
||||
* 是否包含
|
||||
* (1)直接遍历所有
|
||||
* (2)如果遇到,则直接返回 true
|
||||
*
|
||||
* @param string 字符串
|
||||
* @return 是否包含
|
||||
* @since 0.0.1
|
||||
*/
|
||||
@Override
|
||||
public boolean contains(String string) {
|
||||
if (StringUtil.isEmpty(string)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
for (int i = 0; i < string.length(); i++) {
|
||||
int checkResult = checkSensitiveWord(string, i);
|
||||
// 快速返回
|
||||
if (checkResult > 0) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* 返回所有对应的敏感词
|
||||
* (1)结果是有序的
|
||||
* (2)结果是默认去重的
|
||||
*
|
||||
* @param string 原始字符串
|
||||
* @return 结果
|
||||
* @since 0.0.1
|
||||
*/
|
||||
@Override
|
||||
public List<String> findAll(String string) {
|
||||
return getSensitiveWords(string, ValidModeEnum.FAIL_OVER);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String findFirst(String string) {
|
||||
List<String> stringList = getSensitiveWords(string, ValidModeEnum.FAIL_FAST);
|
||||
|
||||
if (CollectionUtil.isEmpty(stringList)) {
|
||||
return null;
|
||||
}
|
||||
|
||||
return stringList.get(0);
|
||||
}
|
||||
|
||||
/**
|
||||
* 获取敏感词列表
|
||||
*
|
||||
* @param text 文本
|
||||
* @param modeEnum 模式
|
||||
* @return 结果列表
|
||||
* @since 0.0.1
|
||||
*/
|
||||
private List<String> getSensitiveWords(final String text, final ValidModeEnum modeEnum) {
|
||||
//1. 是否存在敏感词,如果比存在,直接返回空列表
|
||||
if (StringUtil.isEmpty(text)) {
|
||||
return Guavas.newArrayList();
|
||||
}
|
||||
|
||||
List<String> resultList = Guavas.newArrayList();
|
||||
for (int i = 0; i < text.length(); i++) {
|
||||
int wordLength = checkSensitiveWord(text, i);
|
||||
|
||||
// 命中
|
||||
if (wordLength > 0) {
|
||||
// 保存敏感词
|
||||
String sensitiveWord = text.substring(i, i+wordLength);
|
||||
|
||||
// 添加去重
|
||||
if(!resultList.contains(sensitiveWord)) {
|
||||
resultList.add(sensitiveWord);
|
||||
}
|
||||
|
||||
// 快速返回
|
||||
if (ValidModeEnum.FAIL_FAST.equals(modeEnum)) {
|
||||
break;
|
||||
}
|
||||
|
||||
// 增加 i 的步长
|
||||
// 为什么要-1,因为默认就会自增1
|
||||
i += wordLength - 1;
|
||||
}
|
||||
}
|
||||
|
||||
return resultList;
|
||||
}
|
||||
|
||||
/**
|
||||
* 检查敏感词数量
|
||||
* <p>
|
||||
* (1)如果未命中敏感词,直接返回 0
|
||||
* (2)命中敏感词,则返回敏感词的长度。
|
||||
*
|
||||
* @param txt 文本信息
|
||||
* @param beginIndex 开始下标
|
||||
* @return 敏感词对应的长度
|
||||
* @since 0.0.1
|
||||
*/
|
||||
private int checkSensitiveWord(String txt, int beginIndex) {
|
||||
Map nowMap = sensitiveWordMap;
|
||||
|
||||
boolean flag = false;
|
||||
// 记录敏感词的长度
|
||||
int sensitiveWordLength = 0;
|
||||
|
||||
for (int i = beginIndex; i < txt.length(); i++) {
|
||||
char charKey = txt.charAt(i);
|
||||
// 判断该字是否存在于敏感词库中
|
||||
// 并且将 nowMap 替换为新的 map,进入下一层的循环。
|
||||
nowMap = (Map) nowMap.get(charKey);
|
||||
if (ObjectUtil.isNotNull(nowMap)) {
|
||||
sensitiveWordLength++;
|
||||
|
||||
// 判断是否是敏感词的结尾字,如果是结尾字则判断是否继续检测
|
||||
boolean isEnd = (boolean) nowMap.get(AppConst.IS_END);
|
||||
if (isEnd) {
|
||||
flag = true;
|
||||
|
||||
// 这里直接默认 fail-fast 即可。
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
// 直接跳出循环
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!flag) {
|
||||
sensitiveWordLength = 0;
|
||||
}
|
||||
return sensitiveWordLength;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -164386,7 +164386,6 @@ z以留吧以其以武
|
||||
百花故事
|
||||
百花盛放
|
||||
百行教师贱
|
||||
的
|
||||
的同修
|
||||
的妹 子 都 很 急 约
|
||||
的阿斗
|
||||
|
||||
@@ -0,0 +1,40 @@
|
||||
package com.github.houbb.sensitive.word.bs;
|
||||
|
||||
import org.junit.Assert;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* <p> project: sensitive-word-SensitiveWordBsTest </p>
|
||||
* <p> create on 2020/1/7 23:43 </p>
|
||||
*
|
||||
* @author Administrator
|
||||
* @since 0.0.1
|
||||
*/
|
||||
public class SensitiveWordBsTest {
|
||||
|
||||
@Test
|
||||
public void containsTest() {
|
||||
final String text = "五星红旗迎风飘扬,毛主席的画像屹立在天安门前。。";
|
||||
|
||||
Assert.assertTrue(SensitiveWordBs.getInstance().contains(text));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void findAllTest() {
|
||||
final String text = "五星红旗迎风飘扬,毛主席的画像屹立在天安门前。";
|
||||
|
||||
List<String> wordList = SensitiveWordBs.getInstance().findAll(text);
|
||||
Assert.assertEquals("[五星红旗, 毛主席, 天安门]", wordList.toString());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void findFirstTest() {
|
||||
final String text = "五星红旗迎风飘扬,毛主席的画像屹立在天安门前。";
|
||||
|
||||
String word = SensitiveWordBs.getInstance().findFirst(text);
|
||||
Assert.assertEquals("五星红旗", word);
|
||||
}
|
||||
|
||||
}
|
||||
@@ -102,4 +102,18 @@ public class DataInitTest {
|
||||
FileUtil.write(target, disctinct);
|
||||
}
|
||||
|
||||
@Test
|
||||
@Ignore
|
||||
public void oneWordTest() {
|
||||
final String source = "D:\\_github\\sensitive-word\\src\\main\\resources\\dict.txt";
|
||||
|
||||
List<String> lines = FileUtil.readAllLines(source);
|
||||
for(int i = 0; i < lines.size(); i++) {
|
||||
String line = lines.get(i);
|
||||
if(line.trim().length() == 1) {
|
||||
System.out.println(i + " === " + line);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user