mirror of
https://github.com/houbb/sensitive-word.git
synced 2026-03-22 16:37:17 +08:00
release branch 0.1.1
This commit is contained in:
@@ -9,8 +9,9 @@ import com.github.houbb.sensitive.word.support.allow.WordAllows;
|
||||
import com.github.houbb.sensitive.word.support.deny.WordDenys;
|
||||
import com.github.houbb.sensitive.word.support.map.SensitiveWordMap;
|
||||
import com.github.houbb.sensitive.word.support.result.WordResultHandlers;
|
||||
import com.github.houbb.sensitive.word.utils.InnerFormatUtils;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* 敏感词引导类
|
||||
@@ -64,7 +65,7 @@ public class SensitiveWordBs {
|
||||
// 加载配置信息
|
||||
List<String> denyList = wordDeny.deny();
|
||||
List<String> allowList = wordAllow.allow();
|
||||
List<String> results = CollectionUtil.difference(denyList, allowList);
|
||||
List<String> results = getActualDenyList(denyList, allowList);
|
||||
|
||||
// 初始化 DFA 信息
|
||||
if(sensitiveWordMap == null) {
|
||||
@@ -74,6 +75,59 @@ public class SensitiveWordBs {
|
||||
sensitiveWordMap.initWordMap(results);
|
||||
}
|
||||
|
||||
/**
|
||||
* 获取禁止列表中真正的禁止词汇
|
||||
* @param denyList 禁止
|
||||
* @param allowList 允许
|
||||
* @return 结果
|
||||
* @since 0.1.1
|
||||
*/
|
||||
List<String> getActualDenyList(List<String> denyList,
|
||||
List<String> allowList) {
|
||||
if(CollectionUtil.isEmpty(denyList)) {
|
||||
return Collections.emptyList();
|
||||
}
|
||||
if(CollectionUtil.isEmpty(allowList)) {
|
||||
return denyList;
|
||||
}
|
||||
|
||||
List<String> formatDenyList = this.formatWordList(denyList);
|
||||
List<String> formatAllowList = this.formatWordList(allowList);
|
||||
|
||||
List<String> resultList = new ArrayList<>();
|
||||
// O(1)
|
||||
Set<String> allowSet = new HashSet<>(formatAllowList);
|
||||
|
||||
for(String deny : formatDenyList) {
|
||||
if(allowSet.contains(deny)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
resultList.add(deny);
|
||||
}
|
||||
return resultList;
|
||||
}
|
||||
|
||||
/**
|
||||
* 数据格式化处理
|
||||
* @param list 列表
|
||||
* @return 结果
|
||||
* @since 0.1.1
|
||||
*/
|
||||
private List<String> formatWordList(List<String> list) {
|
||||
if(CollectionUtil.isEmpty(list)) {
|
||||
return list;
|
||||
}
|
||||
|
||||
List<String> resultList = new ArrayList<>(list.size());
|
||||
for(String word : list) {
|
||||
String formatWord = InnerFormatUtils.format(word, this.context);
|
||||
resultList.add(formatWord);
|
||||
}
|
||||
|
||||
return resultList;
|
||||
}
|
||||
|
||||
/**
|
||||
* 新建验证实例
|
||||
* <p>
|
||||
|
||||
@@ -0,0 +1,40 @@
|
||||
package com.github.houbb.sensitive.word.utils;
|
||||
|
||||
import com.github.houbb.heaven.support.instance.impl.Instances;
|
||||
import com.github.houbb.heaven.util.lang.StringUtil;
|
||||
import com.github.houbb.sensitive.word.api.ICharFormat;
|
||||
import com.github.houbb.sensitive.word.api.IWordContext;
|
||||
import com.github.houbb.sensitive.word.support.format.CharFormatChain;
|
||||
|
||||
/**
|
||||
* 内部格式化工具类
|
||||
* @since 0.1.1
|
||||
*/
|
||||
public final class InnerFormatUtils {
|
||||
|
||||
private InnerFormatUtils(){}
|
||||
|
||||
/**
|
||||
* 格式化
|
||||
* @param original 原始
|
||||
* @param context 上下文
|
||||
* @return 结果
|
||||
* @since 0.1.1
|
||||
*/
|
||||
public static String format(String original, IWordContext context) {
|
||||
if(StringUtil.isEmpty(original)) {
|
||||
return original;
|
||||
}
|
||||
|
||||
StringBuilder stringBuilder = new StringBuilder();
|
||||
ICharFormat charFormat = Instances.singleton(CharFormatChain.class);
|
||||
char[] chars = original.toCharArray();
|
||||
for(char c : chars) {
|
||||
char cf = charFormat.format(c, context);
|
||||
stringBuilder.append(cf);
|
||||
}
|
||||
|
||||
return stringBuilder.toString();
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,27 @@
|
||||
package com.github.houbb.sensitive.word.bugs.b20211211;
|
||||
|
||||
import com.github.houbb.sensitive.word.api.IWordAllow;
|
||||
import com.github.houbb.sensitive.word.api.IWordDeny;
|
||||
import com.github.houbb.sensitive.word.bs.SensitiveWordBs;
|
||||
import com.github.houbb.sensitive.word.support.allow.WordAllows;
|
||||
import com.github.houbb.sensitive.word.support.deny.WordDenys;
|
||||
import org.junit.Test;
|
||||
|
||||
public class MySensitiveTest {
|
||||
|
||||
|
||||
@Test
|
||||
public void test() {
|
||||
IWordDeny wordDeny = WordDenys.chains(WordDenys.system(), new MyWordDeny());
|
||||
IWordAllow wordAllow = WordAllows.chains(WordAllows.system(), new MyWordAllow());
|
||||
SensitiveWordBs sensitiveWordBs = SensitiveWordBs.newInstance()
|
||||
.wordAllow(wordAllow)
|
||||
.wordDeny(wordDeny)// 各种其他配置
|
||||
.init();// init() 初始化敏感词字典
|
||||
|
||||
final String text = "五星红旗 我的自定义敏感词尼玛";
|
||||
//输出测试结果
|
||||
System.out.println("敏感词:"+sensitiveWordBs.findAll(text).toString());
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,15 @@
|
||||
package com.github.houbb.sensitive.word.bugs.b20211211;
|
||||
|
||||
import com.github.houbb.sensitive.word.api.IWordAllow;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
public class MyWordAllow implements IWordAllow {
|
||||
|
||||
@Override
|
||||
public List<String> allow() {
|
||||
return Arrays.asList("五星红旗");
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,16 @@
|
||||
package com.github.houbb.sensitive.word.bugs.b20211211;
|
||||
|
||||
import com.github.houbb.sensitive.word.api.IWordDeny;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
public class MyWordDeny implements IWordDeny {
|
||||
|
||||
@Override
|
||||
public List<String> deny() {
|
||||
return Arrays.asList("尼玛");
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
@@ -0,0 +1 @@
|
||||
package com.github.houbb.sensitive.word.bugs;
|
||||
Reference in New Issue
Block a user