mirror of
https://github.com/houbb/sensitive-word.git
synced 2026-03-22 00:17:35 +08:00
黑白名单共同检测时逻辑的修改
This commit is contained in:
@@ -19,7 +19,7 @@ public interface ISensitiveWord {
|
||||
* @see WordValidModeEnum#FAIL_OVER 建议使用全部检测返回模式
|
||||
*/
|
||||
List<IWordResult> findAll(final String string,
|
||||
final IWordContext context);
|
||||
final IWordContext context);
|
||||
|
||||
/**
|
||||
* 返回第一个对应的敏感词
|
||||
@@ -29,22 +29,20 @@ public interface ISensitiveWord {
|
||||
* @since 0.3.2
|
||||
*/
|
||||
IWordResult findFirst(final String string,
|
||||
final IWordContext context);
|
||||
final IWordContext context);
|
||||
|
||||
/**
|
||||
* 替换所有敏感词内容
|
||||
* <p>
|
||||
*
|
||||
* ps: 这里可以添加优化。
|
||||
*
|
||||
* @param target 目标字符串
|
||||
* @param target 目标字符串
|
||||
* @param context 上下文
|
||||
* @param replace 替换策略
|
||||
* @return 替换后结果
|
||||
* @since 0.3.2
|
||||
*/
|
||||
String replace(final String target,
|
||||
final IWordContext context,
|
||||
final IWordReplace replace);
|
||||
final IWordContext context);
|
||||
|
||||
/**
|
||||
* 包含
|
||||
@@ -54,6 +52,6 @@ public interface ISensitiveWord {
|
||||
* @since 0.3.2
|
||||
*/
|
||||
boolean contains(final String string,
|
||||
final IWordContext context);
|
||||
final IWordContext context);
|
||||
|
||||
}
|
||||
|
||||
@@ -672,12 +672,10 @@ public class SensitiveWordBs implements ISensitiveWordDestroy {
|
||||
* @since 0.2.0
|
||||
*/
|
||||
public String replace(final String target) {
|
||||
return this.replace(target,context.wordReplace());
|
||||
}
|
||||
public String replace(final String target, IWordReplace replace) {
|
||||
return sensitiveWord.replace(target, context, replace);
|
||||
return sensitiveWord.replace(target, context);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* 获取敏感词的标签
|
||||
*
|
||||
|
||||
@@ -38,8 +38,9 @@ public abstract class AbstractSensitiveWord implements ISensitiveWord {
|
||||
* @return 结果
|
||||
* @since 0.3.2
|
||||
*/
|
||||
protected String doReplace(String target, List<IWordResult> allList, IWordContext context, IWordReplace replace) {
|
||||
protected String doReplace(String target, List<IWordResult> allList, IWordContext context) {
|
||||
// 根据 index 直接分割
|
||||
final IWordReplace replace = context.wordReplace();
|
||||
|
||||
// 是否需要对 allList 排序?
|
||||
StringBuilder stringBuilder = new StringBuilder();
|
||||
@@ -90,7 +91,7 @@ public abstract class AbstractSensitiveWord implements ISensitiveWord {
|
||||
}
|
||||
|
||||
@Override
|
||||
public String replace(String target, IWordContext context, IWordReplace replace) {
|
||||
public String replace(String target, IWordContext context) {
|
||||
if(StringUtil.isEmpty(target)) {
|
||||
return target;
|
||||
}
|
||||
@@ -100,9 +101,8 @@ public abstract class AbstractSensitiveWord implements ISensitiveWord {
|
||||
return target;
|
||||
}
|
||||
|
||||
return doReplace(target, allList, context, replace);
|
||||
return doReplace(target, allList, context);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean contains(String string, IWordContext context) {
|
||||
//1. 第一个存在
|
||||
|
||||
@@ -37,7 +37,7 @@ public class SensitiveWord extends AbstractSensitiveWord {
|
||||
@Override
|
||||
protected IWordResult doFindFirst(String string, IWordContext context) {
|
||||
List<IWordResult> wordResults = innerSensitiveWords(string, WordValidModeEnum.FAIL_FAST, context);
|
||||
if(!CollectionUtil.isEmpty(wordResults)){
|
||||
if (!CollectionUtil.isEmpty(wordResults)) {
|
||||
return wordResults.get(0);
|
||||
}
|
||||
return null;
|
||||
@@ -47,14 +47,14 @@ public class SensitiveWord extends AbstractSensitiveWord {
|
||||
/**
|
||||
* 获取敏感词列表
|
||||
*
|
||||
* @param text 文本
|
||||
* @param text 文本
|
||||
* @param modeEnum 模式
|
||||
* @return 结果列表
|
||||
* @since 0.0.1
|
||||
*/
|
||||
private List<IWordResult> innerSensitiveWords(final String text,
|
||||
final WordValidModeEnum modeEnum,
|
||||
final IWordContext context) {
|
||||
final WordValidModeEnum modeEnum,
|
||||
final IWordContext context) {
|
||||
//1. 是否存在敏感词,如果不存在,直接返回空列表
|
||||
final IWordCheck sensitiveCheck = context.sensitiveCheck();
|
||||
List<IWordResult> resultList = Guavas.newArrayList();
|
||||
@@ -74,38 +74,32 @@ public class SensitiveWord extends AbstractSensitiveWord {
|
||||
// v0.21.0 白名单跳过
|
||||
WordCheckResult checkResult = sensitiveCheck.sensitiveCheck(i, checkContext);
|
||||
int wordLengthAllow = checkResult.wordLengthResult().wordAllowLen();
|
||||
if(wordLengthAllow > 0) {
|
||||
i += wordLengthAllow-1;
|
||||
continue;
|
||||
}
|
||||
int wordLengthDeny = checkResult.wordLengthResult().wordDenyLen();
|
||||
|
||||
|
||||
// 命中
|
||||
final WordLengthResult wordLengthResult = checkResult.wordLengthResult();
|
||||
int wordLength = wordLengthResult.wordDenyLen();
|
||||
if (wordLength > 0) {
|
||||
//如果命中的白名单长度小于黑名单,则直接对黑名单的敏感词进行保存
|
||||
if (wordLengthAllow < wordLengthDeny) {
|
||||
// 保存敏感词
|
||||
WordResult wordResult = WordResult.newInstance()
|
||||
.startIndex(i)
|
||||
.endIndex(i+wordLength)
|
||||
.endIndex(i + wordLengthDeny)
|
||||
.type(checkResult.type())
|
||||
.word(wordLengthResult.wordDeny());
|
||||
.word(checkResult.wordLengthResult().wordDeny());
|
||||
|
||||
//v0.13.0 添加判断
|
||||
if(wordResultCondition.match(wordResult, text, modeEnum, context)) {
|
||||
if (wordResultCondition.match(wordResult, text, modeEnum, context)) {
|
||||
resultList.add(wordResult);
|
||||
// 快速返回
|
||||
if (WordValidModeEnum.FAIL_FAST.equals(modeEnum)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
// 增加 i 的步长
|
||||
// 为什么要-1,因为默认就会自增1
|
||||
// TODO: 这里可以根据字符串匹配算法优化。
|
||||
i += wordLength - 1;
|
||||
i += wordLengthDeny - 1;
|
||||
} else {
|
||||
//否则(命中的白名单长度不小于黑名单长度),跳过白名单长度个字符
|
||||
i += Math.max(0, wordLengthAllow - 1);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -51,7 +51,7 @@ public class WordCheckWord extends AbstractWordCheck {
|
||||
int tempLen = 0;
|
||||
int maxWhite = 0;
|
||||
int maxBlack = 0;
|
||||
int skipLen=0;
|
||||
int skipLen = 0;
|
||||
|
||||
for (int i = beginIndex; i < rawChars.length; i++) {
|
||||
if (wordCharIgnore.ignore(i, rawChars, innerContext) && tempLen != 0) {
|
||||
@@ -67,24 +67,18 @@ public class WordCheckWord extends AbstractWordCheck {
|
||||
WordContainsTypeEnum wordContainsTypeEnumDeny = wordData.contains(stringBuilder, innerContext);
|
||||
|
||||
if (WordContainsTypeEnum.CONTAINS_END.equals(wordContainsTypeEnumAllow)) {
|
||||
maxWhite += tempLen;
|
||||
if (!failFast) {
|
||||
//此处将tempLen设为0,为了防止重复累加
|
||||
tempLen = 0;
|
||||
}else{
|
||||
maxWhite = tempLen;
|
||||
if (failFast) {
|
||||
//为failFast模式,主动设为notFound退出循环
|
||||
wordContainsTypeEnumAllow=WordContainsTypeEnum.NOT_FOUND;
|
||||
wordContainsTypeEnumAllow = WordContainsTypeEnum.NOT_FOUND;
|
||||
}
|
||||
}
|
||||
|
||||
if (WordContainsTypeEnum.CONTAINS_END.equals(wordContainsTypeEnumDeny)) {
|
||||
maxBlack += tempLen;
|
||||
if (!failFast) {
|
||||
//此处将tempLen设为0,为了防止重复累加
|
||||
tempLen = 0;
|
||||
}else{
|
||||
maxBlack = tempLen;
|
||||
if (failFast) {
|
||||
//为failFast模式,主动设为notFound退出循环
|
||||
wordContainsTypeEnumDeny=WordContainsTypeEnum.NOT_FOUND;
|
||||
wordContainsTypeEnumDeny = WordContainsTypeEnum.NOT_FOUND;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -95,8 +89,8 @@ public class WordCheckWord extends AbstractWordCheck {
|
||||
}
|
||||
|
||||
String string = stringBuilder.toString();
|
||||
String wordAllow = string.substring(0, Math.max(0,maxWhite - skipLen));
|
||||
String wordDeny = string.substring(0, Math.max(0,maxBlack - skipLen));
|
||||
String wordAllow = string.substring(0, Math.max(0, maxWhite - skipLen));
|
||||
String wordDeny = string.substring(0, Math.max(0, maxBlack - skipLen));
|
||||
|
||||
|
||||
return WordLengthResult.newInstance()
|
||||
|
||||
@@ -1,20 +0,0 @@
|
||||
package com.github.houbb.sensitive.word.bs;
|
||||
|
||||
import com.github.houbb.sensitive.word.api.IWordReplace;
|
||||
import com.github.houbb.sensitive.word.replace.MyWordReplace;
|
||||
import org.junit.Assert;
|
||||
import org.junit.Test;
|
||||
|
||||
public class SensitiveWordBsReplaceTest {
|
||||
|
||||
@Test
|
||||
public void defineReplaceTest() {
|
||||
final String text = "五星红旗迎风飘扬,毛主席的画像屹立在天安门前。";
|
||||
SensitiveWordBs sensitiveWordBs = SensitiveWordBs.newInstance().init();
|
||||
|
||||
IWordReplace replace = new MyWordReplace();
|
||||
String result = sensitiveWordBs.replace(text, replace);
|
||||
|
||||
Assert.assertEquals("国家旗帜迎风飘扬,教员的画像屹立在***前。", result);
|
||||
}
|
||||
}
|
||||
@@ -1,10 +1,12 @@
|
||||
package com.github.houbb.sensitive.word.bs;
|
||||
|
||||
import com.github.houbb.sensitive.word.api.IWordAllow;
|
||||
import com.github.houbb.sensitive.word.api.IWordDeny;
|
||||
import org.junit.Assert;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
|
||||
public class SensitiveWordFailFastTest {
|
||||
@@ -20,12 +22,108 @@ public class SensitiveWordFailFastTest {
|
||||
}
|
||||
}).init();
|
||||
|
||||
String text = "我在我的家里玩我的世界";
|
||||
SensitiveWordBs bs1 = SensitiveWordBs.newInstance()
|
||||
.failFastWordPattern(true)
|
||||
.wordDeny(new IWordDeny() {
|
||||
@Override
|
||||
public List<String> deny() {
|
||||
return Collections.singletonList("操你妈");
|
||||
}
|
||||
})
|
||||
.wordAllow(new IWordAllow() {
|
||||
@Override
|
||||
public List<String> allow() {
|
||||
return Collections.singletonList("你");
|
||||
}
|
||||
})
|
||||
.init();
|
||||
|
||||
|
||||
//黑长白短,且初始下标一致
|
||||
SensitiveWordBs bs2 = SensitiveWordBs.newInstance()
|
||||
.failFastWordPattern(true)
|
||||
.wordDeny(new IWordDeny() {
|
||||
@Override
|
||||
public List<String> deny() {
|
||||
return Collections.singletonList("大傻逼");
|
||||
}
|
||||
})
|
||||
.wordAllow(new IWordAllow() {
|
||||
@Override
|
||||
public List<String> allow() {
|
||||
return Collections.singletonList("大");
|
||||
}
|
||||
})
|
||||
.init();
|
||||
|
||||
|
||||
|
||||
//白长黑短,且白和黑初始下标不在一起
|
||||
SensitiveWordBs bs3 = SensitiveWordBs.newInstance()
|
||||
.failFastWordPattern(true)
|
||||
.wordDeny(new IWordDeny() {
|
||||
@Override
|
||||
public List<String> deny() {
|
||||
return Collections.singletonList("口交");
|
||||
}
|
||||
})
|
||||
.wordAllow(new IWordAllow() {
|
||||
@Override
|
||||
public List<String> allow() {
|
||||
return Collections.singletonList("地铁口交易");
|
||||
}
|
||||
})
|
||||
.init();
|
||||
|
||||
|
||||
//白长黑短,且白和黑初始下标在一起
|
||||
SensitiveWordBs bs4 = SensitiveWordBs.newInstance()
|
||||
.failFastWordPattern(true)
|
||||
.wordDeny(new IWordDeny() {
|
||||
@Override
|
||||
public List<String> deny() {
|
||||
return Collections.singletonList("龟孙");
|
||||
}
|
||||
})
|
||||
.wordAllow(new IWordAllow() {
|
||||
@Override
|
||||
public List<String> allow() {
|
||||
return Collections.singletonList("龟孙可");
|
||||
}
|
||||
})
|
||||
.init();
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
String text = "我在我的家里玩我的世界";
|
||||
List<String> textList = bs.findAll(text);
|
||||
Assert.assertEquals(Arrays.asList("我的", "我的"), textList);
|
||||
|
||||
|
||||
String text1 = "操你妈";
|
||||
List<String> textList1 = bs1.findAll(text1);
|
||||
Assert.assertEquals(Collections.singletonList("操你妈"), textList1);
|
||||
|
||||
String text2 = "大傻逼";
|
||||
List<String> textList2 = bs2.findAll(text2);
|
||||
Assert.assertEquals(Collections.singletonList("大傻逼"), textList2);
|
||||
|
||||
|
||||
String text3 = "地铁口交易";
|
||||
List<String> textList3 = bs3.findAll(text3);
|
||||
Assert.assertTrue("Expected empty list", textList3.isEmpty());
|
||||
|
||||
String text4 = "龟孙可";
|
||||
List<String> textList4 = bs4.findAll(text4);
|
||||
Assert.assertTrue("Expected empty list", textList4.isEmpty());
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void fallOverTest() {
|
||||
SensitiveWordBs bs = SensitiveWordBs.newInstance()
|
||||
@@ -37,11 +135,109 @@ public class SensitiveWordFailFastTest {
|
||||
}
|
||||
}).init();
|
||||
|
||||
String text = "我在我的家里玩我的世界";
|
||||
|
||||
//黑长白短,且初始下标不一致
|
||||
SensitiveWordBs bs1 = SensitiveWordBs.newInstance()
|
||||
.failFastWordPattern(false)
|
||||
.wordDeny(new IWordDeny() {
|
||||
@Override
|
||||
public List<String> deny() {
|
||||
return Collections.singletonList("操你妈");
|
||||
}
|
||||
})
|
||||
.wordAllow(new IWordAllow() {
|
||||
@Override
|
||||
public List<String> allow() {
|
||||
return Collections.singletonList("你");
|
||||
}
|
||||
})
|
||||
.init();
|
||||
|
||||
|
||||
//黑长白短,且初始下标一致
|
||||
SensitiveWordBs bs2 = SensitiveWordBs.newInstance()
|
||||
.failFastWordPattern(false)
|
||||
.wordDeny(new IWordDeny() {
|
||||
@Override
|
||||
public List<String> deny() {
|
||||
return Collections.singletonList("大傻逼");
|
||||
}
|
||||
})
|
||||
.wordAllow(new IWordAllow() {
|
||||
@Override
|
||||
public List<String> allow() {
|
||||
return Collections.singletonList("大");
|
||||
}
|
||||
})
|
||||
.init();
|
||||
|
||||
|
||||
|
||||
//白长黑短,且白和黑初始下标不在一起
|
||||
SensitiveWordBs bs3 = SensitiveWordBs.newInstance()
|
||||
.failFastWordPattern(false)
|
||||
.wordDeny(new IWordDeny() {
|
||||
@Override
|
||||
public List<String> deny() {
|
||||
return Collections.singletonList("口交");
|
||||
}
|
||||
})
|
||||
.wordAllow(new IWordAllow() {
|
||||
@Override
|
||||
public List<String> allow() {
|
||||
return Collections.singletonList("地铁口交易");
|
||||
}
|
||||
})
|
||||
.init();
|
||||
|
||||
|
||||
//白长黑短,且白和黑初始下标在一起
|
||||
SensitiveWordBs bs4 = SensitiveWordBs.newInstance()
|
||||
.failFastWordPattern(false)
|
||||
.wordDeny(new IWordDeny() {
|
||||
@Override
|
||||
public List<String> deny() {
|
||||
return Collections.singletonList("龟孙");
|
||||
}
|
||||
})
|
||||
.wordAllow(new IWordAllow() {
|
||||
@Override
|
||||
public List<String> allow() {
|
||||
return Collections.singletonList("龟孙可");
|
||||
}
|
||||
})
|
||||
.init();
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
String text = "我在我的家里玩我的世界";
|
||||
List<String> textList = bs.findAll(text);
|
||||
Assert.assertEquals(Arrays.asList("我的", "我的世界"), textList);
|
||||
|
||||
|
||||
String text1 = "操你妈";
|
||||
List<String> textList1 = bs1.findAll(text1);
|
||||
Assert.assertEquals(Collections.singletonList("操你妈"), textList1);
|
||||
|
||||
String text2 = "大傻逼";
|
||||
List<String> textList2 = bs2.findAll(text2);
|
||||
Assert.assertEquals(Collections.singletonList("大傻逼"), textList2);
|
||||
|
||||
|
||||
String text3 = "地铁口交易";
|
||||
List<String> textList3 = bs3.findAll(text3);
|
||||
Assert.assertTrue("Expected empty list", textList3.isEmpty());
|
||||
|
||||
String text4 = "龟孙可";
|
||||
List<String> textList4 = bs4.findAll(text4);
|
||||
Assert.assertTrue("Expected empty list", textList4.isEmpty());
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user