mirror of
https://github.com/houbb/sensitive-word.git
synced 2026-03-22 00:17:35 +08:00
issue110,并废除一些黑白名单一次遍历时的冗余逻辑
This commit is contained in:
@@ -8,6 +8,17 @@ import com.github.houbb.sensitive.word.bs.SensitiveWordContext;
|
||||
*/
|
||||
public interface IWordContext {
|
||||
|
||||
|
||||
/**
 * When {@code true}, detection returns as soon as the first sensitive word is hit.
 * Added to resolve issue 110.
 *
 * @return the fail-fast flag
 */
|
||||
boolean failFastWordPattern();
|
||||
|
||||
/**
 * Sets the fail-fast flag reported by {@link #failFastWordPattern()}.
 *
 * @param failFastWordPattern the fail-fast flag to store
 * @return a word context; NOTE(review): the implementation visible in this change
 *         returns {@code this} - confirm for any other implementations
 */
IWordContext failFastWordPattern(boolean failFastWordPattern);
|
||||
|
||||
|
||||
/**
|
||||
* 是否忽略大小写
|
||||
* @return 是否
|
||||
|
||||
@@ -2,7 +2,6 @@ package com.github.houbb.sensitive.word.bs;
|
||||
|
||||
import com.github.houbb.heaven.support.handler.IHandler;
|
||||
import com.github.houbb.heaven.util.common.ArgUtil;
|
||||
import com.github.houbb.heaven.util.lang.StringUtil;
|
||||
import com.github.houbb.heaven.util.util.CollectionUtil;
|
||||
import com.github.houbb.sensitive.word.api.*;
|
||||
import com.github.houbb.sensitive.word.api.combine.IWordAllowDenyCombine;
|
||||
@@ -70,6 +69,9 @@ public class SensitiveWordBs implements ISensitiveWordDestroy {
|
||||
*/
|
||||
private boolean ignoreRepeat = false;
|
||||
|
||||
/**
 * Fail-fast switch that this class passes on to the word context; defaults to {@code true}.
 */
private boolean failFastWordPattern = true;
|
||||
|
||||
|
||||
// 开启校验
|
||||
/**
|
||||
* 启用数字检测
|
||||
@@ -278,6 +280,7 @@ public class SensitiveWordBs implements ISensitiveWordDestroy {
|
||||
context.ignoreChineseStyle(ignoreChineseStyle);
|
||||
context.ignoreEnglishStyle(ignoreEnglishStyle);
|
||||
context.ignoreRepeat(ignoreRepeat);
|
||||
context.failFastWordPattern(failFastWordPattern);
|
||||
|
||||
// 开启校验
|
||||
context.enableNumCheck(enableNumCheck);
|
||||
@@ -579,6 +582,10 @@ public class SensitiveWordBs implements ISensitiveWordDestroy {
|
||||
this.ignoreRepeat = ignoreRepeat;
|
||||
return this;
|
||||
}
|
||||
/**
 * Sets the fail-fast switch; this class later hands the stored value to the word context.
 *
 * @param failFastWordPattern the fail-fast flag to store
 * @return this instance, for call chaining
 */
public SensitiveWordBs failFastWordPattern(boolean failFastWordPattern) {
|
||||
this.failFastWordPattern = failFastWordPattern;
|
||||
return this;
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------------ 公开方法 START
|
||||
/**
|
||||
|
||||
@@ -10,6 +10,12 @@ import com.github.houbb.sensitive.word.support.check.WordChecks;
|
||||
*/
|
||||
public class SensitiveWordContext implements IWordContext {
|
||||
|
||||
|
||||
/**
 * Fail-fast flag introduced for issue 110; exposed through {@link #failFastWordPattern()}.
 */
|
||||
private boolean failFastWordPattern;
|
||||
|
||||
/**
|
||||
* 忽略大小写
|
||||
* @since 0.0.4
|
||||
@@ -220,6 +226,17 @@ public class SensitiveWordContext implements IWordContext {
|
||||
return new SensitiveWordContext();
|
||||
}
|
||||
|
||||
/**
 * Returns the stored fail-fast flag.
 *
 * @return the current value of the fail-fast flag
 */
@Override
|
||||
public boolean failFastWordPattern() {
|
||||
return failFastWordPattern;
|
||||
}
|
||||
|
||||
public IWordContext failFastWordPattern(boolean failFastWordPattern){
|
||||
this.failFastWordPattern=failFastWordPattern;
|
||||
return this;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public boolean ignoreCase() {
|
||||
return ignoreCase;
|
||||
|
||||
@@ -1,14 +1,12 @@
|
||||
package com.github.houbb.sensitive.word.support.check;
|
||||
|
||||
import com.github.houbb.heaven.annotation.ThreadSafe;
|
||||
import com.github.houbb.heaven.util.lang.StringUtil;
|
||||
import com.github.houbb.sensitive.word.api.ISensitiveWordCharIgnore;
|
||||
import com.github.houbb.sensitive.word.api.IWordCheck;
|
||||
import com.github.houbb.sensitive.word.api.IWordContext;
|
||||
import com.github.houbb.sensitive.word.api.IWordData;
|
||||
import com.github.houbb.sensitive.word.api.context.InnerSensitiveWordContext;
|
||||
import com.github.houbb.sensitive.word.constant.enums.WordTypeEnum;
|
||||
import com.github.houbb.sensitive.word.constant.enums.WordValidModeEnum;
|
||||
import com.github.houbb.sensitive.word.constant.enums.WordContainsTypeEnum;
|
||||
import com.github.houbb.sensitive.word.support.result.WordLengthResult;
|
||||
|
||||
@@ -44,47 +42,48 @@ public class WordCheckWord extends AbstractWordCheck {
|
||||
final IWordData wordData = context.wordData();
|
||||
final IWordData wordDataAllow = context.wordDataAllow();
|
||||
final ISensitiveWordCharIgnore wordCharIgnore = context.charIgnore();
|
||||
final boolean failFast = context.failFastWordPattern();
|
||||
|
||||
// 前一个条件
|
||||
StringBuilder stringBuilder = new StringBuilder();
|
||||
char[] rawChars = txt.toCharArray();
|
||||
|
||||
int tempLen = 0;
|
||||
int maxWhite = 0;
|
||||
int maxBlack = 0;
|
||||
boolean firstCheck = true;
|
||||
|
||||
WordContainsTypeEnum wordContainsTypeEnumAllow = wordDataAllow.contains(stringBuilder, innerContext);
|
||||
WordContainsTypeEnum wordContainsTypeEnumDeny = wordData.contains(stringBuilder, innerContext);
|
||||
|
||||
for (int i = beginIndex; i < rawChars.length; i++) {
|
||||
if (wordCharIgnore.ignore(i, rawChars, innerContext) && tempLen != 0) {
|
||||
tempLen++;
|
||||
continue;
|
||||
}
|
||||
|
||||
char mappingChar = formatCharMapping.get(rawChars[i]);
|
||||
stringBuilder.append(mappingChar);
|
||||
tempLen++;
|
||||
|
||||
if (firstCheck || !WordContainsTypeEnum.NOT_FOUND.equals(wordContainsTypeEnumAllow)) {
|
||||
wordContainsTypeEnumAllow = wordDataAllow.contains(stringBuilder, innerContext);
|
||||
if (WordContainsTypeEnum.CONTAINS_END.equals(wordContainsTypeEnumAllow)) {
|
||||
maxWhite += tempLen;
|
||||
wordContainsTypeEnumAllow = WordContainsTypeEnum.NOT_FOUND;
|
||||
WordContainsTypeEnum wordContainsTypeEnumAllow = wordDataAllow.contains(stringBuilder, innerContext);
|
||||
WordContainsTypeEnum wordContainsTypeEnumDeny = wordData.contains(stringBuilder, innerContext);
|
||||
|
||||
if (WordContainsTypeEnum.CONTAINS_END.equals(wordContainsTypeEnumAllow)) {
|
||||
maxWhite += tempLen;
|
||||
if (!failFast) {
|
||||
//此处将tempLen设为0,为了防止重复累加
|
||||
tempLen = 0;
|
||||
}else{
|
||||
//为failFast模式,主动设为notFound退出循环
|
||||
wordContainsTypeEnumAllow=WordContainsTypeEnum.NOT_FOUND;
|
||||
}
|
||||
}
|
||||
|
||||
if (firstCheck || !WordContainsTypeEnum.NOT_FOUND.equals(wordContainsTypeEnumDeny)) {
|
||||
wordContainsTypeEnumDeny = wordData.contains(stringBuilder, innerContext);
|
||||
if (WordContainsTypeEnum.CONTAINS_END.equals(wordContainsTypeEnumDeny)) {
|
||||
maxBlack += tempLen;
|
||||
wordContainsTypeEnumDeny = WordContainsTypeEnum.NOT_FOUND;
|
||||
if (WordContainsTypeEnum.CONTAINS_END.equals(wordContainsTypeEnumDeny)) {
|
||||
maxBlack += tempLen;
|
||||
if (!failFast) {
|
||||
//此处将tempLen设为0,为了防止重复累加
|
||||
tempLen = 0;
|
||||
}else{
|
||||
//为failFast模式,主动设为notFound退出循环
|
||||
wordContainsTypeEnumDeny=WordContainsTypeEnum.NOT_FOUND;
|
||||
}
|
||||
}
|
||||
|
||||
firstCheck = false;
|
||||
|
||||
if (WordContainsTypeEnum.NOT_FOUND.equals(wordContainsTypeEnumAllow) &&
|
||||
WordContainsTypeEnum.NOT_FOUND.equals(wordContainsTypeEnumDeny)) {
|
||||
break;
|
||||
|
||||
@@ -1,96 +0,0 @@
|
||||
package com.github.houbb.sensitive.word.support.check;
|
||||
|
||||
import com.github.houbb.heaven.annotation.ThreadSafe;
|
||||
import com.github.houbb.sensitive.word.api.ISensitiveWordCharIgnore;
|
||||
import com.github.houbb.sensitive.word.api.IWordCheck;
|
||||
import com.github.houbb.sensitive.word.api.IWordContext;
|
||||
import com.github.houbb.sensitive.word.api.IWordData;
|
||||
import com.github.houbb.sensitive.word.api.context.InnerSensitiveWordContext;
|
||||
import com.github.houbb.sensitive.word.constant.enums.WordContainsTypeEnum;
|
||||
import com.github.houbb.sensitive.word.constant.enums.WordTypeEnum;
|
||||
import com.github.houbb.sensitive.word.support.result.WordLengthResult;
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* 敏感词监测实现
|
||||
* @author binbin.hou
|
||||
* @since 0.26.0
|
||||
*/
|
||||
@Deprecated
|
||||
public class WordCheckWordMaxLen extends AbstractWordCheck {
|
||||
|
||||
@Override
|
||||
protected Class<? extends IWordCheck> getSensitiveCheckClass() {
|
||||
return WordCheckWordMaxLen.class;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected WordLengthResult getActualLength(int beginIndex, InnerSensitiveWordContext innerContext) {
|
||||
final String txt = innerContext.originalText();
|
||||
final Map<Character, Character> formatCharMapping = innerContext.formatCharMapping();
|
||||
final IWordContext context = innerContext.wordContext();
|
||||
final IWordData wordData = context.wordData();
|
||||
final IWordData wordDataAllow = context.wordDataAllow();
|
||||
final ISensitiveWordCharIgnore wordCharIgnore = context.charIgnore();
|
||||
|
||||
// 前一个条件
|
||||
StringBuilder stringBuilder = new StringBuilder();
|
||||
char[] rawChars = txt.toCharArray();
|
||||
|
||||
int tempLen = 0;
|
||||
int maxWhite = 0;
|
||||
int maxBlack = 0;
|
||||
boolean firstCheck = true;
|
||||
|
||||
WordContainsTypeEnum wordContainsTypeEnumAllow = wordDataAllow.contains(stringBuilder, innerContext);
|
||||
WordContainsTypeEnum wordContainsTypeEnumDeny = wordData.contains(stringBuilder, innerContext);
|
||||
|
||||
for (int i = beginIndex; i < rawChars.length; i++) {
|
||||
if (wordCharIgnore.ignore(i, rawChars, innerContext) && tempLen != 0) {
|
||||
tempLen++;
|
||||
continue;
|
||||
}
|
||||
|
||||
char mappingChar = formatCharMapping.get(rawChars[i]);
|
||||
stringBuilder.append(mappingChar);
|
||||
tempLen++;
|
||||
|
||||
if (firstCheck || !WordContainsTypeEnum.NOT_FOUND.equals(wordContainsTypeEnumAllow)) {
|
||||
wordContainsTypeEnumAllow = wordDataAllow.contains(stringBuilder, innerContext);
|
||||
if (WordContainsTypeEnum.CONTAINS_END.equals(wordContainsTypeEnumAllow)) {
|
||||
maxWhite += tempLen;
|
||||
wordContainsTypeEnumAllow = WordContainsTypeEnum.NOT_FOUND;
|
||||
}
|
||||
}
|
||||
|
||||
// 黑名单命中
|
||||
if (firstCheck || !WordContainsTypeEnum.NOT_FOUND.equals(wordContainsTypeEnumDeny)) {
|
||||
wordContainsTypeEnumDeny = wordData.contains(stringBuilder, innerContext);
|
||||
if (WordContainsTypeEnum.CONTAINS_END.equals(wordContainsTypeEnumDeny)) {
|
||||
maxBlack += tempLen;
|
||||
wordContainsTypeEnumDeny = WordContainsTypeEnum.NOT_FOUND;
|
||||
}
|
||||
}
|
||||
|
||||
// 不再是第一次检测
|
||||
firstCheck = false;
|
||||
|
||||
// 黑白名单都未匹配
|
||||
if (WordContainsTypeEnum.NOT_FOUND.equals(wordContainsTypeEnumAllow) &&
|
||||
WordContainsTypeEnum.NOT_FOUND.equals(wordContainsTypeEnumDeny)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return WordLengthResult.newInstance()
|
||||
.wordAllowLen(maxWhite)
|
||||
.wordDenyLen(maxBlack);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected String getType() {
|
||||
return WordTypeEnum.WORD.getCode();
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,47 @@
|
||||
package com.github.houbb.sensitive.word.bs;
|
||||
|
||||
import com.github.houbb.sensitive.word.api.IWordDeny;
|
||||
import org.junit.Assert;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
public class SensitiveWordFailFastTest {
|
||||
|
||||
@Test
|
||||
public void failFastTest() {
|
||||
SensitiveWordBs bs = SensitiveWordBs.newInstance()
|
||||
.failFastWordPattern(true)
|
||||
.wordDeny(new IWordDeny() {
|
||||
@Override
|
||||
public List<String> deny() {
|
||||
return Arrays.asList("我的世界", "我的");
|
||||
}
|
||||
}).init();
|
||||
|
||||
String text = "我在我的家里玩我的世界";
|
||||
|
||||
List<String> textList = bs.findAll(text);
|
||||
Assert.assertEquals(Arrays.asList("我的", "我的"), textList);
|
||||
|
||||
}
|
||||
@Test
|
||||
public void fallOverTest() {
|
||||
SensitiveWordBs bs = SensitiveWordBs.newInstance()
|
||||
.failFastWordPattern(false)
|
||||
.wordDeny(new IWordDeny() {
|
||||
@Override
|
||||
public List<String> deny() {
|
||||
return Arrays.asList("我的世界", "我的");
|
||||
}
|
||||
}).init();
|
||||
|
||||
String text = "我在我的家里玩我的世界";
|
||||
|
||||
List<String> textList = bs.findAll(text);
|
||||
Assert.assertEquals(Arrays.asList("我的", "我的世界"), textList);
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
@@ -1,28 +0,0 @@
|
||||
package com.github.houbb.sensitive.word.bs;
|
||||
|
||||
import com.github.houbb.sensitive.word.api.IWordDeny;
|
||||
import org.junit.Assert;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
public class SensitiveWordMaxFirstTest {
|
||||
|
||||
@Test
|
||||
public void maxFirstTest() {
|
||||
SensitiveWordBs bs = SensitiveWordBs.newInstance()
|
||||
.wordDeny(new IWordDeny() {
|
||||
@Override
|
||||
public List<String> deny() {
|
||||
return Arrays.asList("我的世界", "我的");
|
||||
}
|
||||
}).init();
|
||||
|
||||
String text = "我的世界我的好玩";
|
||||
|
||||
List<String> textList = bs.findAll(text);
|
||||
// Assert.assertEquals("", textList.toString());
|
||||
}
|
||||
|
||||
}
|
||||
Reference in New Issue
Block a user