mirror of
https://github.com/houbb/sensitive-word.git
synced 2026-03-22 08:27:36 +08:00
@@ -19,7 +19,7 @@ public interface ISensitiveWord {
|
|||||||
* @see WordValidModeEnum#FAIL_OVER 建议使用全部检测返回模式
|
* @see WordValidModeEnum#FAIL_OVER 建议使用全部检测返回模式
|
||||||
*/
|
*/
|
||||||
List<IWordResult> findAll(final String string,
|
List<IWordResult> findAll(final String string,
|
||||||
final IWordContext context);
|
final IWordContext context);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 返回第一个对应的敏感词
|
* 返回第一个对应的敏感词
|
||||||
@@ -29,7 +29,7 @@ public interface ISensitiveWord {
|
|||||||
* @since 0.3.2
|
* @since 0.3.2
|
||||||
*/
|
*/
|
||||||
IWordResult findFirst(final String string,
|
IWordResult findFirst(final String string,
|
||||||
final IWordContext context);
|
final IWordContext context);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 替换所有敏感词内容
|
* 替换所有敏感词内容
|
||||||
@@ -42,7 +42,7 @@ public interface ISensitiveWord {
|
|||||||
* @since 0.3.2
|
* @since 0.3.2
|
||||||
*/
|
*/
|
||||||
String replace(final String target,
|
String replace(final String target,
|
||||||
final IWordContext context);
|
final IWordContext context);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 包含
|
* 包含
|
||||||
@@ -52,6 +52,6 @@ public interface ISensitiveWord {
|
|||||||
* @since 0.3.2
|
* @since 0.3.2
|
||||||
*/
|
*/
|
||||||
boolean contains(final String string,
|
boolean contains(final String string,
|
||||||
final IWordContext context);
|
final IWordContext context);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -8,6 +8,17 @@ import com.github.houbb.sensitive.word.bs.SensitiveWordContext;
|
|||||||
*/
|
*/
|
||||||
public interface IWordContext {
|
public interface IWordContext {
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 为true时,遇到第一个敏感词词就返回
|
||||||
|
* 解决issue110
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
boolean failFastWordPattern();
|
||||||
|
|
||||||
|
IWordContext failFastWordPattern(boolean failFastWordPattern);
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 是否忽略大小写
|
* 是否忽略大小写
|
||||||
* @return 是否
|
* @return 是否
|
||||||
|
|||||||
@@ -2,7 +2,6 @@ package com.github.houbb.sensitive.word.bs;
|
|||||||
|
|
||||||
import com.github.houbb.heaven.support.handler.IHandler;
|
import com.github.houbb.heaven.support.handler.IHandler;
|
||||||
import com.github.houbb.heaven.util.common.ArgUtil;
|
import com.github.houbb.heaven.util.common.ArgUtil;
|
||||||
import com.github.houbb.heaven.util.lang.StringUtil;
|
|
||||||
import com.github.houbb.heaven.util.util.CollectionUtil;
|
import com.github.houbb.heaven.util.util.CollectionUtil;
|
||||||
import com.github.houbb.sensitive.word.api.*;
|
import com.github.houbb.sensitive.word.api.*;
|
||||||
import com.github.houbb.sensitive.word.api.combine.IWordAllowDenyCombine;
|
import com.github.houbb.sensitive.word.api.combine.IWordAllowDenyCombine;
|
||||||
@@ -70,6 +69,9 @@ public class SensitiveWordBs implements ISensitiveWordDestroy {
|
|||||||
*/
|
*/
|
||||||
private boolean ignoreRepeat = false;
|
private boolean ignoreRepeat = false;
|
||||||
|
|
||||||
|
private boolean failFastWordPattern = true;
|
||||||
|
|
||||||
|
|
||||||
// 开启校验
|
// 开启校验
|
||||||
/**
|
/**
|
||||||
* 启用数字检测
|
* 启用数字检测
|
||||||
@@ -278,6 +280,7 @@ public class SensitiveWordBs implements ISensitiveWordDestroy {
|
|||||||
context.ignoreChineseStyle(ignoreChineseStyle);
|
context.ignoreChineseStyle(ignoreChineseStyle);
|
||||||
context.ignoreEnglishStyle(ignoreEnglishStyle);
|
context.ignoreEnglishStyle(ignoreEnglishStyle);
|
||||||
context.ignoreRepeat(ignoreRepeat);
|
context.ignoreRepeat(ignoreRepeat);
|
||||||
|
context.failFastWordPattern(failFastWordPattern);
|
||||||
|
|
||||||
// 开启校验
|
// 开启校验
|
||||||
context.enableNumCheck(enableNumCheck);
|
context.enableNumCheck(enableNumCheck);
|
||||||
@@ -579,6 +582,10 @@ public class SensitiveWordBs implements ISensitiveWordDestroy {
|
|||||||
this.ignoreRepeat = ignoreRepeat;
|
this.ignoreRepeat = ignoreRepeat;
|
||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
|
public SensitiveWordBs failFastWordPattern(boolean failFastWordPattern) {
|
||||||
|
this.failFastWordPattern = failFastWordPattern;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
//------------------------------------------------------------------------------------ 公开方法 START
|
//------------------------------------------------------------------------------------ 公开方法 START
|
||||||
/**
|
/**
|
||||||
@@ -668,6 +675,7 @@ public class SensitiveWordBs implements ISensitiveWordDestroy {
|
|||||||
return sensitiveWord.replace(target, context);
|
return sensitiveWord.replace(target, context);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 获取敏感词的标签
|
* 获取敏感词的标签
|
||||||
*
|
*
|
||||||
|
|||||||
@@ -10,6 +10,12 @@ import com.github.houbb.sensitive.word.support.check.WordChecks;
|
|||||||
*/
|
*/
|
||||||
public class SensitiveWordContext implements IWordContext {
|
public class SensitiveWordContext implements IWordContext {
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* issue110
|
||||||
|
*/
|
||||||
|
private boolean failFastWordPattern;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 忽略大小写
|
* 忽略大小写
|
||||||
* @since 0.0.4
|
* @since 0.0.4
|
||||||
@@ -220,6 +226,19 @@ public class SensitiveWordContext implements IWordContext {
|
|||||||
return new SensitiveWordContext();
|
return new SensitiveWordContext();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean failFastWordPattern() {
|
||||||
|
return failFastWordPattern;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public IWordContext failFastWordPattern(boolean failFastWordPattern){
|
||||||
|
this.failFastWordPattern=failFastWordPattern;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean ignoreCase() {
|
public boolean ignoreCase() {
|
||||||
return ignoreCase;
|
return ignoreCase;
|
||||||
|
|||||||
@@ -40,8 +40,8 @@ public abstract class AbstractSensitiveWord implements ISensitiveWord {
|
|||||||
*/
|
*/
|
||||||
protected String doReplace(String target, List<IWordResult> allList, IWordContext context) {
|
protected String doReplace(String target, List<IWordResult> allList, IWordContext context) {
|
||||||
// 根据 index 直接分割
|
// 根据 index 直接分割
|
||||||
|
|
||||||
final IWordReplace replace = context.wordReplace();
|
final IWordReplace replace = context.wordReplace();
|
||||||
|
|
||||||
// 是否需要对 allList 排序?
|
// 是否需要对 allList 排序?
|
||||||
StringBuilder stringBuilder = new StringBuilder();
|
StringBuilder stringBuilder = new StringBuilder();
|
||||||
|
|
||||||
@@ -103,7 +103,6 @@ public abstract class AbstractSensitiveWord implements ISensitiveWord {
|
|||||||
|
|
||||||
return doReplace(target, allList, context);
|
return doReplace(target, allList, context);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean contains(String string, IWordContext context) {
|
public boolean contains(String string, IWordContext context) {
|
||||||
//1. 第一个存在
|
//1. 第一个存在
|
||||||
|
|||||||
@@ -37,7 +37,7 @@ public class SensitiveWord extends AbstractSensitiveWord {
|
|||||||
@Override
|
@Override
|
||||||
protected IWordResult doFindFirst(String string, IWordContext context) {
|
protected IWordResult doFindFirst(String string, IWordContext context) {
|
||||||
List<IWordResult> wordResults = innerSensitiveWords(string, WordValidModeEnum.FAIL_FAST, context);
|
List<IWordResult> wordResults = innerSensitiveWords(string, WordValidModeEnum.FAIL_FAST, context);
|
||||||
if(!CollectionUtil.isEmpty(wordResults)){
|
if (!CollectionUtil.isEmpty(wordResults)) {
|
||||||
return wordResults.get(0);
|
return wordResults.get(0);
|
||||||
}
|
}
|
||||||
return null;
|
return null;
|
||||||
@@ -47,14 +47,14 @@ public class SensitiveWord extends AbstractSensitiveWord {
|
|||||||
/**
|
/**
|
||||||
* 获取敏感词列表
|
* 获取敏感词列表
|
||||||
*
|
*
|
||||||
* @param text 文本
|
* @param text 文本
|
||||||
* @param modeEnum 模式
|
* @param modeEnum 模式
|
||||||
* @return 结果列表
|
* @return 结果列表
|
||||||
* @since 0.0.1
|
* @since 0.0.1
|
||||||
*/
|
*/
|
||||||
private List<IWordResult> innerSensitiveWords(final String text,
|
private List<IWordResult> innerSensitiveWords(final String text,
|
||||||
final WordValidModeEnum modeEnum,
|
final WordValidModeEnum modeEnum,
|
||||||
final IWordContext context) {
|
final IWordContext context) {
|
||||||
//1. 是否存在敏感词,如果比存在,直接返回空列表
|
//1. 是否存在敏感词,如果比存在,直接返回空列表
|
||||||
final IWordCheck sensitiveCheck = context.sensitiveCheck();
|
final IWordCheck sensitiveCheck = context.sensitiveCheck();
|
||||||
List<IWordResult> resultList = Guavas.newArrayList();
|
List<IWordResult> resultList = Guavas.newArrayList();
|
||||||
@@ -74,38 +74,32 @@ public class SensitiveWord extends AbstractSensitiveWord {
|
|||||||
// v0.21.0 白名单跳过
|
// v0.21.0 白名单跳过
|
||||||
WordCheckResult checkResult = sensitiveCheck.sensitiveCheck(i, checkContext);
|
WordCheckResult checkResult = sensitiveCheck.sensitiveCheck(i, checkContext);
|
||||||
int wordLengthAllow = checkResult.wordLengthResult().wordAllowLen();
|
int wordLengthAllow = checkResult.wordLengthResult().wordAllowLen();
|
||||||
if(wordLengthAllow > 0) {
|
int wordLengthDeny = checkResult.wordLengthResult().wordDenyLen();
|
||||||
i += wordLengthAllow-1;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
//如果命中的白名单长度小于黑名单,则直接对黑名单的敏感词进行保存
|
||||||
// 命中
|
if (wordLengthAllow < wordLengthDeny) {
|
||||||
final WordLengthResult wordLengthResult = checkResult.wordLengthResult();
|
|
||||||
int wordLength = wordLengthResult.wordDenyLen();
|
|
||||||
if (wordLength > 0) {
|
|
||||||
// 保存敏感词
|
// 保存敏感词
|
||||||
WordResult wordResult = WordResult.newInstance()
|
WordResult wordResult = WordResult.newInstance()
|
||||||
.startIndex(i)
|
.startIndex(i)
|
||||||
.endIndex(i+wordLength)
|
.endIndex(i + wordLengthDeny)
|
||||||
.type(checkResult.type())
|
.type(checkResult.type())
|
||||||
.word(wordLengthResult.wordDeny());
|
.word(checkResult.wordLengthResult().wordDeny());
|
||||||
|
|
||||||
//v0.13.0 添加判断
|
//v0.13.0 添加判断
|
||||||
if(wordResultCondition.match(wordResult, text, modeEnum, context)) {
|
if (wordResultCondition.match(wordResult, text, modeEnum, context)) {
|
||||||
resultList.add(wordResult);
|
resultList.add(wordResult);
|
||||||
// 快速返回
|
// 快速返回
|
||||||
if (WordValidModeEnum.FAIL_FAST.equals(modeEnum)) {
|
if (WordValidModeEnum.FAIL_FAST.equals(modeEnum)) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
// 增加 i 的步长
|
// 增加 i 的步长
|
||||||
// 为什么要-1,因为默认就会自增1
|
// 为什么要-1,因为默认就会自增1
|
||||||
// TODO: 这里可以根据字符串匹配算法优化。
|
// TODO: 这里可以根据字符串匹配算法优化。
|
||||||
i += wordLength - 1;
|
i += wordLengthDeny - 1;
|
||||||
|
} else {
|
||||||
|
//如果命中的白名单长度大于黑名单长度,则跳过白名单个字符
|
||||||
|
i += Math.max(0, wordLengthAllow - 1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -1,16 +1,16 @@
|
|||||||
package com.github.houbb.sensitive.word.support.check;
|
package com.github.houbb.sensitive.word.support.check;
|
||||||
|
|
||||||
import com.github.houbb.heaven.annotation.ThreadSafe;
|
import com.github.houbb.heaven.annotation.ThreadSafe;
|
||||||
import com.github.houbb.heaven.util.lang.StringUtil;
|
|
||||||
import com.github.houbb.sensitive.word.api.ISensitiveWordCharIgnore;
|
import com.github.houbb.sensitive.word.api.ISensitiveWordCharIgnore;
|
||||||
import com.github.houbb.sensitive.word.api.IWordCheck;
|
import com.github.houbb.sensitive.word.api.IWordCheck;
|
||||||
import com.github.houbb.sensitive.word.api.IWordContext;
|
import com.github.houbb.sensitive.word.api.IWordContext;
|
||||||
import com.github.houbb.sensitive.word.api.IWordData;
|
import com.github.houbb.sensitive.word.api.IWordData;
|
||||||
|
import com.github.houbb.sensitive.word.api.IWordFormat;
|
||||||
import com.github.houbb.sensitive.word.api.context.InnerSensitiveWordContext;
|
import com.github.houbb.sensitive.word.api.context.InnerSensitiveWordContext;
|
||||||
import com.github.houbb.sensitive.word.constant.enums.WordTypeEnum;
|
import com.github.houbb.sensitive.word.constant.enums.WordTypeEnum;
|
||||||
import com.github.houbb.sensitive.word.constant.enums.WordValidModeEnum;
|
|
||||||
import com.github.houbb.sensitive.word.constant.enums.WordContainsTypeEnum;
|
import com.github.houbb.sensitive.word.constant.enums.WordContainsTypeEnum;
|
||||||
import com.github.houbb.sensitive.word.support.result.WordLengthResult;
|
import com.github.houbb.sensitive.word.support.result.WordLengthResult;
|
||||||
|
import com.github.houbb.sensitive.word.utils.InnerWordFormatUtils;
|
||||||
|
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
@@ -44,66 +44,63 @@ public class WordCheckWord extends AbstractWordCheck {
|
|||||||
final IWordData wordData = context.wordData();
|
final IWordData wordData = context.wordData();
|
||||||
final IWordData wordDataAllow = context.wordDataAllow();
|
final IWordData wordDataAllow = context.wordDataAllow();
|
||||||
final ISensitiveWordCharIgnore wordCharIgnore = context.charIgnore();
|
final ISensitiveWordCharIgnore wordCharIgnore = context.charIgnore();
|
||||||
|
final boolean failFast = context.failFastWordPattern();
|
||||||
|
|
||||||
// 前一个条件
|
|
||||||
StringBuilder stringBuilder = new StringBuilder();
|
StringBuilder stringBuilder = new StringBuilder();
|
||||||
char[] rawChars = txt.toCharArray();
|
char[] rawChars = txt.toCharArray();
|
||||||
|
|
||||||
int tempLen = 0;
|
int tempLen = 0;
|
||||||
int maxWhite = 0;
|
int maxWhite = 0;
|
||||||
int maxBlack = 0;
|
int maxBlack = 0;
|
||||||
boolean firstCheck = true;
|
int skipLen = 0;
|
||||||
String blackWord = null;
|
|
||||||
String whiteWord = null;
|
|
||||||
|
|
||||||
WordContainsTypeEnum wordContainsTypeEnumAllow = wordDataAllow.contains(stringBuilder, innerContext);
|
|
||||||
WordContainsTypeEnum wordContainsTypeEnumDeny = wordData.contains(stringBuilder, innerContext);
|
|
||||||
|
|
||||||
for (int i = beginIndex; i < rawChars.length; i++) {
|
for (int i = beginIndex; i < rawChars.length; i++) {
|
||||||
if (wordCharIgnore.ignore(i, rawChars, innerContext) && tempLen != 0) {
|
if (wordCharIgnore.ignore(i, rawChars, innerContext) && tempLen != 0) {
|
||||||
tempLen++;
|
tempLen++;
|
||||||
|
skipLen++;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
char mappingChar = formatCharMapping.get(rawChars[i]);
|
char mappingChar = formatCharMapping.get(rawChars[i]);
|
||||||
stringBuilder.append(mappingChar);
|
stringBuilder.append(mappingChar);
|
||||||
tempLen++;
|
tempLen++;
|
||||||
|
|
||||||
if (firstCheck || !WordContainsTypeEnum.NOT_FOUND.equals(wordContainsTypeEnumAllow)) {
|
WordContainsTypeEnum wordContainsTypeEnumAllow = wordDataAllow.contains(stringBuilder, innerContext);
|
||||||
wordContainsTypeEnumAllow = wordDataAllow.contains(stringBuilder, innerContext);
|
WordContainsTypeEnum wordContainsTypeEnumDeny = wordData.contains(stringBuilder, innerContext);
|
||||||
if (WordContainsTypeEnum.CONTAINS_END.equals(wordContainsTypeEnumAllow)) {
|
|
||||||
maxWhite += tempLen;
|
if (WordContainsTypeEnum.CONTAINS_END.equals(wordContainsTypeEnumAllow)) {
|
||||||
|
maxWhite = tempLen;
|
||||||
|
if (failFast) {
|
||||||
|
//为falFast模式,主动设为notFound退出循环
|
||||||
wordContainsTypeEnumAllow = WordContainsTypeEnum.NOT_FOUND;
|
wordContainsTypeEnumAllow = WordContainsTypeEnum.NOT_FOUND;
|
||||||
|
|
||||||
whiteWord = stringBuilder.toString();
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (firstCheck || !WordContainsTypeEnum.NOT_FOUND.equals(wordContainsTypeEnumDeny)) {
|
if (WordContainsTypeEnum.CONTAINS_END.equals(wordContainsTypeEnumDeny)) {
|
||||||
wordContainsTypeEnumDeny = wordData.contains(stringBuilder, innerContext);
|
maxBlack = tempLen;
|
||||||
if (WordContainsTypeEnum.CONTAINS_END.equals(wordContainsTypeEnumDeny)) {
|
if (failFast) {
|
||||||
maxBlack += tempLen;
|
//为falFast模式,主动设为notFound退出循环
|
||||||
wordContainsTypeEnumDeny = WordContainsTypeEnum.NOT_FOUND;
|
wordContainsTypeEnumDeny = WordContainsTypeEnum.NOT_FOUND;
|
||||||
|
|
||||||
blackWord = stringBuilder.toString();
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
firstCheck = false;
|
|
||||||
|
|
||||||
if (WordContainsTypeEnum.NOT_FOUND.equals(wordContainsTypeEnumAllow) &&
|
if (WordContainsTypeEnum.NOT_FOUND.equals(wordContainsTypeEnumAllow) &&
|
||||||
WordContainsTypeEnum.NOT_FOUND.equals(wordContainsTypeEnumDeny)) {
|
WordContainsTypeEnum.NOT_FOUND.equals(wordContainsTypeEnumDeny)) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
String string = stringBuilder.toString();
|
||||||
|
String wordAllow = string.substring(0, Math.max(0, maxWhite - skipLen));
|
||||||
|
String wordDeny = string.substring(0, Math.max(0, maxBlack - skipLen));
|
||||||
|
|
||||||
|
|
||||||
return WordLengthResult.newInstance()
|
return WordLengthResult.newInstance()
|
||||||
.wordAllowLen(maxWhite)
|
.wordAllowLen(maxWhite)
|
||||||
.wordDenyLen(maxBlack)
|
.wordDenyLen(maxBlack)
|
||||||
.wordAllow(whiteWord)
|
.wordAllow(wordAllow)
|
||||||
.wordDeny(blackWord);
|
.wordDeny(wordDeny);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected String getType() {
|
protected String getType() {
|
||||||
return WordTypeEnum.WORD.getCode();
|
return WordTypeEnum.WORD.getCode();
|
||||||
|
|||||||
@@ -1,96 +0,0 @@
|
|||||||
package com.github.houbb.sensitive.word.support.check;
|
|
||||||
|
|
||||||
import com.github.houbb.heaven.annotation.ThreadSafe;
|
|
||||||
import com.github.houbb.sensitive.word.api.ISensitiveWordCharIgnore;
|
|
||||||
import com.github.houbb.sensitive.word.api.IWordCheck;
|
|
||||||
import com.github.houbb.sensitive.word.api.IWordContext;
|
|
||||||
import com.github.houbb.sensitive.word.api.IWordData;
|
|
||||||
import com.github.houbb.sensitive.word.api.context.InnerSensitiveWordContext;
|
|
||||||
import com.github.houbb.sensitive.word.constant.enums.WordContainsTypeEnum;
|
|
||||||
import com.github.houbb.sensitive.word.constant.enums.WordTypeEnum;
|
|
||||||
import com.github.houbb.sensitive.word.support.result.WordLengthResult;
|
|
||||||
|
|
||||||
import java.util.Map;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* 敏感词监测实现
|
|
||||||
* @author binbin.hou
|
|
||||||
* @since 0.26.0
|
|
||||||
*/
|
|
||||||
@Deprecated
|
|
||||||
public class WordCheckWordMaxLen extends AbstractWordCheck {
|
|
||||||
|
|
||||||
@Override
|
|
||||||
protected Class<? extends IWordCheck> getSensitiveCheckClass() {
|
|
||||||
return WordCheckWordMaxLen.class;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
protected WordLengthResult getActualLength(int beginIndex, InnerSensitiveWordContext innerContext) {
|
|
||||||
final String txt = innerContext.originalText();
|
|
||||||
final Map<Character, Character> formatCharMapping = innerContext.formatCharMapping();
|
|
||||||
final IWordContext context = innerContext.wordContext();
|
|
||||||
final IWordData wordData = context.wordData();
|
|
||||||
final IWordData wordDataAllow = context.wordDataAllow();
|
|
||||||
final ISensitiveWordCharIgnore wordCharIgnore = context.charIgnore();
|
|
||||||
|
|
||||||
// 前一个条件
|
|
||||||
StringBuilder stringBuilder = new StringBuilder();
|
|
||||||
char[] rawChars = txt.toCharArray();
|
|
||||||
|
|
||||||
int tempLen = 0;
|
|
||||||
int maxWhite = 0;
|
|
||||||
int maxBlack = 0;
|
|
||||||
boolean firstCheck = true;
|
|
||||||
|
|
||||||
WordContainsTypeEnum wordContainsTypeEnumAllow = wordDataAllow.contains(stringBuilder, innerContext);
|
|
||||||
WordContainsTypeEnum wordContainsTypeEnumDeny = wordData.contains(stringBuilder, innerContext);
|
|
||||||
|
|
||||||
for (int i = beginIndex; i < rawChars.length; i++) {
|
|
||||||
if (wordCharIgnore.ignore(i, rawChars, innerContext) && tempLen != 0) {
|
|
||||||
tempLen++;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
char mappingChar = formatCharMapping.get(rawChars[i]);
|
|
||||||
stringBuilder.append(mappingChar);
|
|
||||||
tempLen++;
|
|
||||||
|
|
||||||
if (firstCheck || !WordContainsTypeEnum.NOT_FOUND.equals(wordContainsTypeEnumAllow)) {
|
|
||||||
wordContainsTypeEnumAllow = wordDataAllow.contains(stringBuilder, innerContext);
|
|
||||||
if (WordContainsTypeEnum.CONTAINS_END.equals(wordContainsTypeEnumAllow)) {
|
|
||||||
maxWhite += tempLen;
|
|
||||||
wordContainsTypeEnumAllow = WordContainsTypeEnum.NOT_FOUND;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// 黑名单命中
|
|
||||||
if (firstCheck || !WordContainsTypeEnum.NOT_FOUND.equals(wordContainsTypeEnumDeny)) {
|
|
||||||
wordContainsTypeEnumDeny = wordData.contains(stringBuilder, innerContext);
|
|
||||||
if (WordContainsTypeEnum.CONTAINS_END.equals(wordContainsTypeEnumDeny)) {
|
|
||||||
maxBlack += tempLen;
|
|
||||||
wordContainsTypeEnumDeny = WordContainsTypeEnum.NOT_FOUND;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// 不再是第一次检测
|
|
||||||
firstCheck = false;
|
|
||||||
|
|
||||||
// 黑白名单都未匹配
|
|
||||||
if (WordContainsTypeEnum.NOT_FOUND.equals(wordContainsTypeEnumAllow) &&
|
|
||||||
WordContainsTypeEnum.NOT_FOUND.equals(wordContainsTypeEnumDeny)) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return WordLengthResult.newInstance()
|
|
||||||
.wordAllowLen(maxWhite)
|
|
||||||
.wordDenyLen(maxBlack);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
protected String getType() {
|
|
||||||
return WordTypeEnum.WORD.getCode();
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
@@ -0,0 +1,243 @@
|
|||||||
|
package com.github.houbb.sensitive.word.bs;
|
||||||
|
|
||||||
|
import com.github.houbb.sensitive.word.api.IWordAllow;
|
||||||
|
import com.github.houbb.sensitive.word.api.IWordDeny;
|
||||||
|
import org.junit.Assert;
|
||||||
|
import org.junit.Test;
|
||||||
|
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
public class SensitiveWordFailFastTest {
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void failFastTest() {
|
||||||
|
SensitiveWordBs bs = SensitiveWordBs.newInstance()
|
||||||
|
.failFastWordPattern(true)
|
||||||
|
.wordDeny(new IWordDeny() {
|
||||||
|
@Override
|
||||||
|
public List<String> deny() {
|
||||||
|
return Arrays.asList("我的世界", "我的");
|
||||||
|
}
|
||||||
|
}).init();
|
||||||
|
|
||||||
|
SensitiveWordBs bs1 = SensitiveWordBs.newInstance()
|
||||||
|
.failFastWordPattern(true)
|
||||||
|
.wordDeny(new IWordDeny() {
|
||||||
|
@Override
|
||||||
|
public List<String> deny() {
|
||||||
|
return Collections.singletonList("操你妈");
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.wordAllow(new IWordAllow() {
|
||||||
|
@Override
|
||||||
|
public List<String> allow() {
|
||||||
|
return Collections.singletonList("你");
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.init();
|
||||||
|
|
||||||
|
|
||||||
|
//黑长白短,且初始下标一致
|
||||||
|
SensitiveWordBs bs2 = SensitiveWordBs.newInstance()
|
||||||
|
.failFastWordPattern(true)
|
||||||
|
.wordDeny(new IWordDeny() {
|
||||||
|
@Override
|
||||||
|
public List<String> deny() {
|
||||||
|
return Collections.singletonList("大傻逼");
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.wordAllow(new IWordAllow() {
|
||||||
|
@Override
|
||||||
|
public List<String> allow() {
|
||||||
|
return Collections.singletonList("大");
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.init();
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
//白长黑短,且白和黑初始下标不再一起
|
||||||
|
SensitiveWordBs bs3 = SensitiveWordBs.newInstance()
|
||||||
|
.failFastWordPattern(true)
|
||||||
|
.wordDeny(new IWordDeny() {
|
||||||
|
@Override
|
||||||
|
public List<String> deny() {
|
||||||
|
return Collections.singletonList("口交");
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.wordAllow(new IWordAllow() {
|
||||||
|
@Override
|
||||||
|
public List<String> allow() {
|
||||||
|
return Collections.singletonList("地铁口交易");
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.init();
|
||||||
|
|
||||||
|
|
||||||
|
//白长黑短,且白和黑初始下标在一起
|
||||||
|
SensitiveWordBs bs4 = SensitiveWordBs.newInstance()
|
||||||
|
.failFastWordPattern(true)
|
||||||
|
.wordDeny(new IWordDeny() {
|
||||||
|
@Override
|
||||||
|
public List<String> deny() {
|
||||||
|
return Collections.singletonList("龟孙");
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.wordAllow(new IWordAllow() {
|
||||||
|
@Override
|
||||||
|
public List<String> allow() {
|
||||||
|
return Collections.singletonList("龟孙可");
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.init();
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
String text = "我在我的家里玩我的世界";
|
||||||
|
List<String> textList = bs.findAll(text);
|
||||||
|
Assert.assertEquals(Arrays.asList("我的", "我的"), textList);
|
||||||
|
|
||||||
|
|
||||||
|
String text1 = "操你妈";
|
||||||
|
List<String> textList1 = bs1.findAll(text1);
|
||||||
|
Assert.assertEquals(Collections.singletonList("操你妈"), textList1);
|
||||||
|
|
||||||
|
String text2 = "大傻逼";
|
||||||
|
List<String> textList2 = bs2.findAll(text2);
|
||||||
|
Assert.assertEquals(Collections.singletonList("大傻逼"), textList2);
|
||||||
|
|
||||||
|
|
||||||
|
String text3 = "地铁口交易";
|
||||||
|
List<String> textList3 = bs3.findAll(text3);
|
||||||
|
Assert.assertTrue("Expected empty list", textList3.isEmpty());
|
||||||
|
|
||||||
|
String text4 = "龟孙可";
|
||||||
|
List<String> textList4 = bs4.findAll(text4);
|
||||||
|
Assert.assertTrue("Expected empty list", textList4.isEmpty());
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void fallOverTest() {
|
||||||
|
SensitiveWordBs bs = SensitiveWordBs.newInstance()
|
||||||
|
.failFastWordPattern(false)
|
||||||
|
.wordDeny(new IWordDeny() {
|
||||||
|
@Override
|
||||||
|
public List<String> deny() {
|
||||||
|
return Arrays.asList("我的世界", "我的");
|
||||||
|
}
|
||||||
|
}).init();
|
||||||
|
|
||||||
|
|
||||||
|
//黑长白短,且初始下标不一致
|
||||||
|
SensitiveWordBs bs1 = SensitiveWordBs.newInstance()
|
||||||
|
.failFastWordPattern(false)
|
||||||
|
.wordDeny(new IWordDeny() {
|
||||||
|
@Override
|
||||||
|
public List<String> deny() {
|
||||||
|
return Collections.singletonList("操你妈");
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.wordAllow(new IWordAllow() {
|
||||||
|
@Override
|
||||||
|
public List<String> allow() {
|
||||||
|
return Collections.singletonList("你");
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.init();
|
||||||
|
|
||||||
|
|
||||||
|
//黑长白短,且初始下标一致
|
||||||
|
SensitiveWordBs bs2 = SensitiveWordBs.newInstance()
|
||||||
|
.failFastWordPattern(false)
|
||||||
|
.wordDeny(new IWordDeny() {
|
||||||
|
@Override
|
||||||
|
public List<String> deny() {
|
||||||
|
return Collections.singletonList("大傻逼");
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.wordAllow(new IWordAllow() {
|
||||||
|
@Override
|
||||||
|
public List<String> allow() {
|
||||||
|
return Collections.singletonList("大");
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.init();
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
//白长黑短,且白和黑初始下标不再一起
|
||||||
|
SensitiveWordBs bs3 = SensitiveWordBs.newInstance()
|
||||||
|
.failFastWordPattern(false)
|
||||||
|
.wordDeny(new IWordDeny() {
|
||||||
|
@Override
|
||||||
|
public List<String> deny() {
|
||||||
|
return Collections.singletonList("口交");
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.wordAllow(new IWordAllow() {
|
||||||
|
@Override
|
||||||
|
public List<String> allow() {
|
||||||
|
return Collections.singletonList("地铁口交易");
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.init();
|
||||||
|
|
||||||
|
|
||||||
|
//白长黑短,且白和黑初始下标在一起
|
||||||
|
SensitiveWordBs bs4 = SensitiveWordBs.newInstance()
|
||||||
|
.failFastWordPattern(false)
|
||||||
|
.wordDeny(new IWordDeny() {
|
||||||
|
@Override
|
||||||
|
public List<String> deny() {
|
||||||
|
return Collections.singletonList("龟孙");
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.wordAllow(new IWordAllow() {
|
||||||
|
@Override
|
||||||
|
public List<String> allow() {
|
||||||
|
return Collections.singletonList("龟孙可");
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.init();
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
String text = "我在我的家里玩我的世界";
|
||||||
|
List<String> textList = bs.findAll(text);
|
||||||
|
Assert.assertEquals(Arrays.asList("我的", "我的世界"), textList);
|
||||||
|
|
||||||
|
|
||||||
|
String text1 = "操你妈";
|
||||||
|
List<String> textList1 = bs1.findAll(text1);
|
||||||
|
Assert.assertEquals(Collections.singletonList("操你妈"), textList1);
|
||||||
|
|
||||||
|
String text2 = "大傻逼";
|
||||||
|
List<String> textList2 = bs2.findAll(text2);
|
||||||
|
Assert.assertEquals(Collections.singletonList("大傻逼"), textList2);
|
||||||
|
|
||||||
|
|
||||||
|
String text3 = "地铁口交易";
|
||||||
|
List<String> textList3 = bs3.findAll(text3);
|
||||||
|
Assert.assertTrue("Expected empty list", textList3.isEmpty());
|
||||||
|
|
||||||
|
String text4 = "龟孙可";
|
||||||
|
List<String> textList4 = bs4.findAll(text4);
|
||||||
|
Assert.assertTrue("Expected empty list", textList4.isEmpty());
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
@@ -1,28 +0,0 @@
|
|||||||
package com.github.houbb.sensitive.word.bs;
|
|
||||||
|
|
||||||
import com.github.houbb.sensitive.word.api.IWordDeny;
|
|
||||||
import org.junit.Assert;
|
|
||||||
import org.junit.Test;
|
|
||||||
|
|
||||||
import java.util.Arrays;
|
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
public class SensitiveWordMaxFirstTest {
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void maxFirstTest() {
|
|
||||||
SensitiveWordBs bs = SensitiveWordBs.newInstance()
|
|
||||||
.wordDeny(new IWordDeny() {
|
|
||||||
@Override
|
|
||||||
public List<String> deny() {
|
|
||||||
return Arrays.asList("我的世界", "我的");
|
|
||||||
}
|
|
||||||
}).init();
|
|
||||||
|
|
||||||
String text = "我的世界我的好玩";
|
|
||||||
|
|
||||||
List<String> textList = bs.findAll(text);
|
|
||||||
// Assert.assertEquals("", textList.toString());
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
Reference in New Issue
Block a user