mirror of
https://github.com/houbb/sensitive-word.git
synced 2026-03-22 08:27:36 +08:00
release branch 0.11.0
This commit is contained in:
@@ -212,3 +212,9 @@
|
||||
| 序号 | 变更类型 | 说明 | 时间 | 备注 |
|
||||
|:---|:-----|------------------|:--------------------|:------|
|
||||
| 1 | A | 添加脏词的标签接口,便于后续拓展 | 2023-12-05 23:51:58 | |
|
||||
|
||||
# release_0.11.0
|
||||
|
||||
| 序号 | 变更类型 | 说明 | 时间 | 备注 |
|
||||
|:---|:-----|----------------------|:--------------------|:------|
|
||||
| 1 | A | 添加忽略字符接口,便于跳过一些干扰的字符 | 2023-12-08 23:51:58 | |
|
||||
39
README.md
39
README.md
@@ -42,6 +42,8 @@
|
||||
|
||||
- [支持敏感词的标签接口](https://github.com/houbb/sensitive-word#%E6%95%8F%E6%84%9F%E8%AF%8D%E6%A0%87%E7%AD%BE)
|
||||
|
||||
- [支持跳过一些特殊字符,让匹配更灵活](https://github.com/houbb/sensitive-word#%E5%BF%BD%E7%95%A5%E5%AD%97%E7%AC%A6)
|
||||
|
||||
## 变更日志
|
||||
|
||||
[CHANGE_LOG.md](https://github.com/houbb/sensitive-word/blob/master/CHANGE_LOG.md)
|
||||
@@ -60,7 +62,7 @@
|
||||
<dependency>
|
||||
<groupId>com.github.houbb</groupId>
|
||||
<artifactId>sensitive-word</artifactId>
|
||||
<version>0.10.0</version>
|
||||
<version>0.11.0</version>
|
||||
</dependency>
|
||||
```
|
||||
|
||||
@@ -390,6 +392,41 @@ Assert.assertTrue(wordBs.contains(text));
|
||||
| 10 | enableWordCheck | 是否启用敏感单词检测 | true |
|
||||
| 11 | numCheckLen | 数字检测,自定义指定长度。 | 8 |
|
||||
|
||||
|
||||
# 忽略字符
|
||||
|
||||
## 说明
|
||||
|
||||
我们的敏感词一般都是比较连续的,比如【傻帽】
|
||||
|
||||
那就有大聪明发现,可以在中间加一些字符,比如【傻!@#$帽】跳过检测,但是骂人等攻击力不减。
|
||||
|
||||
那么,如何应对这些类似的场景呢?
|
||||
|
||||
我们可以指定特殊字符的跳过集合,忽略掉这些无意义的字符即可。
|
||||
|
||||
v0.11.0 开始支持
|
||||
|
||||
## 例子
|
||||
|
||||
其中 charIgnore 对应的字符策略,用户可以自行灵活定义。
|
||||
|
||||
```java
|
||||
final String text = "傻@冒,狗+东西";
|
||||
|
||||
//默认因为有特殊字符分割,无法识别
|
||||
List<String> wordList = SensitiveWordBs.newInstance().init().findAll(text);
|
||||
Assert.assertEquals("[]", wordList.toString());
|
||||
|
||||
// 指定忽略的字符策略,可自行实现。
|
||||
List<String> wordList2 = SensitiveWordBs.newInstance()
|
||||
.charIgnore(SensitiveWordCharIgnores.specialChars())
|
||||
.init()
|
||||
.findAll(text);
|
||||
|
||||
Assert.assertEquals("[傻@冒, 狗+东西]", wordList2.toString());
|
||||
```
|
||||
|
||||
# 敏感词标签
|
||||
|
||||
## 说明
|
||||
|
||||
4
pom.xml
4
pom.xml
@@ -6,7 +6,7 @@
|
||||
|
||||
<groupId>com.github.houbb</groupId>
|
||||
<artifactId>sensitive-word</artifactId>
|
||||
<version>0.10.0</version>
|
||||
<version>0.11.0</version>
|
||||
|
||||
<properties>
|
||||
<!--============================== All Plugins START ==============================-->
|
||||
@@ -25,7 +25,7 @@
|
||||
<project.compiler.level>1.7</project.compiler.level>
|
||||
|
||||
<!--============================== INTER ==============================-->
|
||||
<heaven.version>0.2.7</heaven.version>
|
||||
<heaven.version>0.6.0</heaven.version>
|
||||
<opencc4j.version>1.8.1</opencc4j.version>
|
||||
|
||||
<!--============================== OTHER ==============================-->
|
||||
|
||||
@@ -10,9 +10,9 @@ ECHO "============================= RELEASE START..."
|
||||
|
||||
:: 版本号信息(需要手动指定)
|
||||
:::: 旧版本名称
|
||||
SET version=0.10.0
|
||||
SET version=0.11.0
|
||||
:::: 新版本名称
|
||||
SET newVersion=0.11.0
|
||||
SET newVersion=0.12.0
|
||||
:::: 组织名称
|
||||
SET groupName=com.github.houbb
|
||||
:::: 项目名称
|
||||
|
||||
@@ -0,0 +1,22 @@
|
||||
package com.github.houbb.sensitive.word.api;
|
||||
|
||||
import com.github.houbb.sensitive.word.api.context.InnerSensitiveWordContext;
|
||||
|
||||
/**
 * Strategy that decides whether a single character should be skipped
 * while text is being checked for sensitive words.
 *
 * @since 0.11.0
 */
|
||||
public interface ISensitiveWordCharIgnore {
|
||||
|
||||
/**
 * Tells whether the character at the given index should be ignored.
 *
 * @param ix index of the character under test within {@code chars}
 * @param chars character array being examined
 * @param innerContext inner context handed through by the caller
 * @return {@code true} if the character is to be skipped, {@code false} otherwise
 */
|
||||
boolean ignore(final int ix,
|
||||
final char[] chars,
|
||||
InnerSensitiveWordContext innerContext);
|
||||
|
||||
}
|
||||
@@ -237,4 +237,8 @@ public interface IWordContext {
|
||||
|
||||
SensitiveWordContext wordTag(IWordTag wordTag);
|
||||
|
||||
ISensitiveWordCharIgnore charIgnore();
|
||||
|
||||
SensitiveWordContext charIgnore(ISensitiveWordCharIgnore charIgnore);
|
||||
|
||||
}
|
||||
|
||||
@@ -15,6 +15,7 @@ import com.github.houbb.sensitive.word.support.combine.check.WordCheckCombines;
|
||||
import com.github.houbb.sensitive.word.support.combine.format.WordFormatCombines;
|
||||
import com.github.houbb.sensitive.word.support.data.WordDatas;
|
||||
import com.github.houbb.sensitive.word.support.deny.WordDenys;
|
||||
import com.github.houbb.sensitive.word.support.ignore.SensitiveWordCharIgnores;
|
||||
import com.github.houbb.sensitive.word.support.replace.WordReplaces;
|
||||
import com.github.houbb.sensitive.word.support.result.WordResultHandlers;
|
||||
import com.github.houbb.sensitive.word.support.tag.WordTags;
|
||||
@@ -156,6 +157,12 @@ public class SensitiveWordBs {
|
||||
*/
|
||||
private IWordTag wordTag = WordTags.none();
|
||||
|
||||
/**
|
||||
* 忽略的字符策略
|
||||
* @since 0.11.0
|
||||
*/
|
||||
private ISensitiveWordCharIgnore charIgnore = SensitiveWordCharIgnores.defaults();
|
||||
|
||||
/**
|
||||
* 新建验证实例
|
||||
* <p>
|
||||
@@ -225,10 +232,18 @@ public class SensitiveWordBs {
|
||||
context.wordReplace(wordReplace);
|
||||
context.wordData(wordData);
|
||||
context.wordTag(wordTag);
|
||||
context.charIgnore(charIgnore);
|
||||
|
||||
return context;
|
||||
}
|
||||
|
||||
/**
 * Sets the character-ignore strategy that is later copied into the word context.
 *
 * @param charIgnore strategy to use; checked with {@code ArgUtil.notNull}
 *                   (presumably rejects {@code null} - confirm against ArgUtil)
 * @return this instance, to allow fluent chaining
 * @since 0.11.0
 */
public SensitiveWordBs charIgnore(ISensitiveWordCharIgnore charIgnore) {
|
||||
ArgUtil.notNull(charIgnore, "charIgnore");
|
||||
|
||||
this.charIgnore = charIgnore;
|
||||
return this;
|
||||
}
|
||||
|
||||
public SensitiveWordBs wordTag(IWordTag wordTag) {
|
||||
ArgUtil.notNull(wordTag, "wordTag");
|
||||
|
||||
|
||||
@@ -107,6 +107,12 @@ public class SensitiveWordContext implements IWordContext {
|
||||
*/
|
||||
private IWordTag wordTag;
|
||||
|
||||
/**
|
||||
* 忽略的字符
|
||||
* @since 0.11.0
|
||||
*/
|
||||
private ISensitiveWordCharIgnore charIgnore;
|
||||
|
||||
public IWordData wordData() {
|
||||
return wordData;
|
||||
}
|
||||
@@ -290,4 +296,12 @@ public class SensitiveWordContext implements IWordContext {
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
 * Returns the character-ignore strategy currently stored in this context.
 *
 * @return the stored strategy, exactly as it was last set
 * @since 0.11.0
 */
public ISensitiveWordCharIgnore charIgnore() {
|
||||
return charIgnore;
|
||||
}
|
||||
|
||||
/**
 * Stores the character-ignore strategy in this context.
 * The argument is assigned as-is; no null check is performed here.
 *
 * @param charIgnore strategy to store
 * @return this context, to allow fluent chaining
 * @since 0.11.0
 */
public SensitiveWordContext charIgnore(ISensitiveWordCharIgnore charIgnore) {
|
||||
this.charIgnore = charIgnore;
|
||||
return this;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
package com.github.houbb.sensitive.word.support.check;
|
||||
|
||||
import com.github.houbb.heaven.annotation.ThreadSafe;
|
||||
import com.github.houbb.sensitive.word.api.ISensitiveWordCharIgnore;
|
||||
import com.github.houbb.sensitive.word.api.IWordCheck;
|
||||
import com.github.houbb.sensitive.word.api.IWordContext;
|
||||
import com.github.houbb.sensitive.word.api.IWordData;
|
||||
@@ -46,16 +47,26 @@ public class WordCheckWord extends AbstractWordCheck {
|
||||
// 前一个条件
|
||||
StringBuilder stringBuilder = new StringBuilder();
|
||||
char[] rawChars = txt.toCharArray();
|
||||
|
||||
final ISensitiveWordCharIgnore wordCharIgnore = context.charIgnore();
|
||||
int tempLen = 0;
|
||||
for(int i = beginIndex; i < rawChars.length; i++) {
|
||||
// 判断是否跳过?
|
||||
if(wordCharIgnore.ignore(i, rawChars, innerContext)) {
|
||||
tempLen++;
|
||||
continue;
|
||||
}
|
||||
|
||||
// 映射处理
|
||||
final char currentChar = rawChars[i];
|
||||
char mappingChar = formatCharMapping.get(currentChar);
|
||||
stringBuilder.append(mappingChar);
|
||||
tempLen++;
|
||||
|
||||
// 判断是否存在
|
||||
WordContainsTypeEnum wordContainsTypeEnum = wordData.contains(stringBuilder, innerContext);
|
||||
if(WordContainsTypeEnum.CONTAINS_END.equals(wordContainsTypeEnum)) {
|
||||
actualLength = stringBuilder.length();
|
||||
actualLength = tempLen;
|
||||
|
||||
// 是否遍历全部匹配的模式
|
||||
if(WordValidModeEnum.FAIL_FAST.equals(wordValidModeEnum)) {
|
||||
|
||||
@@ -0,0 +1,19 @@
|
||||
package com.github.houbb.sensitive.word.support.ignore;
|
||||
|
||||
import com.github.houbb.sensitive.word.api.ISensitiveWordCharIgnore;
|
||||
import com.github.houbb.sensitive.word.api.context.InnerSensitiveWordContext;
|
||||
|
||||
/**
 * Abstract base implementation of {@link ISensitiveWordCharIgnore}.
 * Subclasses supply the actual decision in {@link #doIgnore}.
 *
 * @since 0.11.0
 */
|
||||
public abstract class AbstractSensitiveWordCharIgnore implements ISensitiveWordCharIgnore {
|
||||
|
||||
/**
 * Decides whether the character at {@code ix} is to be ignored.
 *
 * @param ix index of the character under test within {@code chars}
 * @param chars character array being examined
 * @param innerContext inner context handed through by the caller
 * @return {@code true} to ignore the character
 */
protected abstract boolean doIgnore(int ix, char[] chars, InnerSensitiveWordContext innerContext);
|
||||
|
||||
/**
 * Forwards directly to {@link #doIgnore} with the same arguments.
 */
@Override
|
||||
public boolean ignore(int ix, char[] chars, InnerSensitiveWordContext innerContext) {
|
||||
return doIgnore(ix, chars, innerContext);
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,16 @@
|
||||
package com.github.houbb.sensitive.word.support.ignore;
|
||||
|
||||
import com.github.houbb.sensitive.word.api.context.InnerSensitiveWordContext;
|
||||
|
||||
/**
 * Strategy that never ignores any character.
 *
 * @since 0.11.0
 */
|
||||
public class NoneSensitiveWordCharIgnore extends AbstractSensitiveWordCharIgnore {
|
||||
|
||||
/**
 * Always answers {@code false}; none of the arguments are inspected.
 */
@Override
|
||||
protected boolean doIgnore(int ix, char[] chars, InnerSensitiveWordContext innerContext) {
|
||||
return false;
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,22 @@
|
||||
package com.github.houbb.sensitive.word.support.ignore;
|
||||
|
||||
import com.github.houbb.sensitive.word.api.ISensitiveWordCharIgnore;
|
||||
|
||||
/**
 * Static factory methods for the built-in {@link ISensitiveWordCharIgnore} strategies.
 *
 * @since 0.11.0
 */
|
||||
public class SensitiveWordCharIgnores {
|
||||
|
||||
/**
 * Creates a strategy backed by {@code SpecialCharSensitiveWordCharIgnore}.
 *
 * @return a new instance on every call
 */
public static ISensitiveWordCharIgnore specialChars() {
|
||||
return new SpecialCharSensitiveWordCharIgnore();
|
||||
}
|
||||
|
||||
/**
 * Creates a strategy backed by {@code NoneSensitiveWordCharIgnore}.
 *
 * @return a new instance on every call
 */
public static ISensitiveWordCharIgnore none() {
|
||||
return new NoneSensitiveWordCharIgnore();
|
||||
}
|
||||
|
||||
/**
 * Returns the default strategy, which is the one produced by {@link #none()}.
 *
 * @return the default strategy
 */
public static ISensitiveWordCharIgnore defaults() {
|
||||
return none();
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,28 @@
|
||||
package com.github.houbb.sensitive.word.support.ignore;
|
||||
|
||||
import com.github.houbb.heaven.util.lang.StringUtil;
|
||||
import com.github.houbb.sensitive.word.api.context.InnerSensitiveWordContext;
|
||||
|
||||
import java.util.Set;
|
||||
|
||||
/**
 * Strategy that ignores a fixed set of special characters.
 *
 * @since 0.11.0
 */
|
||||
public class SpecialCharSensitiveWordCharIgnore extends AbstractSensitiveWordCharIgnore {
|
||||
|
||||
// Characters treated as ignorable. Every entry is an ASCII punctuation or
// symbol character; whitespace and full-width punctuation are not listed.
private static final String SPECIAL = "`-=~!@#$%^&*()_+[]{}\\|;:'\",./<>?";
|
||||
|
||||
// Lookup set built once from SPECIAL in the static initializer below.
private static final Set<Character> SET;
|
||||
|
||||
static {
|
||||
// presumably yields one set entry per character of SPECIAL - confirm against StringUtil
SET = StringUtil.toCharSet(SPECIAL);
|
||||
}
|
||||
|
||||
/**
 * Answers {@code true} when the character at {@code ix} is contained in the special set.
 * {@code innerContext} is not used.
 */
@Override
|
||||
protected boolean doIgnore(int ix, char[] chars, InnerSensitiveWordContext innerContext) {
|
||||
char c = chars[ix];
|
||||
return SET.contains(c);
|
||||
}
|
||||
|
||||
}
|
||||
@@ -65141,3 +65141,8 @@ z以留吧以其以武
|
||||
龟投
|
||||
龟毛
|
||||
𫔰苞价咯
|
||||
傻逼
|
||||
傻冒
|
||||
狗东西
|
||||
草你大爷
|
||||
操你大爷
|
||||
@@ -38,4 +38,11 @@ public class SensitiveWordBsEmailTest {
|
||||
Assert.assertEquals("[123456789, xx.com]", wordList.toString());
|
||||
}
|
||||
|
||||
/**
 * Checks that the default configuration reports no sensitive words
 * for the sample text used below.
 */
@Test
|
||||
public void emailTest() {
|
||||
final String text = "你我.他你";
|
||||
List<String> wordList = SensitiveWordBs.newInstance().init().findAll(text);
|
||||
Assert.assertEquals("[]", wordList.toString());
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -0,0 +1,39 @@
|
||||
package com.github.houbb.sensitive.word.bs;
|
||||
|
||||
import com.github.houbb.sensitive.word.support.ignore.SensitiveWordCharIgnores;
|
||||
import org.junit.Assert;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
 * Tests for the character-ignore strategy of {@code SensitiveWordBs}.
 *
 * <p> project: sensitive-word-SensitiveWordBsTest </p>
 * <p> create on 2020/1/7 23:43 </p>
 *
 * @author Administrator
 * @since 0.11.0
 */
|
||||
public class SensitiveWordBsIgnoreCharTest {
|
||||
|
||||
/**
 * Verifies that words interrupted by special characters are missed with the
 * default configuration and found once the special-chars ignore strategy is set.
 *
 * NOTE(review): the method name refers to Chinese style, but the body exercises
 * character ignoring - consider renaming.
 *
 * @since 0.11.0
 */
|
||||
@Test
|
||||
public void ignoreChineseStyleTest() {
|
||||
final String text = "傻@冒,狗+东西";
|
||||
|
||||
// With the defaults the special characters split the words, so nothing is detected.
|
||||
List<String> wordList = SensitiveWordBs.newInstance().init().findAll(text);
|
||||
Assert.assertEquals("[]", wordList.toString());
|
||||
|
||||
// Supply a char-ignore strategy; callers may also provide their own implementation.
|
||||
List<String> wordList2 = SensitiveWordBs.newInstance()
|
||||
.charIgnore(SensitiveWordCharIgnores.specialChars())
|
||||
.init()
|
||||
.findAll(text);
|
||||
|
||||
Assert.assertEquals("[傻@冒, 狗+东西]", wordList2.toString());
|
||||
}
|
||||
|
||||
}
|
||||
Reference in New Issue
Block a user