release branch 0.11.0

This commit is contained in:
houbb
2023-12-09 00:38:10 +08:00
parent 2ddb9e1ac6
commit 56df8bd648
16 changed files with 251 additions and 6 deletions

View File

@@ -212,3 +212,9 @@
| 序号 | 变更类型 | 说明 | 时间 | 备注 |
|:---|:-----|------------------|:--------------------|:------|
| 1 | A | 添加脏词的标签接口,便于后续拓展 | 2023-12-05 23:51:58 | |
# release_0.11.0
| 序号 | 变更类型 | 说明 | 时间 | 备注 |
|:---|:-----|----------------------|:--------------------|:------|
| 1 | A | 添加忽略字符接口,便于跳过一些干扰的字符 | 2023-12-08 23:51:58 | |

View File

@@ -42,6 +42,8 @@
- [支持敏感词的标签接口](https://github.com/houbb/sensitive-word#%E6%95%8F%E6%84%9F%E8%AF%8D%E6%A0%87%E7%AD%BE)
- [支持跳过一些特殊字符,让匹配更灵活](https://github.com/houbb/sensitive-word#%E5%BF%BD%E7%95%A5%E5%AD%97%E7%AC%A6)
## 变更日志
[CHANGE_LOG.md](https://github.com/houbb/sensitive-word/blob/master/CHANGE_LOG.md)
@@ -60,7 +62,7 @@
<dependency>
<groupId>com.github.houbb</groupId>
<artifactId>sensitive-word</artifactId>
<version>0.10.0</version>
<version>0.11.0</version>
</dependency>
```
@@ -390,6 +392,41 @@ Assert.assertTrue(wordBs.contains(text));
| 10 | enableWordCheck | 是否启用敏感单词检测 | true |
| 11 | numCheckLen | 数字检测,自定义指定长度。 | 8 |
# 忽略字符
## 说明
我们的敏感词一般都是比较连续的,比如【傻帽】
那就有大聪明发现,可以在中间加一些字符,比如【傻!@#$帽】跳过检测,但是骂人等攻击力不减。
那么,如何应对这些类似的场景呢?
我们可以指定特殊字符的跳过集合,忽略掉这些无意义的字符即可。
v0.11.0 开始支持
## 例子
其中 charIgnore 对应的字符策略,用户可以自行灵活定义。
```java
final String text = "傻@冒,狗+东西";
//默认因为有特殊字符分割,无法识别
List<String> wordList = SensitiveWordBs.newInstance().init().findAll(text);
Assert.assertEquals("[]", wordList.toString());
// 指定忽略的字符策略,可自行实现。
List<String> wordList2 = SensitiveWordBs.newInstance()
.charIgnore(SensitiveWordCharIgnores.specialChars())
.init()
.findAll(text);
Assert.assertEquals("[傻@冒, 狗+东西]", wordList2.toString());
```
# 敏感词标签
## 说明

View File

@@ -6,7 +6,7 @@
<groupId>com.github.houbb</groupId>
<artifactId>sensitive-word</artifactId>
<version>0.10.0</version>
<version>0.11.0</version>
<properties>
<!--============================== All Plugins START ==============================-->
@@ -25,7 +25,7 @@
<project.compiler.level>1.7</project.compiler.level>
<!--============================== INTER ==============================-->
<heaven.version>0.2.7</heaven.version>
<heaven.version>0.6.0</heaven.version>
<opencc4j.version>1.8.1</opencc4j.version>
<!--============================== OTHER ==============================-->

View File

@@ -10,9 +10,9 @@ ECHO "============================= RELEASE START..."
:: 版本号信息(需要手动指定)
:::: 旧版本名称
SET version=0.10.0
SET version=0.11.0
:::: 新版本名称
SET newVersion=0.11.0
SET newVersion=0.12.0
:::: 组织名称
SET groupName=com.github.houbb
:::: 项目名称

View File

@@ -0,0 +1,22 @@
package com.github.houbb.sensitive.word.api;
import com.github.houbb.sensitive.word.api.context.InnerSensitiveWordContext;
/**
* Strategy that decides whether a single character of the scanned text should be ignored.
* @since 0.11.0
*/
public interface ISensitiveWordCharIgnore {
/**
* Decides whether the character at the given position should be ignored.
* @param ix index of the character under test within {@code chars}
* @param chars character array being scanned
* @param innerContext inner context of the current check
* @return {@code true} if the character should be ignored, {@code false} otherwise
*/
boolean ignore(final int ix,
final char[] chars,
InnerSensitiveWordContext innerContext);
}

View File

@@ -237,4 +237,8 @@ public interface IWordContext {
SensitiveWordContext wordTag(IWordTag wordTag);
/**
* Returns the char-ignore strategy held by this context.
* @return the char-ignore strategy
* @since 0.11.0
*/
ISensitiveWordCharIgnore charIgnore();
/**
* Sets the char-ignore strategy held by this context.
* @param charIgnore the char-ignore strategy to use
* @return the context the strategy was set on
* @since 0.11.0
*/
SensitiveWordContext charIgnore(ISensitiveWordCharIgnore charIgnore);
}

View File

@@ -15,6 +15,7 @@ import com.github.houbb.sensitive.word.support.combine.check.WordCheckCombines;
import com.github.houbb.sensitive.word.support.combine.format.WordFormatCombines;
import com.github.houbb.sensitive.word.support.data.WordDatas;
import com.github.houbb.sensitive.word.support.deny.WordDenys;
import com.github.houbb.sensitive.word.support.ignore.SensitiveWordCharIgnores;
import com.github.houbb.sensitive.word.support.replace.WordReplaces;
import com.github.houbb.sensitive.word.support.result.WordResultHandlers;
import com.github.houbb.sensitive.word.support.tag.WordTags;
@@ -156,6 +157,12 @@ public class SensitiveWordBs {
*/
private IWordTag wordTag = WordTags.none();
/**
* 忽略的字符策略
* @since 0.11.0
*/
private ISensitiveWordCharIgnore charIgnore = SensitiveWordCharIgnores.defaults();
/**
* 新建验证实例
* <p>
@@ -225,10 +232,18 @@ public class SensitiveWordBs {
context.wordReplace(wordReplace);
context.wordData(wordData);
context.wordTag(wordTag);
context.charIgnore(charIgnore);
return context;
}
/**
 * Sets the char-ignore strategy that is copied into the word context.
 *
 * @param charIgnore char-ignore strategy; checked with {@code ArgUtil.notNull} before it is stored
 * @return this instance, so calls can be chained
 * @since 0.11.0
 */
public SensitiveWordBs charIgnore(final ISensitiveWordCharIgnore charIgnore) {
    ArgUtil.notNull(charIgnore, "charIgnore");

    this.charIgnore = charIgnore;
    return this;
}
public SensitiveWordBs wordTag(IWordTag wordTag) {
ArgUtil.notNull(wordTag, "wordTag");

View File

@@ -107,6 +107,12 @@ public class SensitiveWordContext implements IWordContext {
*/
private IWordTag wordTag;
/**
* 忽略的字符
* @since 0.11.0
*/
private ISensitiveWordCharIgnore charIgnore;
public IWordData wordData() {
return wordData;
}
@@ -290,4 +296,12 @@ public class SensitiveWordContext implements IWordContext {
return this;
}
/**
 * Returns the char-ignore strategy stored in this context.
 *
 * @return the stored char-ignore strategy
 * @since 0.11.0
 */
public ISensitiveWordCharIgnore charIgnore() {
    return this.charIgnore;
}
/**
 * Stores the char-ignore strategy in this context.
 *
 * @param charIgnore char-ignore strategy to store (stored as given, no validation here)
 * @return this context, so calls can be chained
 * @since 0.11.0
 */
public SensitiveWordContext charIgnore(final ISensitiveWordCharIgnore charIgnore) {
    this.charIgnore = charIgnore;
    return this;
}
}

View File

@@ -1,6 +1,7 @@
package com.github.houbb.sensitive.word.support.check;
import com.github.houbb.heaven.annotation.ThreadSafe;
import com.github.houbb.sensitive.word.api.ISensitiveWordCharIgnore;
import com.github.houbb.sensitive.word.api.IWordCheck;
import com.github.houbb.sensitive.word.api.IWordContext;
import com.github.houbb.sensitive.word.api.IWordData;
@@ -46,16 +47,26 @@ public class WordCheckWord extends AbstractWordCheck {
// 前一个条件
StringBuilder stringBuilder = new StringBuilder();
char[] rawChars = txt.toCharArray();
final ISensitiveWordCharIgnore wordCharIgnore = context.charIgnore();
int tempLen = 0;
for(int i = beginIndex; i < rawChars.length; i++) {
// 判断是否跳过?
if(wordCharIgnore.ignore(i, rawChars, innerContext)) {
tempLen++;
continue;
}
// 映射处理
final char currentChar = rawChars[i];
char mappingChar = formatCharMapping.get(currentChar);
stringBuilder.append(mappingChar);
tempLen++;
// 判断是否存在
WordContainsTypeEnum wordContainsTypeEnum = wordData.contains(stringBuilder, innerContext);
if(WordContainsTypeEnum.CONTAINS_END.equals(wordContainsTypeEnum)) {
actualLength = stringBuilder.length();
actualLength = tempLen;
// 是否遍历全部匹配的模式
if(WordValidModeEnum.FAIL_FAST.equals(wordValidModeEnum)) {

View File

@@ -0,0 +1,19 @@
package com.github.houbb.sensitive.word.support.ignore;
import com.github.houbb.sensitive.word.api.ISensitiveWordCharIgnore;
import com.github.houbb.sensitive.word.api.context.InnerSensitiveWordContext;
/**
 * Base class for char-ignore strategies.
 * <p>
 * {@link #ignore(int, char[], InnerSensitiveWordContext)} forwards its arguments unchanged to
 * {@link #doIgnore(int, char[], InnerSensitiveWordContext)}, which subclasses implement.
 *
 * @since 0.11.0
 */
public abstract class AbstractSensitiveWordCharIgnore implements ISensitiveWordCharIgnore {

    /**
     * Delegates the decision to {@link #doIgnore(int, char[], InnerSensitiveWordContext)}.
     *
     * @param ix           index of the character under test within {@code chars}
     * @param chars        character array being scanned
     * @param innerContext inner context of the current check
     * @return whatever {@code doIgnore} returns for the same arguments
     */
    @Override
    public boolean ignore(final int ix, final char[] chars, final InnerSensitiveWordContext innerContext) {
        final boolean skip = this.doIgnore(ix, chars, innerContext);
        return skip;
    }

    /**
     * Strategy-specific decision whether the character at {@code ix} should be ignored.
     *
     * @param ix           index of the character under test within {@code chars}
     * @param chars        character array being scanned
     * @param innerContext inner context of the current check
     * @return {@code true} if the character should be ignored
     */
    protected abstract boolean doIgnore(int ix, char[] chars, InnerSensitiveWordContext innerContext);

}

View File

@@ -0,0 +1,16 @@
package com.github.houbb.sensitive.word.support.ignore;
import com.github.houbb.sensitive.word.api.context.InnerSensitiveWordContext;
/**
 * Char-ignore strategy that never ignores any character.
 *
 * @since 0.11.0
 */
public class NoneSensitiveWordCharIgnore extends AbstractSensitiveWordCharIgnore {

    /**
     * Always answers "do not ignore", regardless of the arguments.
     *
     * @param ix           index of the character under test (unused)
     * @param chars        character array being scanned (unused)
     * @param innerContext inner context of the current check (unused)
     * @return always {@code false}
     */
    @Override
    protected boolean doIgnore(final int ix, final char[] chars, final InnerSensitiveWordContext innerContext) {
        return false;
    }

}

View File

@@ -0,0 +1,22 @@
package com.github.houbb.sensitive.word.support.ignore;
import com.github.houbb.sensitive.word.api.ISensitiveWordCharIgnore;
/**
 * Static factory for the built-in {@link ISensitiveWordCharIgnore} strategies.
 *
 * @since 0.11.0
 */
public final class SensitiveWordCharIgnores {

    /**
     * Not instantiable: this class only hosts static factory methods.
     */
    private SensitiveWordCharIgnores() {
    }

    /**
     * Creates a strategy that ignores the characters listed in
     * {@link SpecialCharSensitiveWordCharIgnore}.
     *
     * @return a new {@link SpecialCharSensitiveWordCharIgnore}
     */
    public static ISensitiveWordCharIgnore specialChars() {
        return new SpecialCharSensitiveWordCharIgnore();
    }

    /**
     * Creates a strategy that ignores nothing.
     *
     * @return a new {@link NoneSensitiveWordCharIgnore}
     */
    public static ISensitiveWordCharIgnore none() {
        return new NoneSensitiveWordCharIgnore();
    }

    /**
     * Creates the default strategy, which is currently the same as {@link #none()}.
     *
     * @return the default strategy
     */
    public static ISensitiveWordCharIgnore defaults() {
        return none();
    }

}

View File

@@ -0,0 +1,28 @@
package com.github.houbb.sensitive.word.support.ignore;
import com.github.houbb.heaven.util.lang.StringUtil;
import com.github.houbb.sensitive.word.api.context.InnerSensitiveWordContext;
import java.util.Set;
/**
 * Char-ignore strategy that ignores a fixed set of ASCII punctuation and symbol characters.
 * <p>
 * Only the characters spelled out in {@code SPECIAL} are covered; whitespace, digits, letters
 * and non-ASCII punctuation (for example the full-width comma) are not part of the literal.
 *
 * @since 0.11.0
 */
public class SpecialCharSensitiveWordCharIgnore extends AbstractSensitiveWordCharIgnore {

    /**
     * The characters to ignore, written as one string.
     */
    private static final String SPECIAL = "`-=~!@#$%^&*()_+[]{}\\|;:'\",./<>?";

    /**
     * Lookup set built from {@link #SPECIAL} via {@code StringUtil.toCharSet}
     * (presumably one entry per character of the string; confirm against the helper).
     */
    private static final Set<Character> SET = StringUtil.toCharSet(SPECIAL);

    /**
     * Ignores the character at {@code ix} when it is contained in the lookup set.
     *
     * @param ix           index of the character under test within {@code chars}
     * @param chars        character array being scanned
     * @param innerContext inner context of the current check (unused)
     * @return {@code true} if {@code chars[ix]} is in the set
     */
    @Override
    protected boolean doIgnore(int ix, char[] chars, InnerSensitiveWordContext innerContext) {
        return SET.contains(chars[ix]);
    }

}

View File

@@ -65141,3 +65141,8 @@ z以留吧以其以武
龟投
龟毛
𫔰苞价咯
傻逼
傻冒
狗东西
草你大爷
操你大爷

View File

@@ -38,4 +38,11 @@ public class SensitiveWordBsEmailTest {
Assert.assertEquals("[123456789, xx.com]", wordList.toString());
}
/**
 * With the default configuration, no word is reported for a short CJK text containing a dot.
 * NOTE(review): presumably guards against the dot being picked up by the email/URL checks;
 * confirm the intent.
 */
@Test
public void emailTest() {
    final String input = "你我.他你";

    final String actual = SensitiveWordBs.newInstance().init().findAll(input).toString();

    Assert.assertEquals("[]", actual);
}
}

View File

@@ -0,0 +1,39 @@
package com.github.houbb.sensitive.word.bs;
import com.github.houbb.sensitive.word.support.ignore.SensitiveWordCharIgnores;
import org.junit.Assert;
import org.junit.Test;
import java.util.List;
/**
 * Tests for the char-ignore strategy of {@link SensitiveWordBs}.
 *
 * @author Administrator
 * @since 0.11.0
 */
public class SensitiveWordBsIgnoreCharTest {

    /**
     * Words split by special characters are not found by default, and are found
     * (including the separating characters) once the special-chars ignore strategy is set.
     *
     * @since 0.11.0
     */
    @Test
    public void ignoreChineseStyleTest() {
        final String text = "傻@冒,狗+东西";

        // Default configuration: the special characters break the words apart, so nothing matches.
        final List<String> defaultHits = SensitiveWordBs.newInstance().init().findAll(text);
        Assert.assertEquals("[]", defaultHits.toString());

        // With a char-ignore strategy (callers may supply their own implementation).
        final SensitiveWordBs ignoringBs = SensitiveWordBs.newInstance()
                .charIgnore(SensitiveWordCharIgnores.specialChars());
        final List<String> ignoringHits = ignoringBs.init().findAll(text);
        Assert.assertEquals("[傻@冒, 狗+东西]", ignoringHits.toString());
    }

}