mirror of
https://github.com/houbb/sensitive-word.git
synced 2026-03-22 08:27:36 +08:00
release branch 0.1.1
This commit is contained in:
@@ -120,3 +120,9 @@
|
|||||||
| 1 | A | 返回敏感词对应的下标范围 | 2021-8-8 20:51:58 | |
|
| 1 | A | 返回敏感词对应的下标范围 | 2021-8-8 20:51:58 | |
|
||||||
| 2 | U | ignoreRepeat 默认为 false | 2021-8-8 20:51:58 | |
|
| 2 | U | ignoreRepeat 默认为 false | 2021-8-8 20:51:58 | |
|
||||||
| 3 | U | 把测试、系统、买卖、彩票等常用词移出敏感词库 | 2021-8-8 20:51:58 | |
|
| 3 | U | 把测试、系统、买卖、彩票等常用词移出敏感词库 | 2021-8-8 20:51:58 | |
|
||||||
|
|
||||||
|
# release_0.1.1
|
||||||
|
|
||||||
|
| 序号 | 变更类型 | 说明 | 时间 | 备注 |
|
||||||
|
|:---|:---|:---|:---|:--|
|
||||||
|
| 1 | F | 自定义敏感词 allow/deny 进行格式化处理 | 2021-12-11 23:51:58 | |
|
||||||
|
|||||||
@@ -44,11 +44,9 @@
|
|||||||
|
|
||||||
[CHANGE_LOG.md](https://github.com/houbb/sensitive-word/blob/master/doc/CHANGE_LOG.md)
|
[CHANGE_LOG.md](https://github.com/houbb/sensitive-word/blob/master/doc/CHANGE_LOG.md)
|
||||||
|
|
||||||
v0.1.0 变更:
|
v0.1.1 变更:
|
||||||
|
|
||||||
- 返回敏感词对应的开始结束下标信息
|
- 敏感词自定义 Allow/Deny 进行格式化处理
|
||||||
|
|
||||||
- 优化词库
|
|
||||||
|
|
||||||
# 快速开始
|
# 快速开始
|
||||||
|
|
||||||
@@ -64,7 +62,7 @@ v0.1.0 变更:
|
|||||||
<dependency>
|
<dependency>
|
||||||
<groupId>com.github.houbb</groupId>
|
<groupId>com.github.houbb</groupId>
|
||||||
<artifactId>sensitive-word</artifactId>
|
<artifactId>sensitive-word</artifactId>
|
||||||
<version>0.1.0</version>
|
<version>0.1.1</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|||||||
6
pom.xml
6
pom.xml
@@ -6,7 +6,7 @@
|
|||||||
|
|
||||||
<groupId>com.github.houbb</groupId>
|
<groupId>com.github.houbb</groupId>
|
||||||
<artifactId>sensitive-word</artifactId>
|
<artifactId>sensitive-word</artifactId>
|
||||||
<version>0.1.0</version>
|
<version>0.1.1</version>
|
||||||
|
|
||||||
<properties>
|
<properties>
|
||||||
<!--============================== All Plugins START ==============================-->
|
<!--============================== All Plugins START ==============================-->
|
||||||
@@ -25,8 +25,8 @@
|
|||||||
<project.compiler.level>1.7</project.compiler.level>
|
<project.compiler.level>1.7</project.compiler.level>
|
||||||
|
|
||||||
<!--============================== INTER ==============================-->
|
<!--============================== INTER ==============================-->
|
||||||
<heaven.version>0.1.129</heaven.version>
|
<heaven.version>0.1.148</heaven.version>
|
||||||
<opencc4j.version>1.2.0</opencc4j.version>
|
<opencc4j.version>1.7.1</opencc4j.version>
|
||||||
|
|
||||||
<!--============================== OTHER ==============================-->
|
<!--============================== OTHER ==============================-->
|
||||||
<junit.version>4.13.1</junit.version>
|
<junit.version>4.13.1</junit.version>
|
||||||
|
|||||||
@@ -10,9 +10,9 @@ ECHO "============================= RELEASE START..."
|
|||||||
|
|
||||||
:: 版本号信息(需要手动指定)
|
:: 版本号信息(需要手动指定)
|
||||||
:::: 旧版本名称
|
:::: 旧版本名称
|
||||||
SET version=0.1.0
|
SET version=0.1.1
|
||||||
:::: 新版本名称
|
:::: 新版本名称
|
||||||
SET newVersion=0.2.0
|
SET newVersion=0.1.2
|
||||||
:::: 组织名称
|
:::: 组织名称
|
||||||
SET groupName=com.github.houbb
|
SET groupName=com.github.houbb
|
||||||
:::: 项目名称
|
:::: 项目名称
|
||||||
|
|||||||
@@ -9,8 +9,9 @@ import com.github.houbb.sensitive.word.support.allow.WordAllows;
|
|||||||
import com.github.houbb.sensitive.word.support.deny.WordDenys;
|
import com.github.houbb.sensitive.word.support.deny.WordDenys;
|
||||||
import com.github.houbb.sensitive.word.support.map.SensitiveWordMap;
|
import com.github.houbb.sensitive.word.support.map.SensitiveWordMap;
|
||||||
import com.github.houbb.sensitive.word.support.result.WordResultHandlers;
|
import com.github.houbb.sensitive.word.support.result.WordResultHandlers;
|
||||||
|
import com.github.houbb.sensitive.word.utils.InnerFormatUtils;
|
||||||
|
|
||||||
import java.util.List;
|
import java.util.*;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 敏感词引导类
|
* 敏感词引导类
|
||||||
@@ -64,7 +65,7 @@ public class SensitiveWordBs {
|
|||||||
// 加载配置信息
|
// 加载配置信息
|
||||||
List<String> denyList = wordDeny.deny();
|
List<String> denyList = wordDeny.deny();
|
||||||
List<String> allowList = wordAllow.allow();
|
List<String> allowList = wordAllow.allow();
|
||||||
List<String> results = CollectionUtil.difference(denyList, allowList);
|
List<String> results = getActualDenyList(denyList, allowList);
|
||||||
|
|
||||||
// 初始化 DFA 信息
|
// 初始化 DFA 信息
|
||||||
if(sensitiveWordMap == null) {
|
if(sensitiveWordMap == null) {
|
||||||
@@ -74,6 +75,59 @@ public class SensitiveWordBs {
|
|||||||
sensitiveWordMap.initWordMap(results);
|
sensitiveWordMap.initWordMap(results);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 获取禁止列表中真正的禁止词汇
|
||||||
|
* @param denyList 禁止
|
||||||
|
* @param allowList 允许
|
||||||
|
* @return 结果
|
||||||
|
* @since 0.1.1
|
||||||
|
*/
|
||||||
|
List<String> getActualDenyList(List<String> denyList,
|
||||||
|
List<String> allowList) {
|
||||||
|
if(CollectionUtil.isEmpty(denyList)) {
|
||||||
|
return Collections.emptyList();
|
||||||
|
}
|
||||||
|
if(CollectionUtil.isEmpty(allowList)) {
|
||||||
|
return denyList;
|
||||||
|
}
|
||||||
|
|
||||||
|
List<String> formatDenyList = this.formatWordList(denyList);
|
||||||
|
List<String> formatAllowList = this.formatWordList(allowList);
|
||||||
|
|
||||||
|
List<String> resultList = new ArrayList<>();
|
||||||
|
// O(1)
|
||||||
|
Set<String> allowSet = new HashSet<>(formatAllowList);
|
||||||
|
|
||||||
|
for(String deny : formatDenyList) {
|
||||||
|
if(allowSet.contains(deny)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
resultList.add(deny);
|
||||||
|
}
|
||||||
|
return resultList;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 数据格式化处理
|
||||||
|
* @param list 列表
|
||||||
|
* @return 结果
|
||||||
|
* @since 0.1.1
|
||||||
|
*/
|
||||||
|
private List<String> formatWordList(List<String> list) {
|
||||||
|
if(CollectionUtil.isEmpty(list)) {
|
||||||
|
return list;
|
||||||
|
}
|
||||||
|
|
||||||
|
List<String> resultList = new ArrayList<>(list.size());
|
||||||
|
for(String word : list) {
|
||||||
|
String formatWord = InnerFormatUtils.format(word, this.context);
|
||||||
|
resultList.add(formatWord);
|
||||||
|
}
|
||||||
|
|
||||||
|
return resultList;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 新建验证实例
|
* 新建验证实例
|
||||||
* <p>
|
* <p>
|
||||||
|
|||||||
@@ -0,0 +1,40 @@
|
|||||||
|
package com.github.houbb.sensitive.word.utils;
|
||||||
|
|
||||||
|
import com.github.houbb.heaven.support.instance.impl.Instances;
|
||||||
|
import com.github.houbb.heaven.util.lang.StringUtil;
|
||||||
|
import com.github.houbb.sensitive.word.api.ICharFormat;
|
||||||
|
import com.github.houbb.sensitive.word.api.IWordContext;
|
||||||
|
import com.github.houbb.sensitive.word.support.format.CharFormatChain;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 内部格式化工具类
|
||||||
|
* @since 0.1.1
|
||||||
|
*/
|
||||||
|
public final class InnerFormatUtils {
|
||||||
|
|
||||||
|
private InnerFormatUtils(){}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 格式化
|
||||||
|
* @param original 原始
|
||||||
|
* @param context 上下文
|
||||||
|
* @return 结果
|
||||||
|
* @since 0.1.1
|
||||||
|
*/
|
||||||
|
public static String format(String original, IWordContext context) {
|
||||||
|
if(StringUtil.isEmpty(original)) {
|
||||||
|
return original;
|
||||||
|
}
|
||||||
|
|
||||||
|
StringBuilder stringBuilder = new StringBuilder();
|
||||||
|
ICharFormat charFormat = Instances.singleton(CharFormatChain.class);
|
||||||
|
char[] chars = original.toCharArray();
|
||||||
|
for(char c : chars) {
|
||||||
|
char cf = charFormat.format(c, context);
|
||||||
|
stringBuilder.append(cf);
|
||||||
|
}
|
||||||
|
|
||||||
|
return stringBuilder.toString();
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
@@ -0,0 +1,27 @@
|
|||||||
|
package com.github.houbb.sensitive.word.bugs.b20211211;
|
||||||
|
|
||||||
|
import com.github.houbb.sensitive.word.api.IWordAllow;
|
||||||
|
import com.github.houbb.sensitive.word.api.IWordDeny;
|
||||||
|
import com.github.houbb.sensitive.word.bs.SensitiveWordBs;
|
||||||
|
import com.github.houbb.sensitive.word.support.allow.WordAllows;
|
||||||
|
import com.github.houbb.sensitive.word.support.deny.WordDenys;
|
||||||
|
import org.junit.Test;
|
||||||
|
|
||||||
|
public class MySensitiveTest {
|
||||||
|
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void test() {
|
||||||
|
IWordDeny wordDeny = WordDenys.chains(WordDenys.system(), new MyWordDeny());
|
||||||
|
IWordAllow wordAllow = WordAllows.chains(WordAllows.system(), new MyWordAllow());
|
||||||
|
SensitiveWordBs sensitiveWordBs = SensitiveWordBs.newInstance()
|
||||||
|
.wordAllow(wordAllow)
|
||||||
|
.wordDeny(wordDeny)// 各种其他配置
|
||||||
|
.init();// init() 初始化敏感词字典
|
||||||
|
|
||||||
|
final String text = "五星红旗 我的自定义敏感词尼玛";
|
||||||
|
//输出测试结果
|
||||||
|
System.out.println("敏感词:"+sensitiveWordBs.findAll(text).toString());
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
@@ -0,0 +1,15 @@
|
|||||||
|
package com.github.houbb.sensitive.word.bugs.b20211211;
|
||||||
|
|
||||||
|
import com.github.houbb.sensitive.word.api.IWordAllow;
|
||||||
|
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
public class MyWordAllow implements IWordAllow {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public List<String> allow() {
|
||||||
|
return Arrays.asList("五星红旗");
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
@@ -0,0 +1,16 @@
|
|||||||
|
package com.github.houbb.sensitive.word.bugs.b20211211;
|
||||||
|
|
||||||
|
import com.github.houbb.sensitive.word.api.IWordDeny;
|
||||||
|
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
public class MyWordDeny implements IWordDeny {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public List<String> deny() {
|
||||||
|
return Arrays.asList("尼玛");
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
@@ -0,0 +1 @@
|
|||||||
|
package com.github.houbb.sensitive.word.bugs;
|
||||||
Reference in New Issue
Block a user