mirror of
https://github.com/houbb/sensitive-word.git
synced 2026-03-22 08:27:36 +08:00
release branch 0.1.1
This commit is contained in:
@@ -120,3 +120,9 @@
|
||||
| 1 | A | 返回敏感词对应的下标范围 | 2021-8-8 20:51:58 | |
|
||||
| 2 | U | ignoreRepeat 默认为 false | 2021-8-8 20:51:58 | |
|
||||
| 3 | U | 把测试、系统、买卖、彩票等常用词移出敏感词库 | 2021-8-8 20:51:58 | |
|
||||
|
||||
# release_0.1.1
|
||||
|
||||
| 序号 | 变更类型 | 说明 | 时间 | 备注 |
|
||||
|:---|:---|:---|:---|:--|
|
||||
| 1 | F | 自定义敏感词 allow/deny 进行格式化处理 | 2021-12-11 23:51:58 | |
|
||||
|
||||
@@ -44,11 +44,9 @@
|
||||
|
||||
[CHANGE_LOG.md](https://github.com/houbb/sensitive-word/blob/master/doc/CHANGE_LOG.md)
|
||||
|
||||
v0.1.0 变更:
|
||||
v0.1.1 变更:
|
||||
|
||||
- 返回敏感词对应的开始结束下标信息
|
||||
|
||||
- 优化词库
|
||||
- 敏感词自定义 Allow/Deny 进行格式化处理
|
||||
|
||||
# 快速开始
|
||||
|
||||
@@ -64,7 +62,7 @@ v0.1.0 变更:
|
||||
<dependency>
|
||||
<groupId>com.github.houbb</groupId>
|
||||
<artifactId>sensitive-word</artifactId>
|
||||
<version>0.1.0</version>
|
||||
<version>0.1.1</version>
|
||||
</dependency>
|
||||
```
|
||||
|
||||
|
||||
6
pom.xml
6
pom.xml
@@ -6,7 +6,7 @@
|
||||
|
||||
<groupId>com.github.houbb</groupId>
|
||||
<artifactId>sensitive-word</artifactId>
|
||||
<version>0.1.0</version>
|
||||
<version>0.1.1</version>
|
||||
|
||||
<properties>
|
||||
<!--============================== All Plugins START ==============================-->
|
||||
@@ -25,8 +25,8 @@
|
||||
<project.compiler.level>1.7</project.compiler.level>
|
||||
|
||||
<!--============================== INTER ==============================-->
|
||||
<heaven.version>0.1.129</heaven.version>
|
||||
<opencc4j.version>1.2.0</opencc4j.version>
|
||||
<heaven.version>0.1.148</heaven.version>
|
||||
<opencc4j.version>1.7.1</opencc4j.version>
|
||||
|
||||
<!--============================== OTHER ==============================-->
|
||||
<junit.version>4.13.1</junit.version>
|
||||
|
||||
@@ -10,9 +10,9 @@ ECHO "============================= RELEASE START..."
|
||||
|
||||
:: 版本号信息(需要手动指定)
|
||||
:::: 旧版本名称
|
||||
SET version=0.1.0
|
||||
SET version=0.1.1
|
||||
:::: 新版本名称
|
||||
SET newVersion=0.2.0
|
||||
SET newVersion=0.1.2
|
||||
:::: 组织名称
|
||||
SET groupName=com.github.houbb
|
||||
:::: 项目名称
|
||||
|
||||
@@ -9,8 +9,9 @@ import com.github.houbb.sensitive.word.support.allow.WordAllows;
|
||||
import com.github.houbb.sensitive.word.support.deny.WordDenys;
|
||||
import com.github.houbb.sensitive.word.support.map.SensitiveWordMap;
|
||||
import com.github.houbb.sensitive.word.support.result.WordResultHandlers;
|
||||
import com.github.houbb.sensitive.word.utils.InnerFormatUtils;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* 敏感词引导类
|
||||
@@ -64,7 +65,7 @@ public class SensitiveWordBs {
|
||||
// 加载配置信息
|
||||
List<String> denyList = wordDeny.deny();
|
||||
List<String> allowList = wordAllow.allow();
|
||||
List<String> results = CollectionUtil.difference(denyList, allowList);
|
||||
List<String> results = getActualDenyList(denyList, allowList);
|
||||
|
||||
// 初始化 DFA 信息
|
||||
if(sensitiveWordMap == null) {
|
||||
@@ -74,6 +75,59 @@ public class SensitiveWordBs {
|
||||
sensitiveWordMap.initWordMap(results);
|
||||
}
|
||||
|
||||
/**
|
||||
* 获取禁止列表中真正的禁止词汇
|
||||
* @param denyList 禁止
|
||||
* @param allowList 允许
|
||||
* @return 结果
|
||||
* @since 0.1.1
|
||||
*/
|
||||
List<String> getActualDenyList(List<String> denyList,
|
||||
List<String> allowList) {
|
||||
if(CollectionUtil.isEmpty(denyList)) {
|
||||
return Collections.emptyList();
|
||||
}
|
||||
if(CollectionUtil.isEmpty(allowList)) {
|
||||
return denyList;
|
||||
}
|
||||
|
||||
List<String> formatDenyList = this.formatWordList(denyList);
|
||||
List<String> formatAllowList = this.formatWordList(allowList);
|
||||
|
||||
List<String> resultList = new ArrayList<>();
|
||||
// O(1)
|
||||
Set<String> allowSet = new HashSet<>(formatAllowList);
|
||||
|
||||
for(String deny : formatDenyList) {
|
||||
if(allowSet.contains(deny)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
resultList.add(deny);
|
||||
}
|
||||
return resultList;
|
||||
}
|
||||
|
||||
/**
|
||||
* 数据格式化处理
|
||||
* @param list 列表
|
||||
* @return 结果
|
||||
* @since 0.1.1
|
||||
*/
|
||||
private List<String> formatWordList(List<String> list) {
|
||||
if(CollectionUtil.isEmpty(list)) {
|
||||
return list;
|
||||
}
|
||||
|
||||
List<String> resultList = new ArrayList<>(list.size());
|
||||
for(String word : list) {
|
||||
String formatWord = InnerFormatUtils.format(word, this.context);
|
||||
resultList.add(formatWord);
|
||||
}
|
||||
|
||||
return resultList;
|
||||
}
|
||||
|
||||
/**
|
||||
* 新建验证实例
|
||||
* <p>
|
||||
|
||||
@@ -0,0 +1,40 @@
|
||||
package com.github.houbb.sensitive.word.utils;
|
||||
|
||||
import com.github.houbb.heaven.support.instance.impl.Instances;
|
||||
import com.github.houbb.heaven.util.lang.StringUtil;
|
||||
import com.github.houbb.sensitive.word.api.ICharFormat;
|
||||
import com.github.houbb.sensitive.word.api.IWordContext;
|
||||
import com.github.houbb.sensitive.word.support.format.CharFormatChain;
|
||||
|
||||
/**
|
||||
* 内部格式化工具类
|
||||
* @since 0.1.1
|
||||
*/
|
||||
public final class InnerFormatUtils {
|
||||
|
||||
private InnerFormatUtils(){}
|
||||
|
||||
/**
|
||||
* 格式化
|
||||
* @param original 原始
|
||||
* @param context 上下文
|
||||
* @return 结果
|
||||
* @since 0.1.1
|
||||
*/
|
||||
public static String format(String original, IWordContext context) {
|
||||
if(StringUtil.isEmpty(original)) {
|
||||
return original;
|
||||
}
|
||||
|
||||
StringBuilder stringBuilder = new StringBuilder();
|
||||
ICharFormat charFormat = Instances.singleton(CharFormatChain.class);
|
||||
char[] chars = original.toCharArray();
|
||||
for(char c : chars) {
|
||||
char cf = charFormat.format(c, context);
|
||||
stringBuilder.append(cf);
|
||||
}
|
||||
|
||||
return stringBuilder.toString();
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,27 @@
|
||||
package com.github.houbb.sensitive.word.bugs.b20211211;
|
||||
|
||||
import com.github.houbb.sensitive.word.api.IWordAllow;
|
||||
import com.github.houbb.sensitive.word.api.IWordDeny;
|
||||
import com.github.houbb.sensitive.word.bs.SensitiveWordBs;
|
||||
import com.github.houbb.sensitive.word.support.allow.WordAllows;
|
||||
import com.github.houbb.sensitive.word.support.deny.WordDenys;
|
||||
import org.junit.Test;
|
||||
|
||||
public class MySensitiveTest {
|
||||
|
||||
|
||||
@Test
|
||||
public void test() {
|
||||
IWordDeny wordDeny = WordDenys.chains(WordDenys.system(), new MyWordDeny());
|
||||
IWordAllow wordAllow = WordAllows.chains(WordAllows.system(), new MyWordAllow());
|
||||
SensitiveWordBs sensitiveWordBs = SensitiveWordBs.newInstance()
|
||||
.wordAllow(wordAllow)
|
||||
.wordDeny(wordDeny)// 各种其他配置
|
||||
.init();// init() 初始化敏感词字典
|
||||
|
||||
final String text = "五星红旗 我的自定义敏感词尼玛";
|
||||
//输出测试结果
|
||||
System.out.println("敏感词:"+sensitiveWordBs.findAll(text).toString());
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,15 @@
|
||||
package com.github.houbb.sensitive.word.bugs.b20211211;
|
||||
|
||||
import com.github.houbb.sensitive.word.api.IWordAllow;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
public class MyWordAllow implements IWordAllow {
|
||||
|
||||
@Override
|
||||
public List<String> allow() {
|
||||
return Arrays.asList("五星红旗");
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,16 @@
|
||||
package com.github.houbb.sensitive.word.bugs.b20211211;
|
||||
|
||||
import com.github.houbb.sensitive.word.api.IWordDeny;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
public class MyWordDeny implements IWordDeny {
|
||||
|
||||
@Override
|
||||
public List<String> deny() {
|
||||
return Arrays.asList("尼玛");
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
@@ -0,0 +1 @@
|
||||
package com.github.houbb.sensitive.word.bugs;
|
||||
Reference in New Issue
Block a user