release branch 0.1.1

This commit is contained in:
houbb
2021-12-11 23:29:59 +08:00
parent f400f56118
commit 7ca843ee4f
10 changed files with 169 additions and 12 deletions

View File

@@ -120,3 +120,9 @@
| 1 | A | 返回敏感词对应的下标范围 | 2021-8-8 20:51:58 | |
| 2 | U | ignoreRepeat 默认为 false | 2021-8-8 20:51:58 | |
| 3 | U | 把测试、系统、买卖、彩票等常用词移出敏感词库 | 2021-8-8 20:51:58 | |
# release_0.1.1
| 序号 | 变更类型 | 说明 | 时间 | 备注 |
|:---|:---|:---|:---|:--|
| 1 | F | 自定义敏感词 allow/deny 进行格式化处理 | 2021-12-11 23:51:58 | |

View File

@@ -44,11 +44,9 @@
[CHANGE_LOG.md](https://github.com/houbb/sensitive-word/blob/master/doc/CHANGE_LOG.md)
v0.1.0 变更:
v0.1.1 变更:
- 返回敏感词对应的开始结束下标信息
- 优化词库
- 敏感词自定义 Allow/Deny 进行格式化处理
# 快速开始
@@ -64,7 +62,7 @@ v0.1.0 变更:
<dependency>
<groupId>com.github.houbb</groupId>
<artifactId>sensitive-word</artifactId>
<version>0.1.0</version>
<version>0.1.1</version>
</dependency>
```

View File

@@ -6,7 +6,7 @@
<groupId>com.github.houbb</groupId>
<artifactId>sensitive-word</artifactId>
<version>0.1.0</version>
<version>0.1.1</version>
<properties>
<!--============================== All Plugins START ==============================-->
@@ -25,8 +25,8 @@
<project.compiler.level>1.7</project.compiler.level>
<!--============================== INTER ==============================-->
<heaven.version>0.1.129</heaven.version>
<opencc4j.version>1.2.0</opencc4j.version>
<heaven.version>0.1.148</heaven.version>
<opencc4j.version>1.7.1</opencc4j.version>
<!--============================== OTHER ==============================-->
<junit.version>4.13.1</junit.version>

View File

@@ -10,9 +10,9 @@ ECHO "============================= RELEASE START..."
:: Version information (must be specified manually)
:::: Old version name
SET version=0.1.0
SET version=0.1.1
:::: New version name
SET newVersion=0.2.0
SET newVersion=0.1.2
:::: Organization (group) name
SET groupName=com.github.houbb
:::: Project name

View File

@@ -9,8 +9,9 @@ import com.github.houbb.sensitive.word.support.allow.WordAllows;
import com.github.houbb.sensitive.word.support.deny.WordDenys;
import com.github.houbb.sensitive.word.support.map.SensitiveWordMap;
import com.github.houbb.sensitive.word.support.result.WordResultHandlers;
import com.github.houbb.sensitive.word.utils.InnerFormatUtils;
import java.util.List;
import java.util.*;
/**
* 敏感词引导类
@@ -64,7 +65,7 @@ public class SensitiveWordBs {
// 加载配置信息
List<String> denyList = wordDeny.deny();
List<String> allowList = wordAllow.allow();
List<String> results = CollectionUtil.difference(denyList, allowList);
List<String> results = getActualDenyList(denyList, allowList);
// 初始化 DFA 信息
if(sensitiveWordMap == null) {
@@ -74,6 +75,59 @@ public class SensitiveWordBs {
sensitiveWordMap.initWordMap(results);
}
/**
 * Computes the deny words that remain in effect once the allowed words are excluded.
 * <p>
 * An empty deny list yields {@link Collections#emptyList()}. An empty allow list
 * yields the given deny list itself, without passing it through
 * {@code formatWordList}. Otherwise both lists are formatted and every formatted
 * deny word that also occurs in the formatted allow list is dropped; the order of
 * the remaining words follows the deny list.
 *
 * @param denyList  words to deny
 * @param allowList words to allow
 * @return the deny words that are not covered by the allow list
 * @since 0.1.1
 */
List<String> getActualDenyList(List<String> denyList,
                               List<String> allowList) {
    if(CollectionUtil.isEmpty(denyList)) {
        return Collections.emptyList();
    }
    if(CollectionUtil.isEmpty(allowList)) {
        return denyList;
    }

    // Work on a private copy so the removal below never touches a caller-owned list.
    final List<String> remaining = new ArrayList<>(this.formatWordList(denyList));
    // A hash set keeps each membership check during removal at constant time.
    final Set<String> allowed = new HashSet<>(this.formatWordList(allowList));
    remaining.removeAll(allowed);
    return remaining;
}
/**
 * Formats every word of the given list with {@link InnerFormatUtils#format}
 * using this instance's context.
 *
 * @param list words to format
 * @return a new list holding the formatted words in the same order, or the
 *         given list itself when {@code CollectionUtil.isEmpty} reports it as empty
 * @since 0.1.1
 */
private List<String> formatWordList(List<String> list) {
    if(CollectionUtil.isEmpty(list)) {
        return list;
    }

    final List<String> formatted = new ArrayList<>(list.size());
    for(final String raw : list) {
        formatted.add(InnerFormatUtils.format(raw, this.context));
    }
    return formatted;
}
/**
* 新建验证实例
* <p>

View File

@@ -0,0 +1,40 @@
package com.github.houbb.sensitive.word.utils;
import com.github.houbb.heaven.support.instance.impl.Instances;
import com.github.houbb.heaven.util.lang.StringUtil;
import com.github.houbb.sensitive.word.api.ICharFormat;
import com.github.houbb.sensitive.word.api.IWordContext;
import com.github.houbb.sensitive.word.support.format.CharFormatChain;
/**
 * Internal helper for formatting words.
 *
 * @since 0.1.1
 */
public final class InnerFormatUtils {

    private InnerFormatUtils(){}

    /**
     * Formats a string character by character.
     * <p>
     * Each character of {@code original} is passed, together with {@code context},
     * to the {@link CharFormatChain} singleton obtained from {@link Instances}, and
     * the returned characters are joined in their original order.
     *
     * @param original the text to format
     * @param context  the word context handed to the character formatter
     * @return the formatted text, or {@code original} itself when
     *         {@code StringUtil.isEmpty(original)} is true
     * @since 0.1.1
     */
    public static String format(String original, IWordContext context) {
        if(StringUtil.isEmpty(original)) {
            return original;
        }

        final ICharFormat formatter = Instances.singleton(CharFormatChain.class);
        final int length = original.length();
        final char[] formatted = new char[length];
        for(int i = 0; i < length; i++) {
            formatted[i] = formatter.format(original.charAt(i), context);
        }
        return new String(formatted);
    }

}

View File

@@ -0,0 +1,27 @@
package com.github.houbb.sensitive.word.bugs.b20211211;
import com.github.houbb.sensitive.word.api.IWordAllow;
import com.github.houbb.sensitive.word.api.IWordDeny;
import com.github.houbb.sensitive.word.bs.SensitiveWordBs;
import com.github.houbb.sensitive.word.support.allow.WordAllows;
import com.github.houbb.sensitive.word.support.deny.WordDenys;
import org.junit.Test;
/**
 * Runs {@link SensitiveWordBs} with the system word lists chained together with
 * the custom {@link MyWordDeny} and {@link MyWordAllow} lists.
 */
public class MySensitiveTest {

    /**
     * Initializes the bootstrap with the chained allow/deny lists and prints
     * everything {@code findAll} reports for a sample text.
     */
    @Test
    public void test() {
        final IWordDeny denyChain = WordDenys.chains(WordDenys.system(), new MyWordDeny());
        final IWordAllow allowChain = WordAllows.chains(WordAllows.system(), new MyWordAllow());

        // Further configuration could be chained here; init() builds the sensitive-word dictionary.
        final SensitiveWordBs bootstrap = SensitiveWordBs.newInstance()
                .wordAllow(allowChain)
                .wordDeny(denyChain)
                .init();

        final String text = "五星红旗 我的自定义敏感词尼玛";

        // Print the result of the lookup.
        final String found = bootstrap.findAll(text).toString();
        System.out.println("敏感词:" + found);
    }

}

View File

@@ -0,0 +1,15 @@
package com.github.houbb.sensitive.word.bugs.b20211211;
import com.github.houbb.sensitive.word.api.IWordAllow;
import java.util.Arrays;
import java.util.List;
/**
 * Custom allow list that contributes a single hard-coded word.
 */
public class MyWordAllow implements IWordAllow {

    /**
     * @return a fixed-size list containing only the word "五星红旗"
     */
    @Override
    public List<String> allow() {
        final String[] words = {"五星红旗"};
        return Arrays.asList(words);
    }

}

View File

@@ -0,0 +1,16 @@
package com.github.houbb.sensitive.word.bugs.b20211211;
import com.github.houbb.sensitive.word.api.IWordDeny;
import java.util.Arrays;
import java.util.List;
/**
 * Custom deny list that contributes a single hard-coded word.
 */
public class MyWordDeny implements IWordDeny {

    /**
     * @return a fixed-size list containing only the word "尼玛"
     */
    @Override
    public List<String> deny() {
        final String[] words = {"尼玛"};
        return Arrays.asList(words);
    }

}

View File

@@ -0,0 +1 @@
package com.github.houbb.sensitive.word.bugs;