mirror of
https://github.com/houbb/sensitive-word.git
synced 2026-03-22 08:27:36 +08:00
release branch 0.9.0
This commit is contained in:
@@ -200,3 +200,9 @@
|
||||
| 3 | A | 添加 IWordAllowDenyCombine | 2023-06-10 23:51:58 | 允许用户自定义 allow+deny 的组合策略 |
|
||||
| 4 | A | 添加引导类进阶的配置使用说明 | 2023-06-10 23:51:58 | 放在后续,避免内部接口不够稳定 |
|
||||
| 5 | U | 内部接口名称统一为 IWordXXX | 2023-06-10 23:51:58 | |
|
||||
|
||||
# release_0.9.0
|
||||
|
||||
| 序号 | 变更类型 | 说明 | 时间 | 备注 |
|
||||
|:---|:-----|-----------------|:--------------------|:------|
|
||||
| 1 | O | 移除单个汉字+部分常用词的脏词 | 2023-11-17 23:51:58 | 降低误判率 |
|
||||
|
||||
14
README.md
14
README.md
@@ -58,7 +58,7 @@
|
||||
<dependency>
|
||||
<groupId>com.github.houbb</groupId>
|
||||
<artifactId>sensitive-word</artifactId>
|
||||
<version>0.8.0</version>
|
||||
<version>0.9.0</version>
|
||||
</dependency>
|
||||
```
|
||||
|
||||
@@ -661,6 +661,16 @@ ps: 不同环境会有差异,但是比例基本稳定。
|
||||
|
||||
# 后期 road-map
|
||||
|
||||
- [x] 移除单个汉字的敏感词,在中国,要把词组当做一个词,降低误判率。
|
||||
|
||||
- [ ] 支持单个的敏感词变化?
|
||||
|
||||
remove、add、edit?
|
||||
|
||||
- [ ] 敏感词标签支持 + 分级支持
|
||||
|
||||
比较耗时间。
|
||||
|
||||
- [x] wordData 的内存占用对比 + 优化
|
||||
|
||||
- [x] 用户指定自定义的词组,同时允许指定词组的组合获取,更加灵活
|
||||
@@ -671,7 +681,7 @@ FormatCombine/CheckCombine/AllowDenyCombine 组合策略,允许用户自定义
|
||||
|
||||
- [ ] 添加 ThreadLocal 等性能优化
|
||||
|
||||
- [ ] 敏感词标签支持
|
||||
|
||||
|
||||
# 拓展阅读
|
||||
|
||||
|
||||
2
pom.xml
2
pom.xml
@@ -6,7 +6,7 @@
|
||||
|
||||
<groupId>com.github.houbb</groupId>
|
||||
<artifactId>sensitive-word</artifactId>
|
||||
<version>0.8.0</version>
|
||||
<version>0.9.0</version>
|
||||
|
||||
<properties>
|
||||
<!--============================== All Plugins START ==============================-->
|
||||
|
||||
@@ -10,9 +10,9 @@ ECHO "============================= RELEASE START..."
|
||||
|
||||
:: 版本号信息(需要手动指定)
|
||||
:::: 旧版本名称
|
||||
SET version=0.8.0
|
||||
SET version=0.9.0
|
||||
:::: 新版本名称
|
||||
SET newVersion=0.9.0
|
||||
SET newVersion=0.10.0
|
||||
:::: 组织名称
|
||||
SET groupName=com.github.houbb
|
||||
:::: 项目名称
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -23,7 +23,7 @@ public class SensitiveWordBsEmailTest {
|
||||
final String text = "楼主好人,邮箱 sensitiveword@xx.com";
|
||||
|
||||
List<String> wordList = SensitiveWordBs.newInstance().init().findAll(text);
|
||||
Assert.assertEquals("[邮箱, sensitiveword@xx.com]", wordList.toString());
|
||||
Assert.assertEquals("[sensitiveword@xx.com]", wordList.toString());
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -35,7 +35,7 @@ public class SensitiveWordBsEmailTest {
|
||||
final String text = "楼主好人,邮箱 123456789@xx.com";
|
||||
|
||||
List<String> wordList = SensitiveWordBs.newInstance().init().findAll(text);
|
||||
Assert.assertEquals("[邮箱, 123456789, xx.com]", wordList.toString());
|
||||
Assert.assertEquals("[123456789, xx.com]", wordList.toString());
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -0,0 +1,42 @@
|
||||
package com.github.houbb.sensitive.word.data;
|
||||
|
||||
import com.github.houbb.heaven.util.io.FileUtil;
|
||||
import org.junit.Ignore;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* 数据初始化
|
||||
* @author binbin.hou
|
||||
* @since 0.9.0
|
||||
*/
|
||||
@Ignore
|
||||
public class DictRemoveSingleTest {
|
||||
|
||||
/**
|
||||
* 统一格式
|
||||
*
|
||||
* 1. 将所有的大写字母统一转换为小写
|
||||
* 2. 将所有的全角转换为半角
|
||||
* 3. 移除所有【空格】【符号】(这个就是各种符号的过滤了)
|
||||
* 4. 繁体字统一转换为简体字
|
||||
* @since 0.0.3
|
||||
*/
|
||||
@Test
|
||||
@Ignore
|
||||
public void removeSingleWord() {
|
||||
final String sourceFile = "D:\\code\\github\\sensitive-word\\src\\test\\resources\\dict_20231117.txt";
|
||||
final String targetFile = "D:\\code\\github\\sensitive-word\\src\\main\\resources\\dict.txt";
|
||||
|
||||
List<String> words = FileUtil.readAllLines(sourceFile);
|
||||
|
||||
for(String word : words) {
|
||||
String wordTrim = word.trim();
|
||||
if(wordTrim.length() > 1) {
|
||||
FileUtil.append(targetFile, wordTrim);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
65338
src/test/resources/dict_20231117.txt
Normal file
65338
src/test/resources/dict_20231117.txt
Normal file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user