mirror of
https://github.com/houbb/sensitive-word.git
synced 2026-03-22 08:27:36 +08:00
release branch 0.9.0
This commit is contained in:
@@ -200,3 +200,9 @@
|
|||||||
| 3 | A | 添加 IWordAllowDenyCombine | 2023-06-10 23:51:58 | 允许用户自定义 allow+deny 的组合策略 |
|
| 3 | A | 添加 IWordAllowDenyCombine | 2023-06-10 23:51:58 | 允许用户自定义 allow+deny 的组合策略 |
|
||||||
| 4 | A | 添加引导类进阶的配置使用说明 | 2023-06-10 23:51:58 | 放在后续,避免内部接口不够稳定 |
|
| 4 | A | 添加引导类进阶的配置使用说明 | 2023-06-10 23:51:58 | 放在后续,避免内部接口不够稳定 |
|
||||||
| 5 | U | 内部接口名称统一为 IWordXXX | 2023-06-10 23:51:58 | |
|
| 5 | U | 内部接口名称统一为 IWordXXX | 2023-06-10 23:51:58 | |
|
||||||
|
|
||||||
|
# release_0.9.0
|
||||||
|
|
||||||
|
| 序号 | 变更类型 | 说明 | 时间 | 备注 |
|
||||||
|
|:---|:-----|-----------------|:--------------------|:------|
|
||||||
|
| 1 | O | 移除单个汉字+部分常用词的脏词 | 2023-11-17 23:51:58 | 降低误判率 |
|
||||||
|
|||||||
14
README.md
14
README.md
@@ -58,7 +58,7 @@
|
|||||||
<dependency>
|
<dependency>
|
||||||
<groupId>com.github.houbb</groupId>
|
<groupId>com.github.houbb</groupId>
|
||||||
<artifactId>sensitive-word</artifactId>
|
<artifactId>sensitive-word</artifactId>
|
||||||
<version>0.8.0</version>
|
<version>0.9.0</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
```
|
```
|
||||||
|
|
||||||
@@ -661,6 +661,16 @@ ps: 不同环境会有差异,但是比例基本稳定。
|
|||||||
|
|
||||||
# 后期 road-map
|
# 后期 road-map
|
||||||
|
|
||||||
|
- [x] 移除单个汉字的敏感词,在中国,要把词组当做一个词,降低误判率。
|
||||||
|
|
||||||
|
- [ ] 支持单个的敏感词变化?
|
||||||
|
|
||||||
|
remove、add、edit?
|
||||||
|
|
||||||
|
- [ ] 敏感词标签支持 + 分级支持
|
||||||
|
|
||||||
|
比较耗时间。
|
||||||
|
|
||||||
- [x] wordData 的内存占用对比 + 优化
|
- [x] wordData 的内存占用对比 + 优化
|
||||||
|
|
||||||
- [x] 用户指定自定义的词组,同时允许指定词组的组合获取,更加灵活
|
- [x] 用户指定自定义的词组,同时允许指定词组的组合获取,更加灵活
|
||||||
@@ -671,7 +681,7 @@ FormatCombine/CheckCombine/AllowDenyCombine 组合策略,允许用户自定义
|
|||||||
|
|
||||||
- [ ] 添加 ThreadLocal 等性能优化
|
- [ ] 添加 ThreadLocal 等性能优化
|
||||||
|
|
||||||
- [ ] 敏感词标签支持
|
|
||||||
|
|
||||||
# 拓展阅读
|
# 拓展阅读
|
||||||
|
|
||||||
|
|||||||
2
pom.xml
2
pom.xml
@@ -6,7 +6,7 @@
|
|||||||
|
|
||||||
<groupId>com.github.houbb</groupId>
|
<groupId>com.github.houbb</groupId>
|
||||||
<artifactId>sensitive-word</artifactId>
|
<artifactId>sensitive-word</artifactId>
|
||||||
<version>0.8.0</version>
|
<version>0.9.0</version>
|
||||||
|
|
||||||
<properties>
|
<properties>
|
||||||
<!--============================== All Plugins START ==============================-->
|
<!--============================== All Plugins START ==============================-->
|
||||||
|
|||||||
@@ -10,9 +10,9 @@ ECHO "============================= RELEASE START..."
|
|||||||
|
|
||||||
:: 版本号信息(需要手动指定)
|
:: 版本号信息(需要手动指定)
|
||||||
:::: 旧版本名称
|
:::: 旧版本名称
|
||||||
SET version=0.8.0
|
SET version=0.9.0
|
||||||
:::: 新版本名称
|
:::: 新版本名称
|
||||||
SET newVersion=0.9.0
|
SET newVersion=0.10.0
|
||||||
:::: 组织名称
|
:::: 组织名称
|
||||||
SET groupName=com.github.houbb
|
SET groupName=com.github.houbb
|
||||||
:::: 项目名称
|
:::: 项目名称
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
@@ -23,7 +23,7 @@ public class SensitiveWordBsEmailTest {
|
|||||||
final String text = "楼主好人,邮箱 sensitiveword@xx.com";
|
final String text = "楼主好人,邮箱 sensitiveword@xx.com";
|
||||||
|
|
||||||
List<String> wordList = SensitiveWordBs.newInstance().init().findAll(text);
|
List<String> wordList = SensitiveWordBs.newInstance().init().findAll(text);
|
||||||
Assert.assertEquals("[邮箱, sensitiveword@xx.com]", wordList.toString());
|
Assert.assertEquals("[sensitiveword@xx.com]", wordList.toString());
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -35,7 +35,7 @@ public class SensitiveWordBsEmailTest {
|
|||||||
final String text = "楼主好人,邮箱 123456789@xx.com";
|
final String text = "楼主好人,邮箱 123456789@xx.com";
|
||||||
|
|
||||||
List<String> wordList = SensitiveWordBs.newInstance().init().findAll(text);
|
List<String> wordList = SensitiveWordBs.newInstance().init().findAll(text);
|
||||||
Assert.assertEquals("[邮箱, 123456789, xx.com]", wordList.toString());
|
Assert.assertEquals("[123456789, xx.com]", wordList.toString());
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -0,0 +1,42 @@
|
|||||||
|
package com.github.houbb.sensitive.word.data;
|
||||||
|
|
||||||
|
import com.github.houbb.heaven.util.io.FileUtil;
|
||||||
|
import org.junit.Ignore;
|
||||||
|
import org.junit.Test;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 数据初始化
|
||||||
|
* @author binbin.hou
|
||||||
|
* @since 0.9.0
|
||||||
|
*/
|
||||||
|
@Ignore
|
||||||
|
public class DictRemoveSingleTest {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 统一格式
|
||||||
|
*
|
||||||
|
* 1. 将所有的大写字母统一转换为小写
|
||||||
|
* 2. 将所有的全角转换为半角
|
||||||
|
* 3. 移除所有【空格】【符号】(这个就是各种符号的过滤了)
|
||||||
|
* 4. 繁体字统一转换为简体字
|
||||||
|
* @since 0.0.3
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
@Ignore
|
||||||
|
public void removeSingleWord() {
|
||||||
|
final String sourceFile = "D:\\code\\github\\sensitive-word\\src\\test\\resources\\dict_20231117.txt";
|
||||||
|
final String targetFile = "D:\\code\\github\\sensitive-word\\src\\main\\resources\\dict.txt";
|
||||||
|
|
||||||
|
List<String> words = FileUtil.readAllLines(sourceFile);
|
||||||
|
|
||||||
|
for(String word : words) {
|
||||||
|
String wordTrim = word.trim();
|
||||||
|
if(wordTrim.length() > 1) {
|
||||||
|
FileUtil.append(targetFile, wordTrim);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
65338
src/test/resources/dict_20231117.txt
Normal file
65338
src/test/resources/dict_20231117.txt
Normal file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user