mirror of
https://github.com/houbb/sensitive-word.git
synced 2026-03-22 08:27:36 +08:00
Merge remote-tracking branch 'origin/master'
# Conflicts: # CHANGE_LOG.md # README.md # pom.xml # release.bat
This commit is contained in:
@@ -293,6 +293,24 @@
|
||||
|:---|:-----|------------|:-------------------|:-------------------------------------------------|
|
||||
| 1 | A | 结果添加敏感词的类别 | 2024-4-11 15:02:25 | |
|
||||
|
||||
# release_0.15.0
|
||||
|
||||
| 序号 | 变更类型 | 说明 | 时间 | 备注 |
|
||||
|:---|:-----|---------------------|:-------------------|:-------------------------------------------------|
|
||||
| 1 | F | 调整默认文件名称,避免和其他框架重合。 | 2024-4-23 21:02:25 | https://github.com/houbb/sensitive-word/issues/54 |
|
||||
|
||||
# release_0.16.0
|
||||
|
||||
| 序号 | 变更类型 | 说明 | 时间 | 备注 |
|
||||
|:---|:-----|----------------------|:-------------------|:-------------------------------------------------------|
|
||||
| 1 | A | 支持资源的释放,如 android 场景 | 2024-4-26 21:02:25 | 废弃版本,https://github.com/houbb/sensitive-word/issues/53 |
|
||||
|
||||
# release_0.16.1
|
||||
|
||||
| 序号 | 变更类型 | 说明 | 时间 | 备注 |
|
||||
|:---|:-----|----------------------|:-------------------|:----------------------------------------------------------|
|
||||
| 1 | F | 支持资源的释放,如 android 场景 | 2024-4-26 21:02:25 | 修正未调用问题,https://github.com/houbb/sensitive-word/issues/53 |
|
||||
|
||||
# release_0.14.1
|
||||
|
||||
| 序号 | 变更类型 | 说明 | 时间 | 备注 |
|
||||
|
||||
27
README.md
27
README.md
@@ -56,6 +56,9 @@ V0.14.1:
|
||||
|
||||
- 移除部分敏感词
|
||||
- 默认关闭 url/email/num 的校验
|
||||
V0.16.1:
|
||||
|
||||
- [x] 支持内存释放 [#53](https://github.com/houbb/sensitive-word/issues/53)
|
||||
|
||||
## 更多资料
|
||||
|
||||
@@ -87,7 +90,7 @@ V0.14.1:
|
||||
<dependency>
|
||||
<groupId>com.github.houbb</groupId>
|
||||
<artifactId>sensitive-word</artifactId>
|
||||
<version>0.14.1</version>
|
||||
<version>0.16.1</version>
|
||||
</dependency>
|
||||
```
|
||||
|
||||
@@ -454,6 +457,19 @@ Assert.assertTrue(wordBs.contains(text));
|
||||
| 13 | charIgnore | 忽略的字符 | none |
|
||||
| 14 | wordResultCondition | 针对匹配的敏感词额外加工,比如可以限制英文单词必须全匹配 | 恒为真 |
|
||||
|
||||
## 内存的释放
|
||||
|
||||
从 v0.16.1 开始支持。有时候我们需要释放内存,可以按如下方式操作:
|
||||
|
||||
> [关于内存回收问题](https://github.com/houbb/sensitive-word/issues/53)
|
||||
|
||||
```java
|
||||
SensitiveWordBs wordBs = SensitiveWordBs.newInstance()
|
||||
.init();
|
||||
// 后续因为一些原因移除了对应信息,希望释放内存。
|
||||
wordBs.destroy();
|
||||
```
|
||||
|
||||
# wordResultCondition-针对匹配词进一步判断
|
||||
|
||||
## 说明
|
||||
@@ -604,6 +620,15 @@ Assert.assertEquals("[政治, 国家]", sensitiveWordBs.tags("五星红旗").toS
|
||||
|
||||
后续会考虑引入一个内置的标签文件策略。
|
||||
|
||||
### 敏感词标签文件
|
||||
|
||||
我们梳理了大量的敏感词标签文件,可以让敏感词的使用更加方便。
|
||||
|
||||
这两个资料阅读可在下方文章获取:
|
||||
|
||||
> [v0.11.0-敏感词新特性及对应标签文件](https://mp.weixin.qq.com/s/m40ZnR6YF6WgPrArUSZ_0g)
|
||||
|
||||
|
||||
# 动态加载(用户自定义)
|
||||
|
||||
## 情景说明
|
||||
|
||||
2
pom.xml
2
pom.xml
@@ -6,7 +6,7 @@
|
||||
|
||||
<groupId>com.github.houbb</groupId>
|
||||
<artifactId>sensitive-word</artifactId>
|
||||
<version>0.14.1</version>
|
||||
<version>0.16.1</version>
|
||||
|
||||
<properties>
|
||||
<!--============================== All Plugins START ==============================-->
|
||||
|
||||
@@ -10,9 +10,9 @@ ECHO "============================= RELEASE START..."
|
||||
|
||||
:: 版本号信息(需要手动指定)
|
||||
:::: 旧版本名称
|
||||
SET version=0.14.1
|
||||
SET version=0.16.1
|
||||
:::: 新版本名称
|
||||
SET newVersion=0.15.0
|
||||
SET newVersion=0.17.0
|
||||
:::: 组织名称
|
||||
SET groupName=com.github.houbb
|
||||
:::: 项目名称
|
||||
|
||||
@@ -0,0 +1,11 @@
|
||||
package com.github.houbb.sensitive.word.api;
|
||||
|
||||
public interface ISensitiveWordDestroy {
|
||||
|
||||
/**
|
||||
* 资源的销毁
|
||||
* @since 0.16.0
|
||||
*/
|
||||
void destroy();
|
||||
|
||||
}
|
||||
@@ -11,7 +11,7 @@ import java.util.Collection;
|
||||
* @author binbin.hou
|
||||
* @since 0.0.1
|
||||
*/
|
||||
public interface IWordData {
|
||||
public interface IWordData extends ISensitiveWordDestroy {
|
||||
|
||||
|
||||
/**
|
||||
|
||||
@@ -32,7 +32,7 @@ import java.util.Set;
|
||||
* @author binbin.hou
|
||||
* @since 0.0.1
|
||||
*/
|
||||
public class SensitiveWordBs {
|
||||
public class SensitiveWordBs implements ISensitiveWordDestroy {
|
||||
|
||||
/**
|
||||
* 私有化构造器
|
||||
@@ -573,6 +573,11 @@ public class SensitiveWordBs {
|
||||
return wordTag.getTag(word);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void destroy() {
|
||||
this.wordData.destroy();
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------------ 公开方法 END
|
||||
|
||||
}
|
||||
|
||||
@@ -183,4 +183,11 @@ public class WordDataHashMap extends AbstractWordData {
|
||||
return currentMap;
|
||||
}
|
||||
|
||||
@Override
|
||||
public synchronized void destroy() {
|
||||
if(innerWordMap != null) {
|
||||
innerWordMap.clear();
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -117,4 +117,10 @@ public class WordDataTree implements IWordData {
|
||||
return currentMap;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void destroy() {
|
||||
if(this.root != null) {
|
||||
this.root.destroy();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
package com.github.houbb.sensitive.word.support.data;
|
||||
|
||||
import com.github.houbb.sensitive.word.api.ISensitiveWordDestroy;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
@@ -8,7 +10,7 @@ import java.util.Map;
|
||||
*
|
||||
* @since 0.7.0
|
||||
*/
|
||||
public class WordDataTreeNode {
|
||||
public class WordDataTreeNode implements ISensitiveWordDestroy {
|
||||
|
||||
/**
|
||||
* 关键词结束标识
|
||||
@@ -46,4 +48,11 @@ public class WordDataTreeNode {
|
||||
return this;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void destroy() {
|
||||
if(subNodeMap != null) {
|
||||
subNodeMap.clear();
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -25,8 +25,8 @@ public class WordDenySystem implements IWordDeny {
|
||||
|
||||
@Override
|
||||
public List<String> deny() {
|
||||
List<String> results = StreamUtil.readAllLines("/dict.txt");
|
||||
results.addAll(StreamUtil.readAllLines("/dict_en.txt"));
|
||||
List<String> results = StreamUtil.readAllLines("/sensitive_word_dict.txt");
|
||||
results.addAll(StreamUtil.readAllLines("/sensitive_word_dict_en.txt"));
|
||||
results.addAll(StreamUtil.readAllLines("/sensitive_word_deny.txt"));
|
||||
return results;
|
||||
}
|
||||
|
||||
@@ -0,0 +1 @@
|
||||
package ai.com.github.houbb.sensitive.word.utils;
|
||||
@@ -0,0 +1,20 @@
|
||||
package com.github.houbb.sensitive.word.bs;
|
||||
|
||||
import org.junit.Test;
|
||||
|
||||
/**
|
||||
* 资源的销毁
|
||||
*
|
||||
* @since 0.16.0
|
||||
*/
|
||||
public class SensitiveWordBsDestroyTest {
|
||||
|
||||
@Test
|
||||
public void destroyTest() {
|
||||
SensitiveWordBs wordBs = SensitiveWordBs.newInstance()
|
||||
.init();
|
||||
// 后续因为一些原因移除了对应信息,希望释放内存。
|
||||
wordBs.destroy();
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,17 @@
|
||||
package com.github.houbb.sensitive.word.bugs.b55;
|
||||
|
||||
import com.github.houbb.sensitive.word.bs.SensitiveWordBs;
|
||||
import org.junit.Assert;
|
||||
import org.junit.Test;
|
||||
|
||||
public class Bug55Test {
|
||||
|
||||
@Test
|
||||
public void test() {
|
||||
SensitiveWordBs sensitiveWordBs = SensitiveWordBs.newInstance()
|
||||
.init();
|
||||
final String text = "以个人账户或现金收取资金、现场或即时交付本金即给予部分提成、分红、利息;";
|
||||
Assert.assertEquals("[]", sensitiveWordBs.findAll(text).toString());
|
||||
}
|
||||
|
||||
}
|
||||
@@ -34,11 +34,11 @@ public class DataInitTest {
|
||||
@Test
|
||||
@Ignore
|
||||
public void trimTest() {
|
||||
final String source = "D:\\github\\sensitive-word\\src\\main\\resources\\dict.txt";
|
||||
final String source = "D:\\github\\sensitive-word\\src\\main\\resources\\sensitive_word_dict.txt";
|
||||
List<String> lines = FileUtil.readAllLines(source);
|
||||
List<String> trimLines = CollectionUtil.distinct(CollectionUtil.trimCollection(lines));
|
||||
|
||||
final String target = "D:\\github\\sensitive-word\\src\\main\\resources\\dict.txt";
|
||||
final String target = "D:\\github\\sensitive-word\\src\\main\\resources\\sensitive_word_dict.txt";
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -75,7 +75,7 @@ public class DataInitTest {
|
||||
@Test
|
||||
@Ignore
|
||||
public void dictAllInOneTest() {
|
||||
final List<String> allLines = distinctLines("dict.txt");
|
||||
final List<String> allLines = distinctLines("sensitive_word_dict.txt");
|
||||
|
||||
allLines.addAll(distinctLines("不正当竞争.txt"));
|
||||
allLines.addAll(distinctLines("人名.txt"));
|
||||
@@ -97,7 +97,7 @@ public class DataInitTest {
|
||||
|
||||
Collections.sort(disctinct);
|
||||
|
||||
final String target = "D:\\github\\sensitive-word\\src\\main\\resources\\dict.txt";
|
||||
final String target = "D:\\github\\sensitive-word\\src\\main\\resources\\sensitive_word_dict.txt";
|
||||
|
||||
FileUtil.write(target, disctinct);
|
||||
}
|
||||
@@ -105,7 +105,7 @@ public class DataInitTest {
|
||||
@Test
|
||||
@Ignore
|
||||
public void oneWordTest() {
|
||||
final String source = "D:\\_github\\sensitive-word\\src\\main\\resources\\dict.txt";
|
||||
final String source = "D:\\_github\\sensitive-word\\src\\main\\resources\\sensitive_word_dict.txt";
|
||||
|
||||
List<String> lines = FileUtil.readAllLines(source);
|
||||
for(int i = 0; i < lines.size(); i++) {
|
||||
|
||||
@@ -38,7 +38,7 @@ public class DataUtil {
|
||||
@Test
|
||||
@Ignore
|
||||
public void singleCharTest() {
|
||||
final String path = "D:\\github\\sensitive-word\\src\\main\\resources\\dict.txt";
|
||||
final String path = "D:\\github\\sensitive-word\\src\\main\\resources\\sensitive_word_dict.txt";
|
||||
|
||||
List<String> stringList = FileUtil.readAllLines(path);
|
||||
for(String s : stringList) {
|
||||
|
||||
@@ -26,8 +26,8 @@ public class DictNumTest {
|
||||
@Test
|
||||
@Ignore
|
||||
public void formatTest() {
|
||||
final String sourceFile = "D:\\_github\\sensitive-word\\src\\main\\resources\\dict.txt";
|
||||
final String targetFile = "D:\\_github\\sensitive-word\\src\\main\\resources\\dict.txt";
|
||||
final String sourceFile = "D:\\_github\\sensitive-word\\src\\main\\resources\\sensitive_word_dict.txt";
|
||||
final String targetFile = "D:\\_github\\sensitive-word\\src\\main\\resources\\sensitive_word_dict.txt";
|
||||
|
||||
List<String> words = FileUtil.readAllLines(sourceFile);
|
||||
// List<String> formats = CollectionUtil.toList(words, new IHandler<String, String>() {
|
||||
|
||||
@@ -27,7 +27,7 @@ public class DictRemoveSingleTest {
|
||||
@Ignore
|
||||
public void removeSingleWord() {
|
||||
final String sourceFile = "D:\\code\\github\\sensitive-word\\src\\test\\resources\\dict_20231117.txt";
|
||||
final String targetFile = "D:\\code\\github\\sensitive-word\\src\\main\\resources\\dict.txt";
|
||||
final String targetFile = "D:\\code\\github\\sensitive-word\\src\\main\\resources\\sensitive_word_dict.txt";
|
||||
|
||||
List<String> words = FileUtil.readAllLines(sourceFile);
|
||||
|
||||
|
||||
@@ -17,7 +17,7 @@ import java.util.List;
|
||||
public class DictRemoveTwoEnglishTest {
|
||||
|
||||
public static void main(String[] args) {
|
||||
final String sourceFile = "D:\\github\\sensitive-word\\src\\main\\resources\\dict.txt";
|
||||
final String sourceFile = "D:\\github\\sensitive-word\\src\\main\\resources\\sensitive_word_dict.txt";
|
||||
final String targetFile = "D:\\github\\sensitive-word\\src\\test\\resources\\dict_v20240407.txt";
|
||||
|
||||
List<String> words = FileUtil.readAllLines(sourceFile);
|
||||
|
||||
@@ -33,8 +33,8 @@ public class DictSlimTest {
|
||||
@Test
|
||||
@Ignore
|
||||
public void formatTest() {
|
||||
final String sourceFile = "D:\\github\\sensitive-word\\src\\main\\resources\\dict.txt";
|
||||
final String targetFile = "D:\\github\\sensitive-word\\src\\main\\resources\\dict.txt";
|
||||
final String sourceFile = "D:\\github\\sensitive-word\\src\\main\\resources\\sensitive_word_dict.txt";
|
||||
final String targetFile = "D:\\github\\sensitive-word\\src\\main\\resources\\sensitive_word_dict.txt";
|
||||
|
||||
List<String> words = FileUtil.readAllLines(sourceFile);
|
||||
|
||||
@@ -67,8 +67,8 @@ public class DictSlimTest {
|
||||
@Test
|
||||
@Ignore
|
||||
public void removeTest() {
|
||||
final String sourceFile = "D:\\github\\sensitive-word\\src\\main\\resources\\dict.txt";
|
||||
final String targetFile = "D:\\github\\sensitive-word\\src\\main\\resources\\dict.txt";
|
||||
final String sourceFile = "D:\\github\\sensitive-word\\src\\main\\resources\\sensitive_word_dict.txt";
|
||||
final String targetFile = "D:\\github\\sensitive-word\\src\\main\\resources\\sensitive_word_dict.txt";
|
||||
|
||||
List<String> words = FileUtil.readAllLines(sourceFile);
|
||||
|
||||
@@ -92,8 +92,8 @@ public class DictSlimTest {
|
||||
*/
|
||||
@Test
|
||||
public void removeNumberMappingTest() {
|
||||
final String sourceFile = "D:\\_github\\sensitive-word\\src\\main\\resources\\dict.txt";
|
||||
final String targetFile = "D:\\_github\\sensitive-word\\src\\main\\resources\\dict.txt";
|
||||
final String sourceFile = "D:\\_github\\sensitive-word\\src\\main\\resources\\sensitive_word_dict.txt";
|
||||
final String targetFile = "D:\\_github\\sensitive-word\\src\\main\\resources\\sensitive_word_dict.txt";
|
||||
|
||||
List<String> words = FileUtil.readAllLines(sourceFile);
|
||||
List<String> formats = CollectionUtil.toList(words, new IHandler<String, String>() {
|
||||
|
||||
@@ -22,7 +22,7 @@ public class DataMemoryTest {
|
||||
*/
|
||||
@Test
|
||||
public void hashMapTest() {
|
||||
List<String> allLines = StreamUtil.readAllLines("/dict.txt");
|
||||
List<String> allLines = StreamUtil.readAllLines("/sensitive_word_dict.txt");
|
||||
IWordData wordData = WordDatas.defaults();
|
||||
|
||||
wordData.initWordData(allLines);
|
||||
@@ -36,7 +36,7 @@ public class DataMemoryTest {
|
||||
//33.4 MB
|
||||
@Test
|
||||
public void treeTest() {
|
||||
List<String> allLines = StreamUtil.readAllLines("/dict.txt");
|
||||
List<String> allLines = StreamUtil.readAllLines("/sensitive_word_dict.txt");
|
||||
IWordData wordData = WordDatas.tree();
|
||||
|
||||
wordData.initWordData(allLines);
|
||||
|
||||
Reference in New Issue
Block a user