Merge remote-tracking branch 'origin/master'

# Conflicts:
#	CHANGE_LOG.md
#	README.md
#	pom.xml
#	release.bat
This commit is contained in:
houbb
2024-06-01 16:07:32 +08:00
23 changed files with 146 additions and 27 deletions

View File

@@ -293,6 +293,24 @@
|:---|:-----|------------|:-------------------|:-------------------------------------------------|
| 1 | A | 结果添加敏感词的类别 | 2024-4-11 15:02:25 | |
# release_0.15.0
| 序号 | 变更类型 | 说明 | 时间 | 备注 |
|:---|:-----|---------------------|:-------------------|:-------------------------------------------------|
| 1 | F | 调整默认文件名称,避免和其他框架重合。 | 2024-4-23 21:02:25 | https://github.com/houbb/sensitive-word/issues/54 |
# release_0.16.0
| 序号 | 变更类型 | 说明 | 时间 | 备注 |
|:---|:-----|----------------------|:-------------------|:-------------------------------------------------------|
| 1 | A | 支持资源的释放,如 android 场景 | 2024-4-26 21:02:25 | 废弃版本 https://github.com/houbb/sensitive-word/issues/53 |
# release_0.16.1
| 序号 | 变更类型 | 说明 | 时间 | 备注 |
|:---|:-----|----------------------|:-------------------|:----------------------------------------------------------|
| 1 | F | 支持资源的释放,如 android 场景 | 2024-4-26 21:02:25 | 修正未调用问题 https://github.com/houbb/sensitive-word/issues/53 |
# release_0.14.1
| 序号 | 变更类型 | 说明 | 时间 | 备注 |

View File

@@ -56,6 +56,9 @@ V0.14.1:
- 移除部分敏感词
- 默认关闭 url/email/num 的校验
V0.16.1:
- [x] 支持内存释放 [#53](https://github.com/houbb/sensitive-word/issues/53)
## 更多资料
@@ -87,7 +90,7 @@ V0.14.1:
<dependency>
<groupId>com.github.houbb</groupId>
<artifactId>sensitive-word</artifactId>
    <version>0.16.1</version>
</dependency>
```
@@ -454,6 +457,19 @@ Assert.assertTrue(wordBs.contains(text));
| 13 | charIgnore | 忽略的字符 | none |
| 14 | wordResultCondition | 针对匹配的敏感词额外加工,比如可以限制英文单词必须全匹配 | 恒为真 |
## 内存的释放
v0.16.1 开始支持,有时候我们需要释放内存,可以如下:
> [关于内存回收问题](https://github.com/houbb/sensitive-word/issues/53)
```java
SensitiveWordBs wordBs = SensitiveWordBs.newInstance()
.init();
// 后续因为一些原因移除了对应信息,希望释放内存。
wordBs.destroy();
```
# wordResultCondition-针对匹配词进一步判断
## 说明
@@ -604,6 +620,15 @@ Assert.assertEquals("[政治, 国家]", sensitiveWordBs.tags("五星红旗").toS
后续会考虑引入一个内置的标签文件策略。
### 敏感词标签文件
梳理了大量的敏感词标签文件,可以让我们的敏感词更加方便。
这两个资料阅读可在下方文章获取:
> [v0.11.0-敏感词新特性及对应标签文件](https://mp.weixin.qq.com/s/m40ZnR6YF6WgPrArUSZ_0g)
# 动态加载(用户自定义)
## 情景说明

View File

@@ -6,7 +6,7 @@
<groupId>com.github.houbb</groupId>
<artifactId>sensitive-word</artifactId>
<version>0.16.1</version>
<properties>
<!--============================== All Plugins START ==============================-->

View File

@@ -10,9 +10,9 @@ ECHO "============================= RELEASE START..."
:: Version info (must be set manually before each release).
:: Stale duplicate SET lines from the stripped diff removed — in batch the
:: later SET silently wins, so only the current values are kept.
:::: previous version name
SET version=0.16.1
:::: new version name
SET newVersion=0.17.0
:::: organization (maven groupId)
SET groupName=com.github.houbb
:::: project name

View File

@@ -0,0 +1,11 @@
package com.github.houbb.sensitive.word.api;

/**
 * Contract for components that can release the resources they hold
 * (e.g. clearing cached sensitive-word data to free memory on demand,
 * such as in Android scenarios — see issue #53).
 *
 * @since 0.16.0
 */
public interface ISensitiveWordDestroy {
/**
 * Destroy (release) the resources held by this component.
 *
 * @since 0.16.0
 */
void destroy();
}

View File

@@ -11,7 +11,7 @@ import java.util.Collection;
* @author binbin.hou
* @since 0.0.1
*/
public interface IWordData {
public interface IWordData extends ISensitiveWordDestroy {
/**

View File

@@ -32,7 +32,7 @@ import java.util.Set;
* @author binbin.hou
* @since 0.0.1
*/
public class SensitiveWordBs {
public class SensitiveWordBs implements ISensitiveWordDestroy {
/**
* 私有化构造器
@@ -573,6 +573,11 @@ public class SensitiveWordBs {
return wordTag.getTag(word);
}
/**
 * Releases the memory held by the underlying word data structure.
 * Delegates to the word data's own destroy implementation; after calling
 * this the word dictionary content is cleared.
 */
@Override
public void destroy() {
this.wordData.destroy();
}
//------------------------------------------------------------------------------------ 公开方法 END
}

View File

@@ -183,4 +183,11 @@ public class WordDataHashMap extends AbstractWordData {
return currentMap;
}
/**
 * Releases all entries held by the inner word map to free memory.
 * Declared {@code synchronized} so a concurrent clear and rebuild do not
 * interleave; safe to call even if the map was never initialized.
 */
@Override
public synchronized void destroy() {
    // Guard clause: nothing to release when the map was never built.
    if (innerWordMap == null) {
        return;
    }
    innerWordMap.clear();
}
}

View File

@@ -117,4 +117,10 @@ public class WordDataTree implements IWordData {
return currentMap;
}
/**
 * Releases the word tree by destroying its root node (which clears the
 * child-node map recursively held beneath it). No-op when no root exists.
 */
@Override
public void destroy() {
    // Guard clause: an uninitialized tree holds nothing to release.
    if (this.root == null) {
        return;
    }
    this.root.destroy();
}
}

View File

@@ -1,5 +1,7 @@
package com.github.houbb.sensitive.word.support.data;
import com.github.houbb.sensitive.word.api.ISensitiveWordDestroy;
import java.util.HashMap;
import java.util.Map;
@@ -8,7 +10,7 @@ import java.util.Map;
*
* @since 0.7.0
*/
public class WordDataTreeNode {
public class WordDataTreeNode implements ISensitiveWordDestroy {
/**
* 关键词结束标识
@@ -46,4 +48,11 @@ public class WordDataTreeNode {
return this;
}
/**
 * Clears this node's sub-node map so the referenced child nodes become
 * collectable. Safe to call on a leaf node whose map was never created.
 */
@Override
public void destroy() {
    // Guard clause: leaf nodes may never have allocated a sub-node map.
    if (subNodeMap == null) {
        return;
    }
    subNodeMap.clear();
}
}

View File

@@ -25,8 +25,8 @@ public class WordDenySystem implements IWordDeny {
/**
 * Loads the built-in system deny-word lists from classpath resources.
 *
 * Fix: the stripped diff left a duplicate declaration of {@code results}
 * (the stale pre-rename lines reading /dict.txt and /dict_en.txt), which
 * does not compile. Only the renamed sensitive_word_* resources are kept —
 * the rename avoids clashing with other frameworks' dict files.
 *
 * @return combined deny-word lines from the main dict, the English dict,
 *         and the extra deny list
 */
@Override
public List<String> deny() {
    List<String> results = StreamUtil.readAllLines("/sensitive_word_dict.txt");
    results.addAll(StreamUtil.readAllLines("/sensitive_word_dict_en.txt"));
    results.addAll(StreamUtil.readAllLines("/sensitive_word_deny.txt"));
    return results;
}

View File

@@ -0,0 +1 @@
// NOTE(review): the original declared "package ai.com.github.houbb..." — the
// leading "ai." segment is inconsistent with every other package in this
// project (root is com.github.houbb). Corrected to the standard root package;
// confirm the file's directory on disk matches.
package com.github.houbb.sensitive.word.utils;

View File

@@ -0,0 +1,20 @@
package com.github.houbb.sensitive.word.bs;
import org.junit.Test;
/**
 * Verifies resource destruction: a bootstrapped instance can release its
 * word-data memory via {@code destroy()}.
 *
 * @since 0.16.0
 */
public class SensitiveWordBsDestroyTest {
/**
 * Initializes the default word bootstrap and destroys it; passes as long
 * as no exception is thrown.
 */
@Test
public void destroyTest() {
SensitiveWordBs wordBs = SensitiveWordBs.newInstance()
.init();
// The word information was removed for some reason; release the memory.
wordBs.destroy();
}
}

View File

@@ -0,0 +1,17 @@
package com.github.houbb.sensitive.word.bugs.b55;
import com.github.houbb.sensitive.word.bs.SensitiveWordBs;
import org.junit.Assert;
import org.junit.Test;
public class Bug55Test {
@Test
public void test() {
SensitiveWordBs sensitiveWordBs = SensitiveWordBs.newInstance()
.init();
final String text = "以个人账户或现金收取资金、现场或即时交付本金即给予部分提成、分红、利息;";
Assert.assertEquals("[]", sensitiveWordBs.findAll(text).toString());
}
}

View File

@@ -34,11 +34,11 @@ public class DataInitTest {
/**
 * One-off maintenance task: read the dict file, trim and de-duplicate its
 * lines. Kept {@code @Ignore}d — it depends on a developer-local absolute path.
 *
 * Fix: the stripped diff left {@code source} and {@code target} each declared
 * twice (stale pre-rename dict.txt lines), which does not compile; only the
 * renamed sensitive_word_dict.txt paths are kept.
 */
@Test
@Ignore
public void trimTest() {
    final String source = "D:\\github\\sensitive-word\\src\\main\\resources\\sensitive_word_dict.txt";
    List<String> lines = FileUtil.readAllLines(source);
    List<String> trimLines = CollectionUtil.distinct(CollectionUtil.trimCollection(lines));
    // NOTE(review): trimLines is computed but never written back to `target` —
    // a FileUtil.write(target, trimLines) call appears to be missing; confirm intent.
    final String target = "D:\\github\\sensitive-word\\src\\main\\resources\\sensitive_word_dict.txt";
}
/**
@@ -75,7 +75,7 @@ public class DataInitTest {
@Test
@Ignore
public void dictAllInOneTest() {
final List<String> allLines = distinctLines("dict.txt");
final List<String> allLines = distinctLines("sensitive_word_dict.txt");
allLines.addAll(distinctLines("不正当竞争.txt"));
allLines.addAll(distinctLines("人名.txt"));
@@ -97,7 +97,7 @@ public class DataInitTest {
Collections.sort(disctinct);
final String target = "D:\\github\\sensitive-word\\src\\main\\resources\\dict.txt";
final String target = "D:\\github\\sensitive-word\\src\\main\\resources\\sensitive_word_dict.txt";
FileUtil.write(target, disctinct);
}
@@ -105,7 +105,7 @@ public class DataInitTest {
@Test
@Ignore
public void oneWordTest() {
final String source = "D:\\_github\\sensitive-word\\src\\main\\resources\\dict.txt";
final String source = "D:\\_github\\sensitive-word\\src\\main\\resources\\sensitive_word_dict.txt";
List<String> lines = FileUtil.readAllLines(source);
for(int i = 0; i < lines.size(); i++) {

View File

@@ -38,7 +38,7 @@ public class DataUtil {
@Test
@Ignore
public void singleCharTest() {
final String path = "D:\\github\\sensitive-word\\src\\main\\resources\\dict.txt";
final String path = "D:\\github\\sensitive-word\\src\\main\\resources\\sensitive_word_dict.txt";
List<String> stringList = FileUtil.readAllLines(path);
for(String s : stringList) {

View File

@@ -26,8 +26,8 @@ public class DictNumTest {
@Test
@Ignore
public void formatTest() {
final String sourceFile = "D:\\_github\\sensitive-word\\src\\main\\resources\\dict.txt";
final String targetFile = "D:\\_github\\sensitive-word\\src\\main\\resources\\dict.txt";
final String sourceFile = "D:\\_github\\sensitive-word\\src\\main\\resources\\sensitive_word_dict.txt";
final String targetFile = "D:\\_github\\sensitive-word\\src\\main\\resources\\sensitive_word_dict.txt";
List<String> words = FileUtil.readAllLines(sourceFile);
// List<String> formats = CollectionUtil.toList(words, new IHandler<String, String>() {

View File

@@ -27,7 +27,7 @@ public class DictRemoveSingleTest {
@Ignore
public void removeSingleWord() {
final String sourceFile = "D:\\code\\github\\sensitive-word\\src\\test\\resources\\dict_20231117.txt";
final String targetFile = "D:\\code\\github\\sensitive-word\\src\\main\\resources\\dict.txt";
final String targetFile = "D:\\code\\github\\sensitive-word\\src\\main\\resources\\sensitive_word_dict.txt";
List<String> words = FileUtil.readAllLines(sourceFile);

View File

@@ -17,7 +17,7 @@ import java.util.List;
public class DictRemoveTwoEnglishTest {
public static void main(String[] args) {
final String sourceFile = "D:\\github\\sensitive-word\\src\\main\\resources\\dict.txt";
final String sourceFile = "D:\\github\\sensitive-word\\src\\main\\resources\\sensitive_word_dict.txt";
final String targetFile = "D:\\github\\sensitive-word\\src\\test\\resources\\dict_v20240407.txt";
List<String> words = FileUtil.readAllLines(sourceFile);

View File

@@ -33,8 +33,8 @@ public class DictSlimTest {
@Test
@Ignore
public void formatTest() {
final String sourceFile = "D:\\github\\sensitive-word\\src\\main\\resources\\dict.txt";
final String targetFile = "D:\\github\\sensitive-word\\src\\main\\resources\\dict.txt";
final String sourceFile = "D:\\github\\sensitive-word\\src\\main\\resources\\sensitive_word_dict.txt";
final String targetFile = "D:\\github\\sensitive-word\\src\\main\\resources\\sensitive_word_dict.txt";
List<String> words = FileUtil.readAllLines(sourceFile);
@@ -67,8 +67,8 @@ public class DictSlimTest {
@Test
@Ignore
public void removeTest() {
final String sourceFile = "D:\\github\\sensitive-word\\src\\main\\resources\\dict.txt";
final String targetFile = "D:\\github\\sensitive-word\\src\\main\\resources\\dict.txt";
final String sourceFile = "D:\\github\\sensitive-word\\src\\main\\resources\\sensitive_word_dict.txt";
final String targetFile = "D:\\github\\sensitive-word\\src\\main\\resources\\sensitive_word_dict.txt";
List<String> words = FileUtil.readAllLines(sourceFile);
@@ -92,8 +92,8 @@ public class DictSlimTest {
*/
@Test
public void removeNumberMappingTest() {
final String sourceFile = "D:\\_github\\sensitive-word\\src\\main\\resources\\dict.txt";
final String targetFile = "D:\\_github\\sensitive-word\\src\\main\\resources\\dict.txt";
final String sourceFile = "D:\\_github\\sensitive-word\\src\\main\\resources\\sensitive_word_dict.txt";
final String targetFile = "D:\\_github\\sensitive-word\\src\\main\\resources\\sensitive_word_dict.txt";
List<String> words = FileUtil.readAllLines(sourceFile);
List<String> formats = CollectionUtil.toList(words, new IHandler<String, String>() {

View File

@@ -22,7 +22,7 @@ public class DataMemoryTest {
*/
@Test
public void hashMapTest() {
List<String> allLines = StreamUtil.readAllLines("/dict.txt");
List<String> allLines = StreamUtil.readAllLines("/sensitive_word_dict.txt");
IWordData wordData = WordDatas.defaults();
wordData.initWordData(allLines);
@@ -36,7 +36,7 @@ public class DataMemoryTest {
//33.4 MB
@Test
public void treeTest() {
List<String> allLines = StreamUtil.readAllLines("/dict.txt");
List<String> allLines = StreamUtil.readAllLines("/sensitive_word_dict.txt");
IWordData wordData = WordDatas.tree();
wordData.initWordData(allLines);