mirror of
https://github.com/houbb/sensitive-word.git
synced 2026-03-22 16:37:17 +08:00
[Feature] add for new
This commit is contained in:
@@ -270,7 +270,7 @@ public class SensitiveWordMap implements IWordMap {
|
||||
return target;
|
||||
}
|
||||
// 用于结果构建
|
||||
StringBuilder resultBuilder = new StringBuilder();
|
||||
StringBuilder resultBuilder = new StringBuilder(target.length());
|
||||
|
||||
for (int i = 0; i < target.length(); i++) {
|
||||
char currentChar = target.charAt(i);
|
||||
|
||||
2047
src/main/resources/stopword.txt
Normal file
2047
src/main/resources/stopword.txt
Normal file
File diff suppressed because it is too large
Load Diff
1603
src/main/resources/stopword_zh.txt
Normal file
1603
src/main/resources/stopword_zh.txt
Normal file
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,27 @@
|
||||
package com.github.houbb.sensitive.word.data;
|
||||
|
||||
import com.github.houbb.heaven.util.io.FileUtil;
|
||||
import com.github.houbb.heaven.util.util.CollectionUtil;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* @author binbin.hou
|
||||
* @since 0.0.3
|
||||
*/
|
||||
public class DataUtil {
|
||||
|
||||
/**
|
||||
* 获取对应文件的独一无二内容
|
||||
* @param name 名称
|
||||
* @return 结果
|
||||
* @since 0.0.1
|
||||
*/
|
||||
public static List<String> distinctLines(final String name) {
|
||||
final String dir = "D:\\github\\sensitive-word\\src\\main\\resources\\";
|
||||
final String path = dir + name;
|
||||
List<String> lines = FileUtil.readAllLines(path);
|
||||
return CollectionUtil.distinct(lines);
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,44 @@
|
||||
package com.github.houbb.sensitive.word.data;
|
||||
|
||||
import com.github.houbb.heaven.support.condition.ICondition;
|
||||
import com.github.houbb.heaven.support.filter.IFilter;
|
||||
import com.github.houbb.heaven.util.io.FileUtil;
|
||||
import com.github.houbb.heaven.util.lang.StringUtil;
|
||||
import com.github.houbb.heaven.util.util.CharsetUtil;
|
||||
import com.github.houbb.heaven.util.util.CollectionUtil;
|
||||
import org.junit.Ignore;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* 停止词数据初始化
|
||||
* @author binbin.hou
|
||||
* @since 0.0.3
|
||||
*/
|
||||
public class StopWordTest {
|
||||
|
||||
/**
|
||||
* 中文测试
|
||||
* @since 0.0.3
|
||||
*/
|
||||
@Test
|
||||
@Ignore
|
||||
public void zhTest() {
|
||||
final String sourceFile = "stopword.txt";
|
||||
final String targetFile = "D:\\github\\sensitive-word\\src\\main\\resources\\stopword_zh.txt";
|
||||
|
||||
List<String> allLines = DataUtil.distinctLines(sourceFile);
|
||||
|
||||
List<String> zhLines = CollectionUtil.conditionList(allLines, new ICondition<String>() {
|
||||
@Override
|
||||
public boolean condition(String s) {
|
||||
return CharsetUtil.isAllChinese(s);
|
||||
}
|
||||
});
|
||||
|
||||
FileUtil.write(targetFile, zhLines);
|
||||
}
|
||||
|
||||
}
|
||||
Reference in New Issue
Block a user