mirror of
https://github.com/houbb/sensitive-word.git
synced 2026-03-22 08:27:36 +08:00
release branch 0.0.5
This commit is contained in:
@@ -45,4 +45,5 @@
|
||||
| 2 | D | 移除单个字符 `我` | 2020-1-10 09:34:35 | |
|
||||
| 3 | O | 责任链模式优化代码实现 | 2020-1-10 09:34:35 | |
|
||||
| 4 | A | 支持数字格式化转换 | 2020-1-10 09:34:35 | |
|
||||
| 5 | A | 支持数字敏感词验证 | 2020-1-10 09:34:35 | |
|
||||
| 5 | A | 支持数字敏感词验证 | 2020-1-10 09:34:35 | |
|
||||
| 6 | O | 优化所有写法的数字为阿拉伯写法 | 2020-1-10 09:34:35 | |
|
||||
@@ -23,7 +23,7 @@ public final class AppConst {
|
||||
* 字典的大小
|
||||
* @since 0.0.1
|
||||
*/
|
||||
public static final int DICT_SIZE = 65709;
|
||||
public static final int DICT_SIZE = 65295;
|
||||
|
||||
/**
|
||||
* 英语词典的大小
|
||||
|
||||
@@ -2,12 +2,11 @@ package com.github.houbb.sensitive.word.support.check;
|
||||
|
||||
import com.github.houbb.heaven.annotation.ThreadSafe;
|
||||
import com.github.houbb.heaven.support.instance.impl.Instances;
|
||||
import com.github.houbb.heaven.util.lang.CharUtil;
|
||||
import com.github.houbb.sensitive.word.api.ISensitiveCheck;
|
||||
import com.github.houbb.sensitive.word.api.IWordContext;
|
||||
import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum;
|
||||
import com.github.houbb.sensitive.word.support.format.CharFormatChain;
|
||||
import com.github.houbb.sensitive.word.utils.NumUtils;
|
||||
import com.github.houbb.sensitive.word.support.format.IgnoreNumStyleCharFormat;
|
||||
|
||||
/**
|
||||
* 敏感词监测实现
|
||||
|
||||
@@ -23,7 +23,12 @@ public class CharFormatChain implements ICharFormat {
|
||||
List<ICharFormat> charFormats = Guavas.newArrayList();
|
||||
if(context.ignoreCase()) {
|
||||
charFormats.add(Instances.singleton(IgnoreCaseCharFormat.class));
|
||||
|
||||
}
|
||||
if(context.ignoreWidth()) {
|
||||
charFormats.add(Instances.singleton(IgnoreWidthCharFormat.class));
|
||||
}
|
||||
if(context.ignoreNumStyle()) {
|
||||
charFormats.add(Instances.singleton(IgnoreNumStyleCharFormat.class));
|
||||
}
|
||||
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,51 @@
|
||||
package com.github.houbb.sensitive.word.data;
|
||||
|
||||
import com.github.houbb.heaven.support.handler.IHandler;
|
||||
import com.github.houbb.heaven.util.io.FileUtil;
|
||||
import com.github.houbb.heaven.util.lang.StringUtil;
|
||||
import com.github.houbb.heaven.util.util.CollectionUtil;
|
||||
import com.github.houbb.opencc4j.core.impl.ZhConvertBootstrap;
|
||||
import com.github.houbb.opencc4j.support.segment.impl.CharSegment;
|
||||
import com.github.houbb.sensitive.word.utils.NumUtils;
|
||||
import org.junit.Ignore;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
 * Maintenance helper, written as a JUnit test, that unifies the way numbers
 * are written in the sensitive-word dictionary file.
 *
 * <p>The class is disabled with {@code @Ignore}: its only method rewrites a
 * file at a hard-coded local Windows path, so it is presumably meant to be
 * run manually on the author's machine rather than as part of the build.</p>
 *
 * @author binbin.hou
 * @since 0.0.5
 */
|
||||
@Ignore
|
||||
public class DictNumTest {
|
||||
|
||||
/**
 * Rewrites the dictionary file with a unified number format.
 *
 * <ol>
 *   <li>Reads every line of the dictionary file.</li>
 *   <li>Passes each line through {@code NumUtils.getMappingString} to unify
 *       how numbers are written.</li>
 *   <li>Hands the mapped lines to {@code DataUtil.disctinctAndSort}
 *       (by its name, de-duplicates and sorts them; confirm against
 *       {@code DataUtil}).</li>
 *   <li>Writes the result to the target file, which is the same path as the
 *       source file, so the dictionary is overwritten in place.</li>
 * </ol>
 *
 * <p>No other normalization (case, character width, symbols, traditional vs.
 * simplified characters) is performed by this method.</p>
 *
 * @since 0.0.5
 */
|
||||
@Test
|
||||
@Ignore
|
||||
public void formatTest() {
|
||||
// Hard-coded local path; adjust before running on another machine.
final String sourceFile = "D:\\github\\sensitive-word\\src\\main\\resources\\dict.txt";
|
||||
// Identical to sourceFile: the dictionary is replaced in place.
final String targetFile = "D:\\github\\sensitive-word\\src\\main\\resources\\dict.txt";
|
||||
|
||||
List<String> words = FileUtil.readAllLines(sourceFile);
|
||||
List<String> formats = CollectionUtil.toList(words, new IHandler<String, String>() {
|
||||
@Override
|
||||
public String handle(String string) {
|
||||
// Unify the number format of the whole line.
|
||||
return NumUtils.getMappingString(string);
|
||||
}
|
||||
});
|
||||
|
||||
List<String> resultList = DataUtil.disctinctAndSort(formats);
|
||||
FileUtil.write(targetFile, resultList);
|
||||
}
|
||||
|
||||
}
|
||||
@@ -35,7 +35,7 @@ public class DictSlimTest {
|
||||
@Ignore
|
||||
public void formatTest() {
|
||||
final String sourceFile = "D:\\github\\sensitive-word\\src\\main\\resources\\dict.txt";
|
||||
final String targetFile = "D:\\github\\sensitive-word\\src\\main\\resources\\dict_format.txt";
|
||||
final String targetFile = "D:\\github\\sensitive-word\\src\\main\\resources\\dict.txt";
|
||||
|
||||
List<String> words = FileUtil.readAllLines(sourceFile);
|
||||
|
||||
@@ -68,7 +68,7 @@ public class DictSlimTest {
|
||||
@Test
|
||||
@Ignore
|
||||
public void removeTest() {
|
||||
final String sourceFile = "D:\\github\\sensitive-word\\src\\main\\resources\\dict_format.txt";
|
||||
final String sourceFile = "D:\\github\\sensitive-word\\src\\main\\resources\\dict.txt";
|
||||
final String targetFile = "D:\\github\\sensitive-word\\src\\main\\resources\\dict.txt";
|
||||
|
||||
List<String> words = FileUtil.readAllLines(sourceFile);
|
||||
|
||||
Reference in New Issue
Block a user