release branch 0.0.5

This commit is contained in:
binbin.hou
2020-01-10 13:19:55 +08:00
parent 236fd661fe
commit 2128d6c757
7 changed files with 3833 additions and 4191 deletions

View File

@@ -45,4 +45,5 @@
| 2 | D | 移除单个字符 `我` | 2020-1-10 09:34:35 | |
| 3 | O | 责任链模式优化代码实现 | 2020-1-10 09:34:35 | |
| 4 | A | 支持数字格式化转换 | 2020-1-10 09:34:35 | |
| 5 | A | 支持数字敏感词验证 | 2020-1-10 09:34:35 | |
| 5 | A | 支持数字敏感词验证 | 2020-1-10 09:34:35 | |
| 6 | O | 优化所有写法的数字为阿拉伯写法 | 2020-1-10 09:34:35 | |

View File

@@ -23,7 +23,7 @@ public final class AppConst {
* 字典的大小
* @since 0.0.1
*/
public static final int DICT_SIZE = 65709;
public static final int DICT_SIZE = 65295;
/**
* 英语词典的大小

View File

@@ -2,12 +2,11 @@ package com.github.houbb.sensitive.word.support.check;
import com.github.houbb.heaven.annotation.ThreadSafe;
import com.github.houbb.heaven.support.instance.impl.Instances;
import com.github.houbb.heaven.util.lang.CharUtil;
import com.github.houbb.sensitive.word.api.ISensitiveCheck;
import com.github.houbb.sensitive.word.api.IWordContext;
import com.github.houbb.sensitive.word.constant.enums.ValidModeEnum;
import com.github.houbb.sensitive.word.support.format.CharFormatChain;
import com.github.houbb.sensitive.word.utils.NumUtils;
import com.github.houbb.sensitive.word.support.format.IgnoreNumStyleCharFormat;
/**
* 敏感词监测实现

View File

@@ -23,7 +23,12 @@ public class CharFormatChain implements ICharFormat {
List<ICharFormat> charFormats = Guavas.newArrayList();
if(context.ignoreCase()) {
charFormats.add(Instances.singleton(IgnoreCaseCharFormat.class));
}
if(context.ignoreWidth()) {
charFormats.add(Instances.singleton(IgnoreWidthCharFormat.class));
}
if(context.ignoreNumStyle()) {
charFormats.add(Instances.singleton(IgnoreNumStyleCharFormat.class));
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,51 @@
package com.github.houbb.sensitive.word.data;
import com.github.houbb.heaven.support.handler.IHandler;
import com.github.houbb.heaven.util.io.FileUtil;
import com.github.houbb.heaven.util.lang.StringUtil;
import com.github.houbb.heaven.util.util.CollectionUtil;
import com.github.houbb.opencc4j.core.impl.ZhConvertBootstrap;
import com.github.houbb.opencc4j.support.segment.impl.CharSegment;
import com.github.houbb.sensitive.word.utils.NumUtils;
import org.junit.Ignore;
import org.junit.Test;
import java.util.List;
/**
 * One-off maintenance utility that unifies the number formatting of the
 * sensitive-word dictionary file.
 *
 * @author binbin.hou
 * @since 0.0.5
 */
@Ignore
public class DictNumTest {

    /**
     * Rewrites the dictionary in place with a unified number format.
     *
     * Steps performed:
     * 1. Read every line of dict.txt.
     * 2. Map each line through {@code NumUtils.getMappingString}
     *    (unified number formatting).
     * 3. Pass the mapped lines to {@code DataUtil.disctinctAndSort}
     *    (presumably de-duplicates and sorts; confirm against DataUtil).
     * 4. Write the result back to the same dict.txt path.
     *
     * NOTE(review): source and target are the same absolute, machine-specific
     * path, so running this overwrites the dictionary.
     *
     * @since 0.0.5
     */
    @Test
    @Ignore
    public void formatTest() {
        final String dictInputPath = "D:\\github\\sensitive-word\\src\\main\\resources\\dict.txt";
        final String dictOutputPath = "D:\\github\\sensitive-word\\src\\main\\resources\\dict.txt";

        final List<String> rawLines = FileUtil.readAllLines(dictInputPath);

        // Per-line converter applying the unified number formatting.
        final IHandler<String, String> numStyleMapper = new IHandler<String, String>() {
            @Override
            public String handle(String line) {
                return NumUtils.getMappingString(line);
            }
        };

        final List<String> mappedLines = CollectionUtil.toList(rawLines, numStyleMapper);
        FileUtil.write(dictOutputPath, DataUtil.disctinctAndSort(mappedLines));
    }
}

View File

@@ -35,7 +35,7 @@ public class DictSlimTest {
@Ignore
public void formatTest() {
final String sourceFile = "D:\\github\\sensitive-word\\src\\main\\resources\\dict.txt";
final String targetFile = "D:\\github\\sensitive-word\\src\\main\\resources\\dict_format.txt";
final String targetFile = "D:\\github\\sensitive-word\\src\\main\\resources\\dict.txt";
List<String> words = FileUtil.readAllLines(sourceFile);
@@ -68,7 +68,7 @@ public class DictSlimTest {
@Test
@Ignore
public void removeTest() {
final String sourceFile = "D:\\github\\sensitive-word\\src\\main\\resources\\dict_format.txt";
final String sourceFile = "D:\\github\\sensitive-word\\src\\main\\resources\\dict.txt";
final String targetFile = "D:\\github\\sensitive-word\\src\\main\\resources\\dict.txt";
List<String> words = FileUtil.readAllLines(sourceFile);