release branch 0.0.13

This commit is contained in:
houbb
2021-05-12 22:25:24 +08:00
parent f97c48a650
commit 8b4d0d4560
18 changed files with 655 additions and 48 deletions

View File

@@ -0,0 +1,19 @@
package com.github.houbb.sensitive.word.api;
import java.util.List;
/**
* Source of allowed words: the words returned by an implementation are not treated as sensitive words.
* @author binbin.hou
* @since 0.0.13
*/
public interface IWordAllow {
/**
* Returns the allowed words.
* @return the list of allowed words
* @since 0.0.13
*/
List<String> allow();
}

View File

@@ -7,6 +7,7 @@ import java.util.List;
* @author binbin.hou
* @since 0.0.1
*/
@Deprecated
public interface IWordData {
/**

View File

@@ -0,0 +1,19 @@
package com.github.houbb.sensitive.word.api;
import java.util.List;
/**
* Source of denied words: the words returned by an implementation are treated as sensitive words.
* @author binbin.hou
* @since 0.0.13
*/
public interface IWordDeny {
/**
* Returns the denied words.
* @return the list of denied words
* @since 0.0.13
*/
List<String> deny();
}

View File

@@ -1,10 +1,13 @@
package com.github.houbb.sensitive.word.bs;
import com.github.houbb.heaven.constant.CharConst;
import com.github.houbb.sensitive.word.api.IWordContext;
import com.github.houbb.sensitive.word.api.IWordData;
import com.github.houbb.sensitive.word.api.IWordMap;
import com.github.houbb.heaven.util.common.ArgUtil;
import com.github.houbb.heaven.util.util.CollectionUtil;
import com.github.houbb.sensitive.word.api.*;
import com.github.houbb.sensitive.word.exception.SensitiveWordException;
import com.github.houbb.sensitive.word.support.allow.WordAllows;
import com.github.houbb.sensitive.word.support.data.SensitiveWordData;
import com.github.houbb.sensitive.word.support.deny.WordDenys;
import com.github.houbb.sensitive.word.support.map.SensitiveWordMap;
import java.util.List;
@@ -30,37 +33,42 @@ public class SensitiveWordBs {
*
* @since 0.0.1
*/
private static volatile IWordMap sensitiveWordMap;
private IWordMap sensitiveWordMap;
/**
* 默认的执行上下文
*
* @since 0.0.4
*/
private volatile IWordContext context;
private final IWordContext context = buildDefaultContext();
/**
* 禁止的单词
* @since 0.0.13
*/
private IWordDeny wordDeny = WordDenys.system();
/**
* 允许的单词
* @since 0.0.13
*/
private IWordAllow wordAllow = WordAllows.system();
/**
* DCL 初始化 wordMap 信息
*
* @return 初始化后的结果
* 注意map 的构建是一个比较耗时的动作
* @since 0.0.4
*/
private static IWordMap initWordMap() {
if (sensitiveWordMap == null) {
synchronized (IWordMap.class) {
if (sensitiveWordMap == null) {
// 加载配置信息
IWordData wordData = new SensitiveWordData();
List<String> lines = wordData.getWordData();
private synchronized void initWordMap() {
// 加载配置信息
List<String> denyList = wordDeny.deny();
List<String> allowList = wordAllow.allow();
List<String> results = CollectionUtil.difference(denyList, allowList);
// 初始化 DFA 信息
sensitiveWordMap = new SensitiveWordMap();
sensitiveWordMap.initWordMap(lines);
}
}
}
return sensitiveWordMap;
// 初始化 DFA 信息
sensitiveWordMap = new SensitiveWordMap();
sensitiveWordMap.initWordMap(results);
}
/**
@@ -72,12 +80,44 @@ public class SensitiveWordBs {
* @since 0.0.1
*/
public static SensitiveWordBs newInstance() {
initWordMap();
return new SensitiveWordBs();
}
SensitiveWordBs bs = new SensitiveWordBs();
bs.context = buildDefaultContext();
/**
* 初始化
*
* 1. 根据配置,初始化对应的 map。比较消耗性能。
* @since 0.0.13
* @return this
*/
public SensitiveWordBs init() {
this.initWordMap();
return bs;
return this;
}
/**
* Sets the deny-list implementation.
* The stored implementation is only consulted when the word map is built,
* so call {@code init()} after changing it for the change to take effect.
* @param wordDeny deny-list implementation; passed through ArgUtil.notNull (presumably rejects null - confirm)
* @return this
* @since 0.0.13
*/
public SensitiveWordBs wordDeny(IWordDeny wordDeny) {
ArgUtil.notNull(wordDeny, "wordDeny");
this.wordDeny = wordDeny;
return this;
}
/**
* Sets the allow-list implementation.
* The stored implementation is only consulted when the word map is built,
* so call {@code init()} after changing it for the change to take effect.
* @param wordAllow allow-list implementation; passed through ArgUtil.notNull (presumably rejects null - confirm)
* @return this
* @since 0.0.13
*/
public SensitiveWordBs wordAllow(IWordAllow wordAllow) {
ArgUtil.notNull(wordAllow, "wordAllow");
this.wordAllow = wordAllow;
return this;
}
/**
@@ -122,7 +162,7 @@ public class SensitiveWordBs {
* @return 结果
* @since 0.0.4
*/
private static IWordContext buildDefaultContext() {
private IWordContext buildDefaultContext() {
IWordContext wordContext = SensitiveWordContext.newInstance();
// 格式统一化
wordContext.ignoreCase(true);
@@ -148,6 +188,8 @@ public class SensitiveWordBs {
* @since 0.0.1
*/
public boolean contains(final String target) {
// Build the word map first if init() has not been called on this instance yet.
statusCheck();
return sensitiveWordMap.contains(target, context);
}
@@ -161,6 +203,8 @@ public class SensitiveWordBs {
* @since 0.0.1
*/
public List<String> findAll(final String target) {
// Build the word map first if init() has not been called on this instance yet.
statusCheck();
return sensitiveWordMap.findAll(target, context);
}
@@ -173,6 +217,8 @@ public class SensitiveWordBs {
* @since 0.0.1
*/
public String findFirst(final String target) {
// Build the word map first if init() has not been called on this instance yet.
statusCheck();
return sensitiveWordMap.findFirst(target, context);
}
@@ -185,6 +231,8 @@ public class SensitiveWordBs {
* @since 0.0.2
*/
public String replace(final String target, final char replaceChar) {
// Build the word map first if init() has not been called on this instance yet.
statusCheck();
return sensitiveWordMap.replace(target, replaceChar, context);
}
@@ -200,4 +248,15 @@ public class SensitiveWordBs {
return this.replace(target, CharConst.STAR);
}
/**
 * Ensures the word map exists before it is used.
 *
 * <p>When the map has not been built yet, {@code init()} is invoked so that
 * callers who skipped the explicit initialisation still get a usable instance.</p>
 *
 * <p>NOTE(review): the null check itself is performed without synchronisation;
 * only the map construction is synchronised.</p>
 *
 * @since 0.0.13
 */
private void statusCheck() {
    if (sensitiveWordMap != null) {
        // Already initialised: nothing to do.
        return;
    }
    this.init();
}
}

View File

@@ -0,0 +1,82 @@
package com.github.houbb.sensitive.word.core;
import com.github.houbb.sensitive.word.bs.SensitiveWordBs;
import java.util.List;
/**
* Static convenience facade for sensitive-word operations.
* Every method delegates to one shared, default-configured {@code SensitiveWordBs} instance.
* @author binbin.hou
* @since 0.0.13
*/
public final class SensitiveWordHelper {
// Utility class: not meant to be instantiated.
private SensitiveWordHelper(){}
/**
* Shared default instance; created and initialised when this class is loaded.
* @since 0.0.13
*/
private static final SensitiveWordBs WORD_BS = SensitiveWordBs.newInstance().init();
/**
* Tells whether the text contains a sensitive word.
*
* @param target text to inspect
* @return {@code true} if a sensitive word is found
* @since 0.0.13
*/
public static boolean contains(final String target) {
return WORD_BS.contains(target);
}
/**
* Returns all sensitive words found in the text.
* 1. The result is documented as de-duplicated and ordered.
* 2. If nothing is found, an empty list is returned.
*
* @param target text to inspect
* @return list of sensitive words
* @since 0.0.13
*/
public static List<String> findAll(final String target) {
return WORD_BS.findAll(target);
}
/**
* Returns the first sensitive word found in the text.
* 1. Documented to return {@code null} when nothing is found.
*
* @param target text to inspect
* @return the first sensitive word
* @since 0.0.13
*/
public static String findFirst(final String target) {
return WORD_BS.findFirst(target);
}
/**
* Replaces every sensitive word in the text with the given character.
*
* @param target text to process
* @param replaceChar character used as the replacement
* @return the text after replacement
* @since 0.0.13
*/
public static String replace(final String target, final char replaceChar) {
return WORD_BS.replace(target, replaceChar);
}
/**
* Replaces every sensitive word in the text with the default replacement character.
* 1. Delegates to {@code SensitiveWordBs#replace(String)}.
* NOTE(review): an earlier comment claimed a space is used by default, but the
* helper test expects asterisks ('*') - confirm against SensitiveWordBs.
*
* @param target text to process
* @return the text after replacement
* @since 0.0.13
*/
public static String replace(final String target) {
return WORD_BS.replace(target);
}
}

View File

@@ -0,0 +1,43 @@
package com.github.houbb.sensitive.word.support.allow;
import com.github.houbb.heaven.annotation.ThreadSafe;
import com.github.houbb.heaven.support.pipeline.Pipeline;
import com.github.houbb.heaven.support.pipeline.impl.DefaultPipeline;
import com.github.houbb.sensitive.word.api.IWordAllow;
import java.util.ArrayList;
import java.util.List;
/**
 * Template for an allow list that is assembled from several {@link IWordAllow} sources.
 *
 * <p>Subclasses register their sources in {@link #init(Pipeline)}; {@link #allow()}
 * then concatenates whatever each registered source returns.</p>
 *
 * @author binbin.hou
 * @since 0.0.13
 */
@ThreadSafe
public abstract class WordAllowInit implements IWordAllow {

    /**
     * Registers the allow-list sources that should be combined.
     *
     * @param pipeline pipeline to which the sources are added
     * @since 0.0.13
     */
    protected abstract void init(final Pipeline<IWordAllow> pipeline);

    /**
     * Collects the allowed words of every registered source.
     *
     * @return a new list holding the words of all sources, in the order
     *         the sources are returned by the pipeline
     */
    @Override
    public List<String> allow() {
        // A fresh pipeline is created for every call; nothing is cached on the instance.
        final Pipeline<IWordAllow> chain = new DefaultPipeline<>();
        this.init(chain);

        final List<String> merged = new ArrayList<>();
        for (IWordAllow source : chain.list()) {
            merged.addAll(source.allow());
        }
        return merged;
    }
}

View File

@@ -0,0 +1,23 @@
package com.github.houbb.sensitive.word.support.allow;
import com.github.houbb.heaven.annotation.ThreadSafe;
import com.github.houbb.heaven.util.io.StreamUtil;
import com.github.houbb.sensitive.word.api.IWordAllow;
import com.github.houbb.sensitive.word.api.IWordDeny;
import java.util.List;
/**
* Default allow list shipped with the library.
* @author binbin.hou
* @since 0.0.13
*/
@ThreadSafe
public class WordAllowSystem implements IWordAllow {
/**
* Reads the allowed words from "/sensitive_word_allow.txt".
* @return the lines returned by StreamUtil.readAllLines for that path
*/
@Override
public List<String> allow() {
// presumably a classpath resource with one word per line - confirm against StreamUtil
return StreamUtil.readAllLines("/sensitive_word_allow.txt");
}
}

View File

@@ -0,0 +1,49 @@
package com.github.houbb.sensitive.word.support.allow;
import com.github.houbb.heaven.support.instance.impl.Instances;
import com.github.houbb.heaven.support.pipeline.Pipeline;
import com.github.houbb.heaven.util.util.ArrayUtil;
import com.github.houbb.sensitive.word.api.IWordAllow;
/**
 * Factory methods for {@link IWordAllow} implementations.
 *
 * @author binbin.hou
 * @since 0.0.13
 */
public final class WordAllows {

    private WordAllows() {
    }

    /**
     * Combines several allow lists into one.
     *
     * @param wordAllow first allow list
     * @param others    further allow lists, appended after the first one
     * @return an allow list that registers all given lists in a pipeline
     * @since 0.0.13
     */
    public static IWordAllow chains(final IWordAllow wordAllow,
                                    final IWordAllow... others) {
        return new WordAllowInit() {
            @Override
            protected void init(Pipeline<IWordAllow> pipeline) {
                pipeline.addLast(wordAllow);

                // Nothing more to register when no additional lists were passed.
                if (!ArrayUtil.isNotEmpty(others)) {
                    return;
                }
                for (IWordAllow extra : others) {
                    pipeline.addLast(extra);
                }
            }
        };
    }

    /**
     * Returns the system default allow list.
     *
     * @return the {@link WordAllowSystem} instance obtained from {@code Instances.singleton}
     * @since 0.0.13
     */
    public static IWordAllow system() {
        return Instances.singleton(WordAllowSystem.class);
    }
}

View File

@@ -0,0 +1,44 @@
package com.github.houbb.sensitive.word.support.deny;
import com.github.houbb.heaven.annotation.ThreadSafe;
import com.github.houbb.heaven.support.pipeline.Pipeline;
import com.github.houbb.heaven.support.pipeline.impl.DefaultPipeline;
import com.github.houbb.heaven.util.io.StreamUtil;
import com.github.houbb.sensitive.word.api.IWordDeny;
import java.util.ArrayList;
import java.util.List;
/**
 * Template for a deny list that is assembled from several {@link IWordDeny} sources.
 *
 * <p>Subclasses register their sources in {@link #init(Pipeline)}; {@link #deny()}
 * then concatenates whatever each registered source returns.</p>
 *
 * @author binbin.hou
 * @since 0.0.13
 */
@ThreadSafe
public abstract class WordDenyInit implements IWordDeny {

    /**
     * Registers the deny-list sources that should be combined.
     *
     * @param pipeline pipeline to which the sources are added
     * @since 0.0.13
     */
    protected abstract void init(final Pipeline<IWordDeny> pipeline);

    /**
     * Collects the denied words of every registered source.
     *
     * @return a new list holding the words of all sources, in the order
     *         the sources are returned by the pipeline
     */
    @Override
    public List<String> deny() {
        // A fresh pipeline is created for every call; nothing is cached on the instance.
        final Pipeline<IWordDeny> chain = new DefaultPipeline<>();
        this.init(chain);

        final List<String> merged = new ArrayList<>();
        for (IWordDeny source : chain.list()) {
            merged.addAll(source.deny());
        }
        return merged;
    }
}

View File

@@ -0,0 +1,27 @@
package com.github.houbb.sensitive.word.support.deny;
import com.github.houbb.heaven.annotation.ThreadSafe;
import com.github.houbb.heaven.util.guava.Guavas;
import com.github.houbb.heaven.util.io.StreamUtil;
import com.github.houbb.sensitive.word.api.IWordDeny;
import com.github.houbb.sensitive.word.constant.AppConst;
import java.util.List;
/**
 * Default deny list shipped with the library.
 *
 * @author binbin.hou
 * @since 0.0.13
 */
@ThreadSafe
public class WordDenySystem implements IWordDeny {

    /**
     * Reads the denied words from the bundled word files.
     *
     * @return the lines of "/dict.txt", followed by those of "/dict_en.txt"
     *         and "/sensitive_word_deny.txt"
     */
    @Override
    public List<String> deny() {
        // The list read from the first file is extended in place with the remaining files.
        final List<String> merged = StreamUtil.readAllLines("/dict.txt");
        final String[] furtherFiles = {"/dict_en.txt", "/sensitive_word_deny.txt"};
        for (String path : furtherFiles) {
            merged.addAll(StreamUtil.readAllLines(path));
        }
        return merged;
    }
}

View File

@@ -0,0 +1,49 @@
package com.github.houbb.sensitive.word.support.deny;
import com.github.houbb.heaven.support.instance.impl.Instances;
import com.github.houbb.heaven.support.pipeline.Pipeline;
import com.github.houbb.heaven.util.util.ArrayUtil;
import com.github.houbb.sensitive.word.api.IWordDeny;
/**
 * Factory methods for {@link IWordDeny} implementations.
 *
 * @author binbin.hou
 * @since 0.0.13
 */
public final class WordDenys {

    private WordDenys() {
    }

    /**
     * Combines several deny lists into one.
     *
     * @param wordDeny first deny list
     * @param others   further deny lists, appended after the first one
     * @return a deny list that registers all given lists in a pipeline
     * @since 0.0.13
     */
    public static IWordDeny chains(final IWordDeny wordDeny,
                                   final IWordDeny... others) {
        return new WordDenyInit() {
            @Override
            protected void init(Pipeline<IWordDeny> pipeline) {
                pipeline.addLast(wordDeny);

                // Nothing more to register when no additional lists were passed.
                if (!ArrayUtil.isNotEmpty(others)) {
                    return;
                }
                for (IWordDeny extra : others) {
                    pipeline.addLast(extra);
                }
            }
        };
    }

    /**
     * Returns the system default deny list.
     *
     * @return the {@link WordDenySystem} instance obtained from {@code Instances.singleton}
     * @since 0.0.13
     */
    public static IWordDeny system() {
        return Instances.singleton(WordDenySystem.class);
    }
}

View File

@@ -1,4 +1,3 @@
001工程
007手机防盗软件任意显软件
007间谍专业版

View File

@@ -1,5 +1,7 @@
package com.github.houbb.sensitive.word.bs;
import com.github.houbb.sensitive.word.support.allow.WordAllows;
import com.github.houbb.sensitive.word.support.deny.WordDenys;
import org.junit.Assert;
import org.junit.Test;
@@ -97,4 +99,15 @@ public class SensitiveWordBsTest {
Assert.assertEquals("", word);
}
/**
* Checks that an instance explicitly configured with the system deny list and
* the system allow list, then initialised, still detects sensitive words.
*/
@Test
public void configTest() {
// Fluent configuration: set both word sources, then build the word map.
SensitiveWordBs wordBs = SensitiveWordBs.newInstance()
.wordDeny(WordDenys.system())
.wordAllow(WordAllows.system())
.init();
final String text = "五星红旗迎风飘扬,毛主席的画像屹立在天安门前。";
Assert.assertTrue(wordBs.contains(text));
}
}

View File

@@ -0,0 +1,100 @@
package com.github.houbb.sensitive.word.core;
import org.junit.Assert;
import org.junit.Test;
import java.util.List;
/**
 * Tests for the static {@code SensitiveWordHelper} facade.
 *
 * @author Administrator
 * @since 0.0.13
 */
public class SensitiveWordHelperTest {

    /**
     * Sample sentence shared by the tests; it contains three sensitive words.
     */
    private static final String TEXT = "五星红旗迎风飘扬,毛主席的画像屹立在天安门前。";

    /**
     * The sample sentence is reported as containing a sensitive word.
     *
     * @since 0.0.13
     */
    @Test
    public void containsTest() {
        Assert.assertTrue(SensitiveWordHelper.contains(TEXT));
    }

    /**
     * All sensitive words are returned, in order of appearance.
     *
     * @since 0.0.13
     */
    @Test
    public void findAllTest() {
        List<String> wordList = SensitiveWordHelper.findAll(TEXT);
        Assert.assertEquals("[五星红旗, 毛主席, 天安门]", wordList.toString());
    }

    /**
     * Only the first sensitive word is returned.
     *
     * @since 0.0.13
     */
    @Test
    public void findFirstTest() {
        String word = SensitiveWordHelper.findFirst(TEXT);
        Assert.assertEquals("五星红旗", word);
    }

    /**
     * The default replacement masks every sensitive word with asterisks.
     *
     * @since 0.0.13
     */
    @Test
    public void replaceTest() {
        String result = SensitiveWordHelper.replace(TEXT);
        Assert.assertEquals("****迎风飘扬,***的画像屹立在***前。", result);
    }

    /**
     * A custom replacement character masks only the sensitive words.
     *
     * @since 0.0.13
     */
    @Test
    public void replaceCharTest() {
        String result = SensitiveWordHelper.replace(TEXT, '0');
        // The comma is not part of any sensitive word and must survive the replacement.
        Assert.assertEquals("0000迎风飘扬,000的画像屹立在000前。", result);
    }

    /**
     * Matching ignores letter case; the word is returned as written in the text.
     *
     * @since 0.0.13
     */
    @Test
    public void ignoreCaseTest() {
        final String text = "fuCK the bad words.";
        String word = SensitiveWordHelper.findFirst(text);
        Assert.assertEquals("fuCK", word);
    }

    /**
     * Matching ignores the difference between full-width and half-width characters;
     * the word is returned as written in the text.
     *
     * @since 0.0.13
     */
    @Test
    public void ignoreWidthTest() {
        // The input must actually contain full-width letters, otherwise nothing is tested.
        final String text = "ｆｕｃｋ the bad words.";
        String word = SensitiveWordHelper.findFirst(text);
        Assert.assertEquals("ｆｕｃｋ", word);
    }
}