mirror of
https://github.com/zongzibinbin/MallChat.git
synced 2025-12-26 04:47:53 +08:00
fix:优化AC自动机
This commit is contained in:
parent
8e6841ba9f
commit
96a3c47a49
@ -1,7 +1,7 @@
|
||||
package com.abin.mallchat.common.common.algorithm.sensitiveWord;
|
||||
|
||||
import com.abin.mallchat.common.common.algorithm.ac.ACTrie;
|
||||
import com.abin.mallchat.common.common.algorithm.ac.MatchResult;
|
||||
import com.abin.mallchat.common.common.algorithm.sensitiveWord.ac.ACTrie;
|
||||
import com.abin.mallchat.common.common.algorithm.sensitiveWord.ac.MatchResult;
|
||||
import org.HdrHistogram.ConcurrentHistogram;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
|
||||
|
||||
@ -1,7 +1,6 @@
|
||||
package com.abin.mallchat.common.common.algorithm.sensitiveWord;
|
||||
|
||||
import com.abin.mallchat.common.common.algorithm.ac.ACTrie;
|
||||
import com.abin.mallchat.common.common.algorithm.acpro.ACProTrie;
|
||||
import com.abin.mallchat.common.common.algorithm.sensitiveWord.acpro.ACProTrie;
|
||||
import io.micrometer.core.instrument.util.StringUtils;
|
||||
|
||||
import java.util.List;
|
||||
@ -13,6 +12,7 @@ import java.util.Objects;
|
||||
*@description: 基于ACFilter的优化增强版本
|
||||
*/
|
||||
public class ACProFilter implements SensitiveWordFilter{
|
||||
|
||||
private ACProTrie acProTrie;
|
||||
|
||||
@Override
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
package com.abin.mallchat.common.common.algorithm.ac;
|
||||
package com.abin.mallchat.common.common.algorithm.sensitiveWord.ac;
|
||||
|
||||
import com.google.common.collect.Lists;
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
package com.abin.mallchat.common.common.algorithm.ac;
|
||||
package com.abin.mallchat.common.common.algorithm.sensitiveWord.ac;
|
||||
|
||||
import com.google.common.collect.Maps;
|
||||
import lombok.Getter;
|
||||
@ -1,4 +1,4 @@
|
||||
package com.abin.mallchat.common.common.algorithm.ac;
|
||||
package com.abin.mallchat.common.common.algorithm.sensitiveWord.ac;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Getter;
|
||||
@ -1,4 +1,4 @@
|
||||
package com.abin.mallchat.common.common.algorithm.acpro;
|
||||
package com.abin.mallchat.common.common.algorithm.sensitiveWord.acpro;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
@ -87,52 +87,42 @@ public class ACProTrie {
|
||||
// 匹配
|
||||
public String match(String matchWord)
|
||||
{
|
||||
Word walkNode=root;
|
||||
char[] wordArray=matchWord.toCharArray();
|
||||
for(int i=0;i<wordArray.length;i++)
|
||||
{
|
||||
// 失败回调状态
|
||||
while(!walkNode.hasChild(wordArray[i]) && walkNode.failOver!=null)
|
||||
{
|
||||
walkNode=walkNode.failOver;
|
||||
Word walkNode = root;
|
||||
char[] wordArray = matchWord.toCharArray();
|
||||
for (int i = 0; i < wordArray.length; i++) {
|
||||
// 失败"回溯"
|
||||
while (!walkNode.hasChild(wordArray[i]) && walkNode.failOver != null) {
|
||||
walkNode = walkNode.failOver;
|
||||
}
|
||||
if(walkNode.hasChild(wordArray[i])) {
|
||||
walkNode=walkNode.next.get(wordArray[i]);
|
||||
if(walkNode.end){
|
||||
if (walkNode.hasChild(wordArray[i])) {
|
||||
walkNode = walkNode.next.get(wordArray[i]);
|
||||
if (walkNode.end) {
|
||||
// sentinelA和sentinelB作为哨兵节点,去后面探测是否仍存在end
|
||||
Word sentinelA = walkNode; // 记录当前节点
|
||||
Word sentinelB = walkNode; //记录end节点
|
||||
int k = i+1;
|
||||
boolean flag=false;
|
||||
int k = i + 1;
|
||||
boolean flag = false;
|
||||
//判断end是不是最终end即敏感词是否存在包含关系(abc,abcd)
|
||||
while(k < wordArray.length && sentinelA.hasChild(wordArray[k])) {
|
||||
while (k < wordArray.length && sentinelA.hasChild(wordArray[k])) {
|
||||
sentinelA = sentinelA.next.get(wordArray[k]);
|
||||
k++;
|
||||
if(sentinelA.end)
|
||||
{
|
||||
sentinelB=sentinelA;
|
||||
flag=true;
|
||||
if (sentinelA.end) {
|
||||
sentinelB = sentinelA;
|
||||
flag = true;
|
||||
}
|
||||
}
|
||||
// 根据结果去替换*
|
||||
if(flag){
|
||||
int length=sentinelB.depth;
|
||||
while(length>0)
|
||||
{
|
||||
length--;
|
||||
wordArray[i+length]=MASK;
|
||||
}
|
||||
// 直接跳到最后的end节点failOver
|
||||
i=i+length;
|
||||
walkNode = sentinelB.failOver;
|
||||
}else{
|
||||
int length=walkNode.depth;
|
||||
while (length>0){
|
||||
length--;
|
||||
wordArray[i-length]=MASK;
|
||||
}
|
||||
walkNode = walkNode.failOver;
|
||||
// 计算替换长度
|
||||
int len = flag ? sentinelB.depth : walkNode.depth;
|
||||
while (len > 0) {
|
||||
len--;
|
||||
int index = flag ? i - walkNode.depth + 1 + len : i - len;
|
||||
wordArray[index] = MASK;
|
||||
}
|
||||
// 更新i
|
||||
i += flag ? sentinelB.depth : 0;
|
||||
// 更新node
|
||||
walkNode = flag ? sentinelB.failOver : walkNode.failOver;
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -4,9 +4,6 @@ import com.abin.mallchat.common.common.algorithm.sensitiveWord.ACFilter;
|
||||
import com.abin.mallchat.common.common.algorithm.sensitiveWord.ACProFilter;
|
||||
import com.abin.mallchat.common.common.algorithm.sensitiveWord.DFAFilter;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.FileReader;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
@ -50,7 +47,7 @@ public class SensitiveTest {
|
||||
|
||||
@Test
|
||||
public void ACMulti() {
|
||||
List<String> sensitiveList = Arrays.asList("白痴", "你是白痴", "白痴吗");
|
||||
List<String> sensitiveList = Arrays.asList("你是白痴","你是");
|
||||
ACFilter instance = new ACFilter();
|
||||
instance.loadWord(sensitiveList);
|
||||
System.out.println(instance.filter("你是白痴吗"));
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user