Merge pull request #105 from 1045078399/sensitive

feat:敏感词封装
This commit is contained in:
zongzibinbin 2023-07-16 01:40:56 +08:00 committed by GitHub
commit ac34a522e7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 253 additions and 85 deletions

View File

@ -0,0 +1,30 @@
package com.abin.mallchat.common.common.config;
import com.abin.mallchat.common.common.utils.sensitiveWord.DFAFilter;
import com.abin.mallchat.common.common.utils.sensitiveWord.SensitiveWordBs;
import com.abin.mallchat.common.sensitive.MyWordDeny;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
/**
 * Spring configuration that publishes the sensitive-word bootstrap object as a bean.
 */
@Configuration
public class SensitiveWordConfig {

    /** Word source handed to the bootstrap object; injected by Spring. */
    @Autowired
    private MyWordDeny myWordDeny;

    /**
     * Creates the bootstrap object, selects the DFA filter, attaches the word
     * source and runs the initialization step.
     *
     * @return the initialized bootstrap object
     * @since 1.0.0
     */
    @Bean
    public SensitiveWordBs sensitiveWordBs() {
        SensitiveWordBs withFilter = SensitiveWordBs.newInstance()
                .filterStrategy(DFAFilter.getInstance());
        SensitiveWordBs withWords = withFilter.sensitiveWord(myWordDeny);
        return withWords.init();
    }
}

View File

@ -1,4 +1,4 @@
package com.abin.mallchat.common.common.utils;
package com.abin.mallchat.common.common.utils.sensitiveWord;
import com.abin.mallchat.common.common.algorithm.ac.ACTrie;
import com.abin.mallchat.common.common.algorithm.ac.MatchResult;
@ -15,7 +15,7 @@ import java.util.Objects;
*
* Created by berg on 2023/6/18.
*/
public class SensitiveWordUtils0 {
public class ACFilter implements SensitiveWordFilter {
private final static char mask_char = '*'; // 替代字符
@ -27,7 +27,7 @@ public class SensitiveWordUtils0 {
* @param text 文本
* @return boolean
*/
public static boolean hasSensitiveWord(String text) {
public boolean hasSensitiveWord(String text) {
if (StringUtils.isBlank(text)) return false;
return !Objects.equals(filter(text), text);
}
@ -38,7 +38,7 @@ public class SensitiveWordUtils0 {
* @param text 待替换文本
* @return 替换后的文本
*/
public static String filter(String text) {
public String filter(String text) {
if (StringUtils.isBlank(text)) return text;
List<MatchResult> matchResults = ac_trie.matches(text);
StringBuffer result = new StringBuffer(text);
@ -62,7 +62,7 @@ public class SensitiveWordUtils0 {
*
* @param words 敏感词数组
*/
public static void loadWord(List<String> words) {
public void loadWord(List<String> words) {
if (words == null) return;
ac_trie = new ACTrie(words);
}

View File

@ -1,4 +1,4 @@
package com.abin.mallchat.common.common.utils;
package com.abin.mallchat.common.common.utils.sensitiveWord;
import org.apache.commons.lang3.StringUtils;
import org.springframework.util.CollectionUtils;
@ -18,7 +18,10 @@ import java.util.*;
* @author zhaoyuhang
* @date 2023/06/19
*/
public final class SensitiveWordUtils {
public final class DFAFilter implements SensitiveWordFilter {
private DFAFilter() {
}
private static Word root = new Word(' '); // 敏感词字典的根节点
private final static char replace = '*'; // 替代字符
private final static String skipChars = " !*-+_=,.@;:;:。、??()【】[]《》<>“”\""; // 遇到这些字符就会跳过
@ -30,6 +33,10 @@ public final class SensitiveWordUtils {
}
}
public static DFAFilter getInstance() {
return new DFAFilter();
}
/**
* 判断文本中是否存在敏感词
@ -37,7 +44,7 @@ public final class SensitiveWordUtils {
* @param text 文本
* @return true: 存在敏感词, false: 不存在敏感词
*/
public static boolean hasSensitiveWord(String text) {
public boolean hasSensitiveWord(String text) {
if (StringUtils.isBlank(text)) return false;
return !Objects.equals(filter(text), text);
}
@ -48,7 +55,7 @@ public final class SensitiveWordUtils {
* @param text 待替换文本
* @return 替换后的文本
*/
public static String filter(String text) {
public String filter(String text) {
StringBuilder result = new StringBuilder(text);
int index = 0;
while (index < result.length()) {
@ -93,7 +100,7 @@ public final class SensitiveWordUtils {
*
* @param words 敏感词数组
*/
public static void loadWord(List<String> words) {
public void loadWord(List<String> words) {
if (!CollectionUtils.isEmpty(words)) {
Word newRoot = new Word(' ');
words.forEach(word -> loadWord(word, newRoot));
@ -106,7 +113,7 @@ public final class SensitiveWordUtils {
*
* @param word
*/
public static void loadWord(String word, Word root) {
public void loadWord(String word, Word root) {
if (StringUtils.isBlank(word)) {
return;
}
@ -136,7 +143,7 @@ public final class SensitiveWordUtils {
*
* @param path 文本文件的绝对路径
*/
public static void loadWordFromFile(String path) {
public void loadWordFromFile(String path) {
try (InputStream inputStream = Files.newInputStream(Paths.get(path))) {
loadWord(inputStream);
} catch (IOException e) {
@ -150,7 +157,7 @@ public final class SensitiveWordUtils {
* @param inputStream 文本文件输入流
* @throws IOException IO异常
*/
public static void loadWord(InputStream inputStream) throws IOException {
public void loadWord(InputStream inputStream) throws IOException {
try (BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream, StandardCharsets.UTF_8))) {
String line;
ArrayList<String> list = new ArrayList<>();
@ -167,7 +174,7 @@ public final class SensitiveWordUtils {
* @param c 待检测字符
* @return true: 需要跳过, false: 不需要跳过
*/
private static boolean skip(char c) {
private boolean skip(char c) {
return skipSet.contains(c);
}
@ -186,17 +193,7 @@ public final class SensitiveWordUtils {
public Word(char c) {
this.c = c;
this.end = false;
this.next = new HashMap<>();
}
}
public static void main(String[] args) {
String text = "白日,梦";
String filter = filter(text);
System.out.println(filter);
}
}

View File

@ -0,0 +1,18 @@
package com.abin.mallchat.common.common.utils.sensitiveWord;
import java.util.List;
/**
* Source of sensitive words (the deny list).
*
* @author zhaoyuhang
* @date 2023/07/09
*/
public interface IWordDeny {
/**
* Returns the sensitive words supplied by this source.
*
* @return the list of sensitive words
* @since 0.0.13
*/
List<String> deny();
}

View File

@ -0,0 +1,102 @@
package com.abin.mallchat.common.common.utils.sensitiveWord;
import java.util.List;
/**
 * Bootstrap class for sensitive-word filtering.
 *
 * <p>Configure an instance fluently with {@link #filterStrategy(SensitiveWordFilter)} and
 * {@link #sensitiveWord(IWordDeny)}, then call {@link #init()} to load the words into the
 * selected filter. After that, {@link #hasSensitiveWord(String)} and {@link #filter(String)}
 * delegate to the filter.
 *
 * @author zhaoyuhang
 * @date 2023/07/08
 */
public class SensitiveWordBs {

    /**
     * Filtering strategy; defaults to the DFA implementation.
     */
    private SensitiveWordFilter sensitiveWordFilter = DFAFilter.getInstance();

    /**
     * Source of the sensitive words; must be set before {@link #init()} is called.
     */
    private IWordDeny wordDeny;

    /**
     * Private constructor; use {@link #newInstance()}.
     */
    private SensitiveWordBs() {
    }

    /**
     * Creates a new, not yet initialized bootstrap object.
     *
     * @return a new instance
     */
    public static SensitiveWordBs newInstance() {
        return new SensitiveWordBs();
    }

    /**
     * Initializes the filter by loading the words supplied by the configured word source.
     *
     * <p>The original author notes that building the word structure is relatively
     * expensive, so this is intended to be done once up front.
     *
     * @return this
     * @throws IllegalStateException if no word source has been configured via
     *                               {@link #sensitiveWord(IWordDeny)}
     * @since 0.0.13
     */
    public SensitiveWordBs init() {
        // Fail fast with a descriptive error instead of an anonymous NullPointerException.
        if (wordDeny == null) {
            throw new IllegalStateException("wordDeny must be configured via sensitiveWord(...) before init()");
        }
        loadWord(wordDeny.deny());
        return this;
    }

    /**
     * Selects the filtering strategy.
     *
     * @param filter the filter to use
     * @return this
     * @throws IllegalArgumentException if {@code filter} is {@code null}
     * @since 0.7.0
     */
    public SensitiveWordBs filterStrategy(SensitiveWordFilter filter) {
        if (filter == null) {
            throw new IllegalArgumentException("filter can not be null");
        }
        this.sensitiveWordFilter = filter;
        return this;
    }

    /**
     * Sets the source of sensitive words.
     *
     * @param wordDeny the word source
     * @return this
     * @throws IllegalArgumentException if {@code wordDeny} is {@code null}
     */
    public SensitiveWordBs sensitiveWord(IWordDeny wordDeny) {
        if (wordDeny == null) {
            throw new IllegalArgumentException("wordDeny can not be null");
        }
        this.wordDeny = wordDeny;
        return this;
    }

    /**
     * Checks whether the text contains a sensitive word, as decided by the configured filter.
     *
     * @param text the text to check
     * @return the result of the filter's {@code hasSensitiveWord} call
     */
    public boolean hasSensitiveWord(String text) {
        return sensitiveWordFilter.hasSensitiveWord(text);
    }

    /**
     * Filters the text using the configured filter.
     *
     * @param text the text to filter
     * @return the result of the filter's {@code filter} call
     */
    public String filter(String text) {
        return sensitiveWordFilter.filter(text);
    }

    /**
     * Loads the sensitive-word list into the configured filter.
     *
     * @param words the sensitive words
     */
    private void loadWord(List<String> words) {
        sensitiveWordFilter.loadWord(words);
    }
}

View File

@ -0,0 +1,37 @@
package com.abin.mallchat.common.common.utils.sensitiveWord;
import java.util.List;
/**
* Sensitive-word filter.
*
* @author zhaoyuhang
* @date 2023/07/08
*/
public interface SensitiveWordFilter {
/**
* Checks whether the text contains a sensitive word.
*
* @param text the text to check
* @return boolean
*/
boolean hasSensitiveWord(String text);
/**
* Filters the text.
*
* @param text the text to filter
* @return {@link String}
*/
String filter(String text);
/**
* Loads the list of sensitive words.
*
* @param words the sensitive words
*/
void loadWord(List<String> words);
}

View File

@ -0,0 +1,24 @@
package com.abin.mallchat.common.sensitive;
import com.abin.mallchat.common.common.utils.sensitiveWord.IWordDeny;
import com.abin.mallchat.common.sensitive.dao.SensitiveWordDao;
import com.abin.mallchat.common.sensitive.domain.SensitiveWord;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;
import java.util.List;
import java.util.stream.Collectors;
@Component
public class MyWordDeny implements IWordDeny {
@Autowired
private SensitiveWordDao sensitiveWordDao;
@Override
public List<String> deny() {
return sensitiveWordDao.list()
.stream()
.map(SensitiveWord::getWord)
.collect(Collectors.toList());
}
}

View File

@ -1,5 +0,0 @@
package com.abin.mallchat.common.sensitive.service;
/**
* Service interface for sensitive words; it currently declares no methods.
*/
public interface ISensitiveWordService {
}

View File

@ -1,39 +0,0 @@
package com.abin.mallchat.common.sensitive.service.impl;
import com.abin.mallchat.common.common.utils.SensitiveWordUtils;
import com.abin.mallchat.common.sensitive.dao.SensitiveWordDao;
import com.abin.mallchat.common.sensitive.domain.SensitiveWord;
import com.abin.mallchat.common.sensitive.service.ISensitiveWordService;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.scheduling.concurrent.ThreadPoolTaskExecutor;
import org.springframework.stereotype.Service;
import org.springframework.util.CollectionUtils;
import javax.annotation.PostConstruct;
import java.util.List;
import java.util.stream.Collectors;
/**
 * Loads the sensitive words through {@link SensitiveWordDao} into {@code SensitiveWordUtils}
 * after this bean has been constructed.
 */
@Service
@Slf4j
public class SensitiveWordServiceImpl implements ISensitiveWordService {

    @Autowired
    private SensitiveWordDao sensitiveWordDao;

    @Autowired
    private ThreadPoolTaskExecutor threadPoolTaskExecutor;

    /**
     * Submits a task to the executor that reads all sensitive-word records and, when there
     * are any, hands their words to {@code SensitiveWordUtils.loadWord}.
     */
    @PostConstruct
    public void initSensitiveWord() {
        threadPoolTaskExecutor.execute(() -> {
            log.info("[initSensitiveWord] start");
            List<SensitiveWord> list = sensitiveWordDao.list();
            // Count inside the guarded branch so a null result cannot break the final log line.
            int loadedCount = 0;
            if (!CollectionUtils.isEmpty(list)) {
                List<String> wordList = list.stream()
                        .map(SensitiveWord::getWord)
                        .collect(Collectors.toList());
                SensitiveWordUtils.loadWord(wordList);
                loadedCount = list.size();
            }
            log.info("[initSensitiveWord] end; loading sensitiveWords num:{}", loadedCount);
        });
    }
}

View File

@ -10,9 +10,9 @@ import com.abin.mallchat.common.chat.domain.enums.MessageTypeEnum;
import com.abin.mallchat.common.chat.service.cache.MsgCache;
import com.abin.mallchat.common.common.domain.enums.YesOrNoEnum;
import com.abin.mallchat.common.common.utils.AssertUtil;
import com.abin.mallchat.common.common.utils.SensitiveWordUtils;
import com.abin.mallchat.common.common.utils.discover.PrioritizedUrlDiscover;
import com.abin.mallchat.common.common.utils.discover.domain.UrlInfo;
import com.abin.mallchat.common.common.utils.sensitiveWord.SensitiveWordBs;
import com.abin.mallchat.common.user.domain.entity.User;
import com.abin.mallchat.common.user.domain.enums.RoleEnum;
import com.abin.mallchat.common.user.service.IRoleService;
@ -47,7 +47,9 @@ public class TextMsgHandler extends AbstractMsgHandler {
private UserInfoCache userInfoCache;
@Autowired
private IRoleService iRoleService;
@Autowired
private SensitiveWordBs sensitiveWordBs;
private static final PrioritizedUrlDiscover URL_TITLE_DISCOVER = new PrioritizedUrlDiscover();
@Override
@ -82,7 +84,7 @@ public class TextMsgHandler extends AbstractMsgHandler {
MessageExtra extra = Optional.ofNullable(msg.getExtra()).orElse(new MessageExtra());
Message update = new Message();
update.setId(msg.getId());
update.setContent(SensitiveWordUtils.filter(body.getContent()));
update.setContent(sensitiveWordBs.filter(body.getContent()));
update.setExtra(extra);
//如果有回复消息
if (Objects.nonNull(body.getReplyMsgId())) {

View File

@ -36,16 +36,18 @@ public class GPTChatAIHandler extends AbstractChatAIHandler {
@Override
protected void init() {
super.init();
UserInfoResp userInfo = userService.getUserInfo(chatGPTProperties.getAIUserId());
if (userInfo == null) {
log.error("根据AIUserId:{} 找不到用户信息", chatGPTProperties.getAIUserId());
throw new RuntimeException("根据AIUserId: " + chatGPTProperties.getAIUserId() + " 找不到用户信息");
if (isUse()) {
UserInfoResp userInfo = userService.getUserInfo(chatGPTProperties.getAIUserId());
if (userInfo == null) {
log.error("根据AIUserId:{} 找不到用户信息", chatGPTProperties.getAIUserId());
throw new RuntimeException("根据AIUserId: " + chatGPTProperties.getAIUserId() + " 找不到用户信息");
}
if (StringUtils.isBlank(userInfo.getName())) {
log.warn("根据AIUserId:{} 找到的用户信息没有name", chatGPTProperties.getAIUserId());
throw new RuntimeException("根据AIUserId: " + chatGPTProperties.getAIUserId() + " 找到的用户没有名字");
}
AI_NAME = userInfo.getName();
}
if (StringUtils.isBlank(userInfo.getName())) {
log.warn("根据AIUserId:{} 找到的用户信息没有name", chatGPTProperties.getAIUserId());
throw new RuntimeException("根据AIUserId: " + chatGPTProperties.getAIUserId() + " 找到的用户没有名字");
}
AI_NAME = userInfo.getName();
}
@Override
@ -90,14 +92,12 @@ public class GPTChatAIHandler extends AbstractChatAIHandler {
text = ChatGPTUtils.parseText(response);
} catch (Exception e) {
log.warn("gpt doChat warn:", e);
text= "我累了,明天再聊吧";
text = "我累了,明天再聊吧";
}
return text;
}
@Override
protected boolean supports(Message message) {
if (!chatGPTProperties.isUse()) {

View File

@ -4,7 +4,7 @@ import cn.hutool.core.util.StrUtil;
import com.abin.mallchat.common.common.event.UserBlackEvent;
import com.abin.mallchat.common.common.event.UserRegisterEvent;
import com.abin.mallchat.common.common.utils.AssertUtil;
import com.abin.mallchat.common.common.utils.SensitiveWordUtils;
import com.abin.mallchat.common.common.utils.sensitiveWord.SensitiveWordBs;
import com.abin.mallchat.common.user.dao.BlackDao;
import com.abin.mallchat.common.user.dao.ItemConfigDao;
import com.abin.mallchat.common.user.dao.UserBackpackDao;
@ -63,6 +63,8 @@ public class UserServiceImpl implements UserService {
private BlackDao blackDao;
@Autowired
private UserSummaryCache userSummaryCache;
@Autowired
private SensitiveWordBs sensitiveWordBs;
@Override
public UserInfoResp getUserInfo(Long uid) {
@ -76,7 +78,7 @@ public class UserServiceImpl implements UserService {
public void modifyName(Long uid, ModifyNameReq req) {
//判断名字是不是重复
String newName = req.getName();
AssertUtil.isFalse(SensitiveWordUtils.hasSensitiveWord(newName), "名字中包含敏感词,请重新输入"); // 判断名字中有没有敏感词
AssertUtil.isFalse(sensitiveWordBs.hasSensitiveWord(newName), "名字中包含敏感词,请重新输入"); // 判断名字中有没有敏感词
User oldUser = userDao.getByName(newName);
AssertUtil.isEmpty(oldUser, "名字已经被抢占了,请换一个哦~~");
//判断改名卡够不够