release branch 0.27.1

This commit is contained in:
binbin.hou
2025-09-04 17:38:26 +08:00
parent fca702af5b
commit db18d22cc3
16 changed files with 241 additions and 26 deletions

2
.gitignore vendored
View File

@@ -31,3 +31,5 @@ target/
.DS_Store
Thumbs.db
*.jfr

View File

@@ -450,3 +450,9 @@
| 序号 | 变更类型 | 说明 | 时间 | 备注 |
|:---|:-----|-----------|:-------------------|:---------------------------------------------------|
| 1 | F | 修正词库缺失的问题 | 2025-7-24 23:09:10 | https://github.com/houbb/sensitive-word/issues/125 |
# release_0.28.0
| 序号 | 变更类型 | 说明 | 时间 | 备注 |
|:---|:-----|------------|:------------------|:---------------------------------------------------|
| 1 | O | 优化 char 映射 | 2025-9-4 16:22:24 | https://github.com/houbb/sensitive-word/issues/131 |

View File

@@ -110,7 +110,7 @@ v0.24.0 开始内置支持对敏感词的分类细化,不过工作量比较大
<dependency>
<groupId>com.github.houbb</groupId>
<artifactId>sensitive-word</artifactId>
<version>0.27.1</version>
<version>0.28.0</version>
</dependency>
```

View File

@@ -6,7 +6,7 @@
<groupId>com.github.houbb</groupId>
<artifactId>sensitive-word</artifactId>
<version>0.27.1</version>
<version>0.28.0-SNAPSHOT</version>
<properties>
<!--============================== All Plugins START ==============================-->
@@ -22,7 +22,7 @@
<!--============================== MAIN ==============================-->
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<project.compiler.level>1.7</project.compiler.level>
<project.compiler.level>1.8</project.compiler.level>
<!--============================== INTER ==============================-->
<heaven.version>0.13.0</heaven.version>

View File

@@ -251,6 +251,23 @@ public interface IWordContext {
*/
IWordFormat wordFormat();
/**
* Sets the text-level format strategy ({@link IWordFormatText}) on this context.
*
* @param wordFormatText text format strategy
* @return the context, so that calls can be chained
* @since 0.28.0
*/
IWordContext wordFormatText(final IWordFormatText wordFormatText);
/**
* Returns the text-level format strategy of this context.
*
* @return the text format strategy
* @since 0.28.0
*/
IWordFormatText wordFormatText();
/**
* 获取 wordMap 策略
* @return 策略

View File

@@ -0,0 +1,23 @@
package com.github.houbb.sensitive.word.api;
import java.util.Map;
/**
* Formatting strategy that works on a text as a whole rather than on a single character.
*
* @author binbin.hou
* @since 0.28.0
*/
public interface IWordFormatText {
/**
* Builds the character mapping for the given text, so that an implementation
* can control how the text is formatted as a whole.
*
* @param text original text
* @param context word context
* @return mapping of characters to their formatted characters
* @since 0.28.0
*/
Map<Character, Character> format(final String text, final IWordContext context);
}

View File

@@ -15,6 +15,7 @@ import com.github.houbb.sensitive.word.support.combine.check.WordCheckCombines;
import com.github.houbb.sensitive.word.support.combine.format.WordFormatCombines;
import com.github.houbb.sensitive.word.support.data.WordDatas;
import com.github.houbb.sensitive.word.support.deny.WordDenys;
import com.github.houbb.sensitive.word.support.format.mapping.WordFormatTexts;
import com.github.houbb.sensitive.word.support.ignore.SensitiveWordCharIgnores;
import com.github.houbb.sensitive.word.support.replace.WordReplaces;
import com.github.houbb.sensitive.word.support.result.WordResultHandlers;
@@ -219,6 +220,12 @@ public class SensitiveWordBs implements ISensitiveWordDestroy {
*/
private IWordCheck wordCheckIpv4 = WordChecks.ipv4();
/**
* 文本处理类
* @since 0.28.0
*/
private IWordFormatText wordFormatText = WordFormatTexts.defaults();
/**
* 新建验证实例
* <p>
@@ -246,6 +253,7 @@ public class SensitiveWordBs implements ISensitiveWordDestroy {
final IWordFormat charFormat = wordFormatCombine.initWordFormat(context);
context.wordFormat(charFormat);
// 3. 初始化对应的 Check 策略
final IWordCheck sensitiveCheck = wordCheckCombine.initWordCheck(context);
context.sensitiveCheck(sensitiveCheck);
@@ -285,6 +293,7 @@ public class SensitiveWordBs implements ISensitiveWordDestroy {
context.ignoreEnglishStyle(ignoreEnglishStyle);
context.ignoreRepeat(ignoreRepeat);
context.wordFailFast(wordFailFast);
context.wordFormatText(this.wordFormatText);
// 开启校验
context.enableNumCheck(enableNumCheck);
@@ -450,6 +459,13 @@ public class SensitiveWordBs implements ISensitiveWordDestroy {
return this;
}
/**
* Sets the text format strategy used by this bootstrap instance.
*
* @param wordFormatText text format strategy; validated with {@code ArgUtil.notNull}
* @return this
* @since 0.28.0
*/
public SensitiveWordBs wordFormatText(IWordFormatText wordFormatText) {
ArgUtil.notNull(wordFormatText, "wordFormatText");
this.wordFormatText = wordFormatText;
return this;
}
//-------------------------------------------------------- 基础属性设置
/**
* 是否启用 ipv4 校验

View File

@@ -106,6 +106,12 @@ public class SensitiveWordContext implements IWordContext {
*/
private IWordFormat wordFormat;
/**
* 文本格式化策略
* @since 0.28.0
*/
private IWordFormatText wordFormatText;
/**
* 单词 map 信息
*
@@ -379,6 +385,17 @@ public class SensitiveWordContext implements IWordContext {
return this;
}
/**
* Stores the text format strategy on this context.
*
* @param wordFormatText text format strategy (stored as given, no validation in this method)
* @return this
* @since 0.28.0
*/
@Override
public IWordContext wordFormatText(IWordFormatText wordFormatText) {
this.wordFormatText = wordFormatText;
return this;
}
/**
* Returns the text format strategy stored on this context.
*
* @return the stored strategy
* @since 0.28.0
*/
@Override
public IWordFormatText wordFormatText() {
return wordFormatText;
}
public IWordTag wordTag() {
return wordTag;
}

View File

@@ -62,7 +62,7 @@ public class SensitiveWord extends AbstractSensitiveWord {
//TODO: 这里拆分为2个部分从而保障性能。但是要注意处理下标的问题。
//1. 原始的敏感词部分
//2. email/url/num 的单独一次遍历处理。
final Map<Character, Character> characterCharacterMap = InnerWordFormatUtils.formatCharsMapping(text, context);
final Map<Character, Character> characterCharacterMap = context.wordFormatText().format(text, context);
final InnerSensitiveWordContext checkContext = InnerSensitiveWordContext.newInstance()
.originalText(text)
.wordContext(context)

View File

@@ -5,6 +5,7 @@ import com.github.houbb.sensitive.word.api.ISensitiveWordCharIgnore;
import com.github.houbb.sensitive.word.api.IWordContext;
import com.github.houbb.sensitive.word.api.context.InnerSensitiveWordContext;
import com.github.houbb.sensitive.word.support.result.WordLengthResult;
import com.github.houbb.sensitive.word.utils.InnerWordFormatUtils;
import java.util.Map;
@@ -67,7 +68,7 @@ public abstract class AbstractConditionWordCheck extends AbstractWordCheck {
char currentChar = txt.charAt(i);
// 映射处理
char mappingChar = formatCharMapping.get(currentChar);
char mappingChar = InnerWordFormatUtils.getMappingChar(formatCharMapping, currentChar);
// 符合条件
boolean currentCondition = isCharCondition(mappingChar, i, checkContext);

View File

@@ -9,6 +9,7 @@ import com.github.houbb.sensitive.word.api.context.InnerSensitiveWordContext;
import com.github.houbb.sensitive.word.constant.enums.WordTypeEnum;
import com.github.houbb.sensitive.word.constant.enums.WordContainsTypeEnum;
import com.github.houbb.sensitive.word.support.result.WordLengthResult;
import com.github.houbb.sensitive.word.utils.InnerWordFormatUtils;
import java.util.Map;
@@ -57,7 +58,7 @@ public class WordCheckWord extends AbstractWordCheck {
skipLen++;
continue;
}
char mappingChar = formatCharMapping.get(rawChars[i]);
char mappingChar = InnerWordFormatUtils.getMappingChar(formatCharMapping, rawChars[i]);
stringBuilder.append(mappingChar);
tempLen++;

View File

@@ -0,0 +1,29 @@
package com.github.houbb.sensitive.word.support.format.mapping;
import com.github.houbb.heaven.util.lang.StringUtil;
import com.github.houbb.sensitive.word.api.IWordContext;
import com.github.houbb.sensitive.word.api.IWordFormatText;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
/**
 * Base class for {@link IWordFormatText} implementations.
 * <p>
 * Empty text (as judged by {@code StringUtil.isEmpty}) is answered here with an empty map;
 * every other text is handed to {@link #doFormat(String, IWordContext)}.
 *
 * @author binbin.hou
 * @since 0.28.0
 */
public abstract class AbstractWordFormatText implements IWordFormatText {

    /**
     * Builds the character mapping for a text that passed the empty check.
     *
     * @param text    text to inspect
     * @param context word context
     * @return character mapping for the text
     */
    protected abstract Map<Character, Character> doFormat(String text, IWordContext context);

    @Override
    public Map<Character, Character> format(String text, IWordContext context) {
        // Only texts with content reach the subclass.
        if (!StringUtil.isEmpty(text)) {
            return doFormat(text, context);
        }

        return Collections.emptyMap();
    }

}

View File

@@ -0,0 +1,41 @@
package com.github.houbb.sensitive.word.support.format.mapping;
import com.github.houbb.sensitive.word.api.IWordContext;
import com.github.houbb.sensitive.word.api.IWordFormat;
import com.github.houbb.sensitive.word.support.check.WordCheckNone;
import com.github.houbb.sensitive.word.support.format.WordFormatNone;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
/**
 * Default {@link IWordFormatText} implementation.
 * <p>
 * Runs every character of the text through the context's {@link IWordFormat} and records
 * only the characters whose formatted value differs from the original.
 *
 * @author d
 * @since 0.28.0
 */
public class WordFormatTextDefault extends AbstractWordFormatText {

    @Override
    protected Map<Character, Character> doFormat(String text, IWordContext context) {
        final IWordFormat formatter = context.wordFormat();

        // WordFormatNone is treated as "nothing to process": return without scanning the text.
        final String noneClassName = WordFormatNone.class.getName();
        if (noneClassName.equals(formatter.getClass().getName())) {
            return Collections.emptyMap();
        }

        final Map<Character, Character> mapping = new HashMap<>();
        for (final char original : text.toCharArray()) {
            final char formatted = formatter.format(original, context);
            // Unchanged characters are left out of the map.
            if (formatted != original) {
                mapping.put(original, formatted);
            }
        }

        return mapping;
    }

}

View File

@@ -0,0 +1,18 @@
package com.github.houbb.sensitive.word.support.format.mapping;
import com.github.houbb.sensitive.word.api.IWordFormatText;
/**
 * Static factory for {@link IWordFormatText} strategies.
 *
 * @author binbin.hou
 * @since 0.28.0
 */
public final class WordFormatTexts {

    /** Not meant to be instantiated; this class only holds static factory methods. */
    private WordFormatTexts() {
    }

    /**
     * Creates the default text format strategy.
     *
     * @return a new {@link WordFormatTextDefault}
     */
    public static IWordFormatText defaults() {
        final IWordFormatText strategy = new WordFormatTextDefault();
        return strategy;
    }

}

View File

@@ -46,29 +46,17 @@ public final class InnerWordFormatUtils {
/**
* 字符串统一的格式化处理
* @param original 原始文本
* @param context 上下文
* @param map 映射集合
* @param c 原始
* @return 结果
* @since 0.6.0
* @since 0.28.0
*/
public static Map<Character, Character> formatCharsMapping(final String original, final IWordContext context) {
if(StringUtil.isEmpty(original)) {
return Collections.emptyMap();
public static char getMappingChar(final Map<Character, Character> map, char c) {
Character mc = map.get(c);
if(mc != null) {
return mc;
}
final int len = original.length();
char[] rawChars = original.toCharArray();
Map<Character, Character> map = new HashMap<>(rawChars.length);
IWordFormat charFormat = context.wordFormat();
for(int i = 0; i < len; i++) {
final char currentChar = rawChars[i];
char formatChar = charFormat.format(currentChar, context);
map.put(currentChar, formatChar);
}
return map;
return c;
}
/**

View File

@@ -0,0 +1,56 @@
package com.github.houbb.sensitive.word.issues;
import com.github.houbb.sensitive.word.api.IWordDeny;
import com.github.houbb.sensitive.word.bs.SensitiveWordBs;
import com.github.houbb.sensitive.word.support.allow.WordAllows;
import com.github.houbb.sensitive.word.support.tag.WordTags;
import java.io.IOException;
import java.util.Arrays;
import java.util.List;
public class Issue131 {
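/**
* Where is the time being spent?
* It is unrelated to whether the settings are specified explicitly: the first call is simply slow (about 13 ms). Why?
*
* @param args command-line arguments (not used)
*/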
public static void main(String[] args) throws IOException {
final List<String> allWord = Arrays.asList("敏感","最强","定制", "81", "医疗器械");
String demo1 = "产品尺寸参数§60mn§50mm§210枚/包§160枚/包§名称A4银色不干胶§规格60mm*40mm 送配套模板§规格70mm*50mm 送配套模板§数量每大张21枚一包10张总计210枚§数量每大张16枚一包10张总计160枚§适用激光打印机打印油性笔书写§95mm§100mn§55mm§100枚/包§80枚/包§名称 A4银色不干胶§规格95mm*55mm 送配套模板§规格100mm*70mm 送配套模板§数量每大张10枚一包10张总计100枚§数量 每大张8枚一包10张 总计80枚§100mm§120枚/包§140枚/包§规格80mm*50mm 送配套模板§规格100mm*40mm 送配套模板§数量每大张12枚一包10张总计120枚§数量§每大张14枚包10张总计140枚§适用 激光打印机打印油性笔书写§40mm§65mm§70mm§35mm§200枚/包§240枚/包§规格70mm*40mm送配套模板§规格§65mm*35mm 送配套模板§数量 每大张20枚一包10张总计200枚§每大张24枚包10张总计240枚§适 激光打印机打印油性笔书写§适用§激光打印机打印油性笔书写§40mn§280枚/包§360枚/包§规格50mm*40mm 送配套模板§规格40mm*30mm 送配套模板§数量每大张28枚一包10张总计280枚§数量每大张36枚一包10张总计360枚§45.7mm§38.1mm§400枚/包§650枚/包§45.7mm*25.4mm送配套模板§38.1mm*21.2mm 送配套模板§每大张40枚一包10张总计400枚§数量每大张65枚一包10张总计650枚§30mm§25mr§20mm§840枚/包§1260枚/包§规格 30mm*20mm 送配套模板§规格25mm*13mm 送配套模板§数量每张84枚包10张总计840枚§数量每大张126枚一包10张总计1260枚§46mm§意制§任§1000枚/包§定§名称定制A4内割银不胶§规格46mm*11.1mm送配套模板§任意规格定制§每大张100枚包10张总计1000枚§包10张满5包送专属模板§适激光打印机打印油性笔书写§产品实拍§8格打印实拍展示(100mm*70mm)§上海荠骞文化用品固定资产标识卡§资产编号:§规格型号:§资产名称:§使用状态:§资产类别:§资产原值§存放地点§生产厂家:§使用人§备§注:§*请爱护公司财产不要随意撕毁此标签§16格全内容打印实拍展示§固定资产标识卡§资产名称§四层货架平板§资产编号§3F跑菜区§规格型号§1800×500×1500§使用部门§财务部§使用时间§2019-04-26§李强§21格手写款打印展示 (60mm*40mm)§固定资标识卡§36格打印实拍展示(40mm*30mm)§固定资产标签§名称:§编号:§部门:§40格打印实拍展示(45.7mm*25.4mm)§固定资§名称电脑§编号20210§部门财务部§20210201§使用人我最强§八找最强§编号20210201§65格打印实拍展示(38mm*21mm)§名称:§编号:§数量:§数量:§100格打印实拍展示(46mm*11.1mm)§客服电话159 9569 3815§: 159 9569 3815§.§客服电话159 9569§客服电话1599§客服电话§服电话159 9569 3815§话159 9569 3815§客服电话1599569 3815§电话159 9569 3815§9569 3815§159 9569 3815§客服电话§低值易耗品标识牌(70mm*50mm)§购买日期§保管部门§责任人§生产厂家§不要随意撕毁此标牌*§*请爱护公司财产,不要随意撕导§品标识牌§低值易耗品标识牌§随意撕毁此标牌*§*请爱护公司财产,不要随意撕毁此标牌*§三人沙发§行政酒廊§2200*860*900§2018-07-23§应用范围§多用于产品信息固有资产登记航空仓库管理 医疗政府机构等§Mainly used for product information inherent assets registration, aviation warehouse management, medi§cal government institutions, etc§政府单位§企业办公§仓储行业§医疗器械§教育单位§耐用品§电子产品包装§商城卖场";
// 初始化敏感词库
SensitiveWordBs sensitiveWordBs = SensitiveWordBs.newInstance()
.wordFailFast(true)
.wordAllow(WordAllows.empty())
.wordDeny(new IWordDeny() {
@Override
public List<String> deny() {
return allWord;
}
})
.ignoreChineseStyle(false)
.ignoreCase(false)
.ignoreEnglishStyle(false)
.ignoreNumStyle(false)
.ignoreRepeat(false)
.ignoreWidth(false)
.wordTag(WordTags.none())
.init();
long time = System.currentTimeMillis();
costTimeTest(sensitiveWordBs, demo1);
long cTime = System.currentTimeMillis() - time;
System.out.println("---DONE"+cTime);
}
/**
 * Calls {@code findAll} on the same text 10000 times; the returned lists are not used.
 *
 * @param sensitiveWordBs initialized bootstrap instance
 * @param demo1           text to scan
 * @throws IOException declared by the signature
 */
private static void costTimeTest(SensitiveWordBs sensitiveWordBs, String demo1) throws IOException {
    final int rounds = 10000;

    int remaining = rounds;
    while (remaining > 0) {
        List<String> found = sensitiveWordBs.findAll(demo1);
        remaining--;
    }
}
}