release branch 0.11.0

2026-03-22 08:27:36 +08:00 · 2023-12-09 00:38:10 +08:00
parent 2ddb9e1ac6
commit 56df8bd648
16 changed files with 251 additions and 6 deletions
--- a/CHANGE_LOG.md
+++ b/CHANGE_LOG.md
@@ -212,3 +212,9 @@
 | 序号 | 变更类型 | 说明               | 时间                  | 备注    |
 |:---|:-----|------------------|:--------------------|:------|
 | 1  | A    | 添加脏词的标签接口，便于后续拓展 | 2023-12-05 23:51:58 |  |
+
+# release_0.11.0
+
+| 序号 | 变更类型 | 说明                   | 时间                  | 备注    |
+|:---|:-----|----------------------|:--------------------|:------|
+| 1  | A    | 添加忽略字符接口，便于跳过一些干扰的字符 | 2023-12-08 23:51:58 |  |
--- a/README.md
+++ b/README.md
@@ -42,6 +42,8 @@

 - [支持敏感词的标签接口](https://github.com/houbb/sensitive-word#%E6%95%8F%E6%84%9F%E8%AF%8D%E6%A0%87%E7%AD%BE)

+- [支持跳过一些特殊字符，让匹配更灵活](https://github.com/houbb/sensitive-word#%E5%BF%BD%E7%95%A5%E5%AD%97%E7%AC%A6)
+
 ## 变更日志

 [CHANGE_LOG.md](https://github.com/houbb/sensitive-word/blob/master/CHANGE_LOG.md)
@@ -60,7 +62,7 @@
 <dependency>
    <groupId>com.github.houbb</groupId>
    <artifactId>sensitive-word</artifactId>
-    <version>0.10.0</version>
+    <version>0.11.0</version>
 </dependency>
 ```

@@ -390,6 +392,41 @@ Assert.assertTrue(wordBs.contains(text));
 | 10 | enableWordCheck      | 是否启用敏感单词检测    | true   |
 | 11 | numCheckLen          | 数字检测，自定义指定长度。 | 8      |

+
+# 忽略字符
+
+## 说明
+
+我们的敏感词一般都是比较连续的，比如【傻帽】
+
+那就有大聪明发现，可以在中间加一些字符，比如【傻!@#$帽】跳过检测，但是骂人等攻击力不减。
+
+那么，如何应对这些类似的场景呢？
+
+我们可以指定特殊字符的跳过集合，忽略掉这些无意义的字符即可。
+
+v0.11.0 开始支持
+
+## 例子
+
+其中 charIgnore 对应的字符策略，用户可以自行灵活定义。
+
+```java
+final String text = "傻@冒，狗+东西";
+
+//默认因为有特殊字符分割，无法识别
+List<String> wordList = SensitiveWordBs.newInstance().init().findAll(text);
+Assert.assertEquals("[]", wordList.toString());
+
+// 指定忽略的字符策略，可自行实现。
+List<String> wordList2 = SensitiveWordBs.newInstance()
+        .charIgnore(SensitiveWordCharIgnores.specialChars())
+        .init()
+        .findAll(text);
+
+Assert.assertEquals("[傻@冒, 狗+东西]", wordList2.toString());
+```
+
 # 敏感词标签

 ## 说明
--- a/pom.xml
+++ b/pom.xml
@@ -6,7 +6,7 @@

    <groupId>com.github.houbb</groupId>
    <artifactId>sensitive-word</artifactId>
-    <version>0.10.0</version>
+    <version>0.11.0</version>

    <properties>
        <!--============================== All Plugins START ==============================-->
@@ -25,7 +25,7 @@
        <project.compiler.level>1.7</project.compiler.level>

        <!--============================== INTER ==============================-->
-        <heaven.version>0.2.7</heaven.version>
+        <heaven.version>0.6.0</heaven.version>
        <opencc4j.version>1.8.1</opencc4j.version>

        <!--============================== OTHER ==============================-->
--- a/release.bat
+++ b/release.bat
@@ -10,9 +10,9 @@ ECHO "============================= RELEASE START..."

 :: 版本号信息(需要手动指定)
 :::: 旧版本名称
-SET version=0.10.0
+SET version=0.11.0
 :::: 新版本名称
-SET newVersion=0.11.0
+SET newVersion=0.12.0
 :::: 组织名称
 SET groupName=com.github.houbb
 :::: 项目名称
--- a/src/main/java/com/github/houbb/sensitive/word/api/ISensitiveWordCharIgnore.java
+++ b/src/main/java/com/github/houbb/sensitive/word/api/ISensitiveWordCharIgnore.java
@@ -0,0 +1,22 @@
+package com.github.houbb.sensitive.word.api;
+
+import com.github.houbb.sensitive.word.api.context.InnerSensitiveWordContext;
+
+/**
+ * Strategy that decides whether a single character should be ignored.
+ * @since 0.11.0
+ */
+public interface ISensitiveWordCharIgnore {
+
+    /**
+     * Decides whether the character at the given index should be ignored.
+     * @param ix index of the character within {@code chars}
+     * @param chars character array being examined
+     * @param innerContext inner context
+     * @return {@code true} if the character should be ignored
+     */
+    boolean ignore(final int ix,
+                   final char[] chars,
+                   InnerSensitiveWordContext innerContext);
+
+}
--- a/src/main/java/com/github/houbb/sensitive/word/api/IWordContext.java
+++ b/src/main/java/com/github/houbb/sensitive/word/api/IWordContext.java
@@ -237,4 +237,8 @@ public interface IWordContext {

    SensitiveWordContext wordTag(IWordTag wordTag);

+    ISensitiveWordCharIgnore charIgnore(); // accessor for the char-ignore strategy (introduced in 0.11.0)
+
+    SensitiveWordContext charIgnore(ISensitiveWordCharIgnore charIgnore); // sets the char-ignore strategy; returns a SensitiveWordContext so calls can be chained
+
 }
--- a/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java
+++ b/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordBs.java
@@ -15,6 +15,7 @@ import com.github.houbb.sensitive.word.support.combine.check.WordCheckCombines;
 import com.github.houbb.sensitive.word.support.combine.format.WordFormatCombines;
 import com.github.houbb.sensitive.word.support.data.WordDatas;
 import com.github.houbb.sensitive.word.support.deny.WordDenys;
+import com.github.houbb.sensitive.word.support.ignore.SensitiveWordCharIgnores;
 import com.github.houbb.sensitive.word.support.replace.WordReplaces;
 import com.github.houbb.sensitive.word.support.result.WordResultHandlers;
 import com.github.houbb.sensitive.word.support.tag.WordTags;
@@ -156,6 +157,12 @@ public class SensitiveWordBs {
     */
    private IWordTag wordTag = WordTags.none();

+    /**
+     * Strategy for ignoring characters; initialised from {@code SensitiveWordCharIgnores.defaults()}.
+     * @since 0.11.0
+     */
+    private ISensitiveWordCharIgnore charIgnore = SensitiveWordCharIgnores.defaults();
+
    /**
     * 新建验证实例
     * <p>
@@ -225,10 +232,18 @@ public class SensitiveWordBs {
        context.wordReplace(wordReplace);
        context.wordData(wordData);
        context.wordTag(wordTag);
+        context.charIgnore(charIgnore);

        return context;
    }

+    public SensitiveWordBs charIgnore(ISensitiveWordCharIgnore charIgnore) { // sets the char-ignore strategy (since 0.11.0)
+        ArgUtil.notNull(charIgnore, "charIgnore"); // presumably rejects null; ArgUtil is declared elsewhere - TODO confirm
+
+        this.charIgnore = charIgnore;
+        return this; // return this instance so configuration calls can be chained
+    }
+
    public SensitiveWordBs wordTag(IWordTag wordTag) {
        ArgUtil.notNull(wordTag, "wordTag");

--- a/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordContext.java
+++ b/src/main/java/com/github/houbb/sensitive/word/bs/SensitiveWordContext.java
@@ -107,6 +107,12 @@ public class SensitiveWordContext implements IWordContext {
     */
    private IWordTag wordTag;

+    /**
+     * Strategy that decides which characters are ignored.
+     * @since 0.11.0
+     */
+    private ISensitiveWordCharIgnore charIgnore;
+
    public IWordData wordData() {
        return wordData;
    }
@@ -290,4 +296,12 @@ public class SensitiveWordContext implements IWordContext {
        return this;
    }

+    public ISensitiveWordCharIgnore charIgnore() { // getter for the char-ignore strategy
+        return charIgnore;
+    }
+
+    public SensitiveWordContext charIgnore(ISensitiveWordCharIgnore charIgnore) { // setter for the char-ignore strategy
+        this.charIgnore = charIgnore; // stored as-is; no null check is performed here
+        return this; // return this context so calls can be chained
+    }
 }
--- a/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckWord.java
+++ b/src/main/java/com/github/houbb/sensitive/word/support/check/WordCheckWord.java
@@ -1,6 +1,7 @@
 package com.github.houbb.sensitive.word.support.check;

 import com.github.houbb.heaven.annotation.ThreadSafe;
+import com.github.houbb.sensitive.word.api.ISensitiveWordCharIgnore;
 import com.github.houbb.sensitive.word.api.IWordCheck;
 import com.github.houbb.sensitive.word.api.IWordContext;
 import com.github.houbb.sensitive.word.api.IWordData;
@@ -46,16 +47,26 @@ public class WordCheckWord extends AbstractWordCheck {
        // 前一个条件
        StringBuilder stringBuilder = new StringBuilder();
        char[] rawChars = txt.toCharArray();
+
+        final ISensitiveWordCharIgnore wordCharIgnore = context.charIgnore();
+        int tempLen = 0;
        for(int i = beginIndex; i < rawChars.length; i++) {
+            // 判断是否跳过？
+            if(wordCharIgnore.ignore(i, rawChars, innerContext)) {
+                tempLen++;
+                continue;
+            }
+
            // 映射处理
            final char currentChar = rawChars[i];
            char mappingChar = formatCharMapping.get(currentChar);
            stringBuilder.append(mappingChar);
+            tempLen++;

            // 判断是否存在
            WordContainsTypeEnum wordContainsTypeEnum = wordData.contains(stringBuilder, innerContext);
            if(WordContainsTypeEnum.CONTAINS_END.equals(wordContainsTypeEnum)) {
-                actualLength = stringBuilder.length();
+                actualLength = tempLen;

                // 是否遍历全部匹配的模式
                if(WordValidModeEnum.FAIL_FAST.equals(wordValidModeEnum)) {
--- a/src/main/java/com/github/houbb/sensitive/word/support/ignore/AbstractSensitiveWordCharIgnore.java
+++ b/src/main/java/com/github/houbb/sensitive/word/support/ignore/AbstractSensitiveWordCharIgnore.java
@@ -0,0 +1,19 @@
+package com.github.houbb.sensitive.word.support.ignore;
+
+import com.github.houbb.sensitive.word.api.ISensitiveWordCharIgnore;
+import com.github.houbb.sensitive.word.api.context.InnerSensitiveWordContext;
+
+/**
+ * Abstract base implementation: {@link #ignore} delegates directly to {@link #doIgnore}.
+ * @since 0.11.0
+ */
+public abstract class AbstractSensitiveWordCharIgnore implements ISensitiveWordCharIgnore {
+
+    protected abstract boolean doIgnore(int ix, char[] chars, InnerSensitiveWordContext innerContext); // subclasses supply the actual decision
+
+    @Override
+    public boolean ignore(int ix, char[] chars, InnerSensitiveWordContext innerContext) {
+        return doIgnore(ix, chars, innerContext); // arguments are passed through unchanged
+    }
+
+}
--- a/src/main/java/com/github/houbb/sensitive/word/support/ignore/NoneSensitiveWordCharIgnore.java
+++ b/src/main/java/com/github/houbb/sensitive/word/support/ignore/NoneSensitiveWordCharIgnore.java
@@ -0,0 +1,16 @@
+package com.github.houbb.sensitive.word.support.ignore;
+
+import com.github.houbb.sensitive.word.api.context.InnerSensitiveWordContext;
+
+/**
+ * Strategy that never ignores any character.
+ * @since 0.11.0
+ */
+public class NoneSensitiveWordCharIgnore extends AbstractSensitiveWordCharIgnore {
+
+    @Override
+    protected boolean doIgnore(int ix, char[] chars, InnerSensitiveWordContext innerContext) {
+        return false; // unconditionally "do not ignore", regardless of the arguments
+    }
+
+}
--- a/src/main/java/com/github/houbb/sensitive/word/support/ignore/SensitiveWordCharIgnores.java
+++ b/src/main/java/com/github/houbb/sensitive/word/support/ignore/SensitiveWordCharIgnores.java
@@ -0,0 +1,22 @@
+package com.github.houbb.sensitive.word.support.ignore;
+
+import com.github.houbb.sensitive.word.api.ISensitiveWordCharIgnore;
+
+/** Static factory methods for the built-in {@link ISensitiveWordCharIgnore} strategies.
+ * @since 0.11.0
+ */
+public class SensitiveWordCharIgnores {
+
+    public static ISensitiveWordCharIgnore specialChars() { // new SpecialCharSensitiveWordCharIgnore on every call
+        return new SpecialCharSensitiveWordCharIgnore();
+    }
+
+    public static ISensitiveWordCharIgnore none() { // new NoneSensitiveWordCharIgnore on every call
+        return new NoneSensitiveWordCharIgnore();
+    }
+
+    public static ISensitiveWordCharIgnore defaults() { // the default strategy is none()
+        return none();
+    }
+
+}
--- a/src/main/java/com/github/houbb/sensitive/word/support/ignore/SpecialCharSensitiveWordCharIgnore.java
+++ b/src/main/java/com/github/houbb/sensitive/word/support/ignore/SpecialCharSensitiveWordCharIgnore.java
@@ -0,0 +1,28 @@
+package com.github.houbb.sensitive.word.support.ignore;
+
+import com.github.houbb.heaven.util.lang.StringUtil;
+import com.github.houbb.sensitive.word.api.context.InnerSensitiveWordContext;
+
+import java.util.Set;
+
+/**
+ * Strategy that ignores the characters listed in {@code SPECIAL}.
+ * @since 0.11.0
+ */
+public class SpecialCharSensitiveWordCharIgnore extends AbstractSensitiveWordCharIgnore {
+
+    private static final String SPECIAL = "`-=~!@#$%^&*()_+[]{}\\|;:'\",./<>?"; // ASCII punctuation/symbols only; contains no whitespace and no full-width characters
+
+    private static final Set<Character> SET; // lookup set built once in the static initializer below
+
+    static {
+        SET = StringUtil.toCharSet(SPECIAL); // presumably the characters of SPECIAL as a set; StringUtil is external - TODO confirm
+    }
+
+    @Override
+    protected boolean doIgnore(int ix, char[] chars, InnerSensitiveWordContext innerContext) {
+        char c = chars[ix]; // no bounds check: ix must be a valid index into chars
+        return SET.contains(c);
+    }
+
+}
--- a/src/main/resources/dict.txt
+++ b/src/main/resources/dict.txt
@@ -65141,3 +65141,8 @@ z以留吧以其以武
 龟投
 龟毛
 𫔰苞价咯
+傻逼
+傻冒
+狗东西
+草你大爷
+操你大爷
--- a/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsEmailTest.java
+++ b/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsEmailTest.java
@@ -38,4 +38,11 @@ public class SensitiveWordBsEmailTest {
        Assert.assertEquals("[123456789, xx.com]", wordList.toString());
    }

+    @Test
+    public void emailTest() {
+        final String text = "你我.他你"; // CJK text with a '.' in the middle
+        List<String> wordList = SensitiveWordBs.newInstance().init().findAll(text);
+        Assert.assertEquals("[]", wordList.toString()); // expects no hits with the default configuration
+    }
+
 }
--- a/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsIgnoreCharTest.java
+++ b/src/test/java/com/github/houbb/sensitive/word/bs/SensitiveWordBsIgnoreCharTest.java
@@ -0,0 +1,39 @@
+package com.github.houbb.sensitive.word.bs;
+
+import com.github.houbb.sensitive.word.support.ignore.SensitiveWordCharIgnores;
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.util.List;
+
+/**
+ * <p> project: sensitive-word-SensitiveWordBsTest </p>
+ * <p> create on 2020/1/7 23:43 </p>
+ *
+ * @author Administrator
+ * @since 0.11.0
+ */
+public class SensitiveWordBsIgnoreCharTest {
+
+    /**
+     * Words split by special characters are only found once a char-ignore strategy is configured.
+     * @since 0.11.0
+     */
+    @Test
+    public void ignoreChineseStyleTest() { // NOTE(review): name looks copied from a Chinese-style test; this test covers ignored characters
+        final String text = "傻@冒，狗+东西";
+
+        // By default the special characters split the words, so nothing is recognised.
+        List<String> wordList = SensitiveWordBs.newInstance().init().findAll(text);
+        Assert.assertEquals("[]", wordList.toString());
+
+        // Configure the char-ignore strategy; callers may supply their own implementation.
+        List<String> wordList2 = SensitiveWordBs.newInstance()
+                .charIgnore(SensitiveWordCharIgnores.specialChars())
+                .init()
+                .findAll(text);
+
+        Assert.assertEquals("[傻@冒, 狗+东西]", wordList2.toString());
+    }
+
+}