From 40b4fea1e7ade0149cf72229aaec798e3bae383c Mon Sep 17 00:00:00 2001 From: "binbin.hou" Date: Thu, 8 Jun 2023 23:42:24 +0800 Subject: [PATCH] [Feature] add for new --- .../sensitive/word/core/SensitiveWord.java | 4 + .../impl/AbstractConditionSensitiveCheck.java | 2 - .../word/benchmark/BenchmarkBasicTest.java | 79 ++++++++++++++++++- 3 files changed, 81 insertions(+), 4 deletions(-) diff --git a/src/main/java/com/github/houbb/sensitive/word/core/SensitiveWord.java b/src/main/java/com/github/houbb/sensitive/word/core/SensitiveWord.java index 692bd76..f4cd8f1 100644 --- a/src/main/java/com/github/houbb/sensitive/word/core/SensitiveWord.java +++ b/src/main/java/com/github/houbb/sensitive/word/core/SensitiveWord.java @@ -50,6 +50,10 @@ public class SensitiveWord extends AbstractSensitiveWord { //1. 是否存在敏感词,如果比存在,直接返回空列表 final ISensitiveCheck sensitiveCheck = context.sensitiveCheck(); List resultList = Guavas.newArrayList(); + + //TODO: 这里拆分为2个部分,从而保障性能。但是要注意处理下标的问题。 + //1. 原始的敏感词部分 + //2. email/url/num 的单独一次遍历处理。 for (int i = 0; i < text.length(); i++) { SensitiveCheckResult checkResult = sensitiveCheck.sensitiveCheck(text, i, ValidModeEnum.FAIL_OVER, context); diff --git a/src/main/java/com/github/houbb/sensitive/word/support/check/impl/AbstractConditionSensitiveCheck.java b/src/main/java/com/github/houbb/sensitive/word/support/check/impl/AbstractConditionSensitiveCheck.java index d4256aa..ad00b3d 100644 --- a/src/main/java/com/github/houbb/sensitive/word/support/check/impl/AbstractConditionSensitiveCheck.java +++ b/src/main/java/com/github/houbb/sensitive/word/support/check/impl/AbstractConditionSensitiveCheck.java @@ -51,8 +51,6 @@ public abstract class AbstractConditionSensitiveCheck extends AbstractSensitiveC // 采用 ThreadLocal 应该可以提升性能,减少对象的创建。 StringBuilder stringBuilder = new StringBuilder(); - // 前一个条件 - boolean preCondition = false; int currentIx = 0; for(int i = beginIndex; i < txt.length(); i++) { currentIx = i; diff --git a/src/test/java/com/github/houbb/sensitive/word/benchmark/BenchmarkBasicTest.java b/src/test/java/com/github/houbb/sensitive/word/benchmark/BenchmarkBasicTest.java index b91d7b3..9b3f651 100644 --- a/src/test/java/com/github/houbb/sensitive/word/benchmark/BenchmarkBasicTest.java +++ b/src/test/java/com/github/houbb/sensitive/word/benchmark/BenchmarkBasicTest.java @@ -6,7 +6,7 @@ import com.github.houbb.sensitive.word.core.SensitiveWordHelper; import org.junit.Ignore; import org.junit.Test; -@Ignore +//@Ignore public class BenchmarkBasicTest { /** @@ -51,7 +51,7 @@ public class BenchmarkBasicTest { // 1W 次 long start = System.currentTimeMillis(); SensitiveWordBs sensitiveWordBs = SensitiveWordBs.newInstance() -// .enableWordCheck(false) + .enableWordCheck(true) .enableNumCheck(false) .enableUrlCheck(false) .enableEmailCheck(false) @@ -64,4 +64,79 @@ public class BenchmarkBasicTest { System.out.println("------------------ COST: " + (end-start)); } + /** + * + * COST: 1540-pc + */ + @Test + public void costTimeOnlyNumTest() { + String randomText = "你他妈的不要说脏话"+ RandomUtil.randomString("1234567890bcdefghiJKLMNOPQRSTUVWXYZ", 100) + + "我们他妈的从来不说脏说"; + + // 1W 次 + long start = System.currentTimeMillis(); + SensitiveWordBs sensitiveWordBs = SensitiveWordBs.newInstance() + .enableWordCheck(false) + .enableNumCheck(true) + .enableUrlCheck(false) + .enableEmailCheck(false) + .init(); + + for(int i = 0; i < 10000; i++) { + sensitiveWordBs.findAll(randomText); + } + long end = System.currentTimeMillis(); + System.out.println("------------------ COST: " + (end-start)); + } + + /** + * + * COST: 20284-pc + */ + @Test + public void costTimeOnlyUrlTest() { + String randomText = "你他妈的不要说脏话"+ RandomUtil.randomString("1234567890bcdefghiJKLMNOPQRSTUVWXYZ", 100) + + "我们他妈的从来不说脏说"; + + // 1W 次 + long start = System.currentTimeMillis(); + SensitiveWordBs sensitiveWordBs = SensitiveWordBs.newInstance() + .enableWordCheck(false) + .enableNumCheck(false) + .enableUrlCheck(true) + .enableEmailCheck(false) + .init(); + + for(int i = 0; i < 10000; i++) { + sensitiveWordBs.findAll(randomText); + } + long end = System.currentTimeMillis(); + System.out.println("------------------ COST: " + (end-start)); + } + + /** + * + * COST: 19036-pc + */ + @Test + public void costTimeOnlyEmailTest() { + String randomText = "你他妈的不要说脏话"+ RandomUtil.randomString("1234567890bcdefghiJKLMNOPQRSTUVWXYZ", 100) + + "我们他妈的从来不说脏说"; + + // 1W 次 + long start = System.currentTimeMillis(); + SensitiveWordBs sensitiveWordBs = SensitiveWordBs.newInstance() + .enableWordCheck(false) + .enableNumCheck(false) + .enableUrlCheck(false) + .enableEmailCheck(true) + .init(); + + for(int i = 0; i < 10000; i++) { + sensitiveWordBs.findAll(randomText); + } + long end = System.currentTimeMillis(); + System.out.println("------------------ COST: " + (end-start)); + } + }