From 3c77ed3c5fd5386c7d2c5ebc31ac3b9dc698d73a Mon Sep 17 00:00:00 2001
From: zhaoqichao <1416537683@qq.com>
Date: Wed, 5 Jul 2023 09:29:21 +0800
Subject: [PATCH 1/6] =?UTF-8?q?Url=E8=A7=A3=E6=9E=90=E7=BB=84=E4=BB=B6?=
=?UTF-8?q?=E9=87=8D=E6=9E=84=E5=8F=8A=E4=BC=98=E5=8C=96?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
...Discover.java => AbstractUrlDiscover.java} | 38 +++++++-----
.../utils/discover/CommonUrlDiscover.java | 56 +++++++++++++++++
.../discover/CommonUrlTitleDiscover.java | 15 -----
.../discover/PrioritizedUrlDiscover.java | 60 +++++++++++++++++++
.../discover/PrioritizedUrlTitleDiscover.java | 33 ----------
.../common/utils/discover/UrlDiscover.java | 46 ++++++++++++++
.../utils/discover/UrlTitleDiscover.java | 35 -----------
.../common/utils/discover/WxUrlDiscover.java | 30 ++++++++++
.../utils/discover/WxUrlTitleDiscover.java | 15 -----
.../service/strategy/msg/TextMsgHandler.java | 4 +-
10 files changed, 218 insertions(+), 114 deletions(-)
rename mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/discover/{AbstractUrlTitleDiscover.java => AbstractUrlDiscover.java} (64%)
create mode 100644 mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/discover/CommonUrlDiscover.java
delete mode 100644 mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/discover/CommonUrlTitleDiscover.java
create mode 100644 mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/discover/PrioritizedUrlDiscover.java
delete mode 100644 mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/discover/PrioritizedUrlTitleDiscover.java
create mode 100644 mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/discover/UrlDiscover.java
delete mode 100644 mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/discover/UrlTitleDiscover.java
create mode 100644 mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/discover/WxUrlDiscover.java
delete mode 100644 mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/discover/WxUrlTitleDiscover.java
diff --git a/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/discover/AbstractUrlTitleDiscover.java b/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/discover/AbstractUrlDiscover.java
similarity index 64%
rename from mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/discover/AbstractUrlTitleDiscover.java
rename to mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/discover/AbstractUrlDiscover.java
index 79cbefe..80d7ab8 100644
--- a/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/discover/AbstractUrlTitleDiscover.java
+++ b/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/discover/AbstractUrlDiscover.java
@@ -3,14 +3,15 @@ package com.abin.mallchat.common.common.utils.discover;
import cn.hutool.core.util.ReUtil;
import cn.hutool.core.util.StrUtil;
import com.abin.mallchat.common.common.utils.FutureUtils;
+import com.abin.mallchat.common.common.utils.discover.domain.UrlInfo;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;
+import org.jetbrains.annotations.Nullable;
import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.springframework.data.util.Pair;
-import javax.annotation.Nullable;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
@@ -20,46 +21,55 @@ import java.util.regex.Pattern;
import java.util.stream.Collectors;
/**
- * Description: urlTitle查询抽象类
- * Author: abin
- * Date: 2023-05-27
+ * @author zhaoqichao
+ * @date 2023/7/3 16:38
*/
@Slf4j
-public abstract class AbstractUrlTitleDiscover implements UrlTitleDiscover {
+public abstract class AbstractUrlDiscover implements UrlDiscover {
//链接识别的正则
private static final Pattern PATTERN = Pattern.compile("((http|https)://)?(www.)?([\\w_-]+(?:(?:\\.[\\w_-]+)+))([\\w.,@?^=%&:/~+#-]*[\\w@?^=%&/~+#-])?");
- @Nullable
+
+ @javax.annotation.Nullable
@Override
- public Map getContentTitleMap(String content) {
+ public Map getUrlContentMap(String content) {
+
if (StrUtil.isBlank(content)) {
return new HashMap<>();
}
List matchList = ReUtil.findAll(PATTERN, content, 0);
+
//并行请求
- List>> futures = matchList.stream().map(match -> CompletableFuture.supplyAsync(() -> {
- String title = getUrlTitle(match);
- return StringUtils.isNotEmpty(title) ? Pair.of(match, title) : null;
+ List>> futures = matchList.stream().map(match -> CompletableFuture.supplyAsync(() -> {
+ UrlInfo urlInfo = getContent(match);
+ return Objects.isNull(urlInfo) ? null : Pair.of(match, urlInfo);
})).collect(Collectors.toList());
- CompletableFuture>> future = FutureUtils.sequenceNonNull(futures);
+ CompletableFuture>> future = FutureUtils.sequenceNonNull(futures);
//结果组装
return future.join().stream().collect(Collectors.toMap(Pair::getFirst, Pair::getSecond, (a, b) -> a));
}
@Nullable
@Override
- public String getUrlTitle(String url) {
+ public UrlInfo getContent(String url) {
Document document = getUrlDocument(assemble(url));
if (Objects.isNull(document)) {
return null;
}
- return getDocTitle(document);
+
+ return UrlInfo.builder()
+ .title(getTitle(document))
+ .description(getDescription(document))
+ .image(getImage(assemble(url),document)).build();
}
+
private String assemble(String url) {
+
if (!StrUtil.startWith(url, "http")) {
return "http://" + url;
}
+
return url;
}
@@ -69,7 +79,7 @@ public abstract class AbstractUrlTitleDiscover implements UrlTitleDiscover {
connect.timeout(2000);
return connect.get();
} catch (Exception e) {
- log.error("find title error:url:{}", matchUrl, e);
+ log.error("find error:url:{}", matchUrl, e);
}
return null;
}
diff --git a/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/discover/CommonUrlDiscover.java b/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/discover/CommonUrlDiscover.java
new file mode 100644
index 0000000..b73c81d
--- /dev/null
+++ b/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/discover/CommonUrlDiscover.java
@@ -0,0 +1,95 @@
+package com.abin.mallchat.common.common.utils.discover;
+
+import cn.hutool.core.util.StrUtil;
+import io.jsonwebtoken.lang.Objects;
+import org.apache.commons.lang3.StringUtils;
+import org.jetbrains.annotations.Nullable;
+import org.jsoup.nodes.Document;
+import org.jsoup.select.Elements;
+
+/**
+ * General-purpose {@link UrlDiscover} that reads title, description and icon
+ * from a page's regular HTML head elements.
+ *
+ * @author zhaoqichao
+ * @date 2023/7/3 16:54
+ */
+public class CommonUrlDiscover extends AbstractUrlDiscover {
+
+    /** Chinese full stop; the description is cut at its first occurrence. */
+    private static final String SENTENCE_END = "。";
+
+    /**
+     * Returns the title reported by {@link Document#title()}.
+     *
+     * @param document parsed page
+     * @return the page title
+     */
+    @Nullable
+    @Override
+    public String getTitle(Document document) {
+        return document.title();
+    }
+
+    /**
+     * Returns the first sentence of the page description.
+     * <p>
+     * The {@code description} meta tag is preferred and {@code keywords} is the fallback.
+     *
+     * @param document parsed page
+     * @return the text up to the first Chinese full stop, or the whole text when it has none
+     */
+    @Nullable
+    @Override
+    public String getDescription(Document document) {
+        String description = document.head().select("meta[name=description]").attr("content");
+        String keywords = document.head().select("meta[name=keywords]").attr("content");
+        String content = StrUtil.isNotBlank(description) ? description : keywords;
+        if (StrUtil.isBlank(content)) {
+            return content;
+        }
+        // indexOf yields -1 when there is no full stop; substring(0, -1) would throw, so the
+        // text is returned unchanged in that case.
+        int sentenceEnd = content.indexOf(SENTENCE_END);
+        return sentenceEnd < 0 ? content : content.substring(0, sentenceEnd);
+    }
+
+    /**
+     * Resolves the URL of the site icon.
+     *
+     * @param url      URL of the page
+     * @param document parsed page
+     * @return the icon URL, or {@code null} when the page declares {@code og:title} or no icon could be determined
+     */
+    @Nullable
+    @Override
+    public String getImage(String url, Document document) {
+        // A page that declares og:title is treated as a WeChat article; this discoverer yields no image for it.
+        if (StrUtil.isNotBlank(document.getElementsByAttributeValue("property", "og:title").attr("content"))) {
+            return null;
+        }
+        String image = document.select("link[type=image/x-icon]").attr("href");
+        // No x-icon link: fall back to any link whose rel ends with "icon".
+        String href = StrUtil.isEmpty(image) ? document.select("link[rel$=icon]").attr("href") : image;
+        // The href with slashes and "favicon.ico" stripped; non-blank when the href carries more than the file name.
+        String hrefRemainder = StrUtil.removeAny(StrUtil.removeAny(href, "/"), "favicon.ico");
+        // The icon href already contains part of the page's domain.
+        if (StrUtil.isNotBlank(hrefRemainder)
+                && StrUtil.containsAny(StrUtil.removePrefix(url, "http://"), hrefRemainder)) {
+            return "http://" + StrUtil.removePrefix(href, "/");
+        }
+        // The page URL itself already points at the icon.
+        if (StrUtil.containsAny(url, "favicon")) {
+            return url;
+        }
+        // The href is already absolute ("https" contains "http", so one check covers both).
+        if (StrUtil.containsAny(href, "http")) {
+            return href;
+        }
+        // No icon link at all: report "no image" instead of building "<url>/".
+        if (StrUtil.isBlank(href)) {
+            return null;
+        }
+        return StrUtil.format("{}/{}", url, StrUtil.removePrefix(href, "/"));
+    }
+}
diff --git a/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/discover/CommonUrlTitleDiscover.java b/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/discover/CommonUrlTitleDiscover.java
deleted file mode 100644
index 6471610..0000000
--- a/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/discover/CommonUrlTitleDiscover.java
+++ /dev/null
@@ -1,15 +0,0 @@
-package com.abin.mallchat.common.common.utils.discover;
-
-import org.jsoup.nodes.Document;
-
-/**
- * Description: 通用的标题解析类
- * Author: abin
- * Date: 2023-05-27
- */
-public class CommonUrlTitleDiscover extends AbstractUrlTitleDiscover {
- @Override
- public String getDocTitle(Document document) {
- return document.title();
- }
-}
diff --git a/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/discover/PrioritizedUrlDiscover.java b/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/discover/PrioritizedUrlDiscover.java
new file mode 100644
index 0000000..ab57334
--- /dev/null
+++ b/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/discover/PrioritizedUrlDiscover.java
@@ -0,0 +1,60 @@
+package com.abin.mallchat.common.common.utils.discover;
+
+import cn.hutool.core.util.StrUtil;
+import org.jetbrains.annotations.Nullable;
+import org.jsoup.nodes.Document;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
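+ * Description: Prioritized URL discoverer; asks each delegate in order and returns the first non-blank title, description or image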
+ * Author: abin
+ * Date: 2023-05-27
+ */
+public class PrioritizedUrlDiscover extends AbstractUrlDiscover {
+
+ private final List urlTitleDiscovers = new ArrayList<>(2);
+
+ public PrioritizedUrlDiscover() {
+ urlTitleDiscovers.add(new CommonUrlDiscover());
+ urlTitleDiscovers.add(new WxUrlDiscover());
+ }
+
+
+ @Nullable
+ @Override
+ public String getTitle(Document document) {
+ for (UrlDiscover urlDiscover : urlTitleDiscovers) {
+ String urlTitle = urlDiscover.getTitle(document);
+ if (StrUtil.isNotBlank(urlTitle)) {
+ return urlTitle;
+ }
+ }
+ return null;
+ }
+
+ @Nullable
+ @Override
+ public String getDescription(Document document) {
+ for (UrlDiscover urlDiscover : urlTitleDiscovers) {
+ String urlDescription = urlDiscover.getDescription(document);
+ if (StrUtil.isNotBlank(urlDescription)) {
+ return urlDescription;
+ }
+ }
+ return null;
+ }
+
+ @Nullable
+ @Override
+ public String getImage(String url, Document document) {
+ for (UrlDiscover urlDiscover : urlTitleDiscovers) {
+ String urlImage = urlDiscover.getImage(url,document);
+ if (StrUtil.isNotBlank(urlImage)) {
+ return urlImage;
+ }
+ }
+ return null;
+ }
+}
diff --git a/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/discover/PrioritizedUrlTitleDiscover.java b/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/discover/PrioritizedUrlTitleDiscover.java
deleted file mode 100644
index 8c7fb4d..0000000
--- a/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/discover/PrioritizedUrlTitleDiscover.java
+++ /dev/null
@@ -1,33 +0,0 @@
-package com.abin.mallchat.common.common.utils.discover;
-
-import cn.hutool.core.util.StrUtil;
-import org.jsoup.nodes.Document;
-
-import java.util.ArrayList;
-import java.util.List;
-
-/**
- * Description: 具有优先级的title查询器
- * Author: abin
- * Date: 2023-05-27
- */
-public class PrioritizedUrlTitleDiscover extends AbstractUrlTitleDiscover {
-
- private final List urlTitleDiscovers = new ArrayList<>(2);
-
- public PrioritizedUrlTitleDiscover() {
- urlTitleDiscovers.add(new CommonUrlTitleDiscover());
- urlTitleDiscovers.add(new WxUrlTitleDiscover());
- }
-
- @Override
- public String getDocTitle(Document document) {
- for (UrlTitleDiscover urlTitleDiscover : urlTitleDiscovers) {
- String urlTitle = urlTitleDiscover.getDocTitle(document);
- if (StrUtil.isNotBlank(urlTitle)) {
- return urlTitle;
- }
- }
- return null;
- }
-}
diff --git a/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/discover/UrlDiscover.java b/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/discover/UrlDiscover.java
new file mode 100644
index 0000000..3515c28
--- /dev/null
+++ b/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/discover/UrlDiscover.java
@@ -0,0 +1,82 @@
+package com.abin.mallchat.common.common.utils.discover;
+
+import cn.hutool.core.date.StopWatch;
+import cn.hutool.core.util.StrUtil;
+import com.abin.mallchat.common.common.utils.discover.domain.UrlInfo;
+import org.jsoup.nodes.Document;
+
+import javax.annotation.Nullable;
+import java.util.Map;
+
+/**
+ * Contract for components that resolve preview information (title, description, image)
+ * for URLs found in a text.
+ *
+ * @author zhaoqichao
+ * @date 2023/7/3 16:34
+ */
+public interface UrlDiscover {
+
+    /**
+     * Finds the URLs in a text and resolves preview information for each of them.
+     *
+     * @param content text that may contain URLs
+     * @return map from the matched URL text to the information resolved for it
+     */
+    @Nullable
+    Map<String, UrlInfo> getUrlContentMap(String content);
+
+    /**
+     * Resolves preview information for a single URL.
+     *
+     * @param url the URL to inspect
+     * @return the resolved information, or {@code null} when none is available
+     */
+    @Nullable
+    UrlInfo getContent(String url);
+
+    /**
+     * Extracts the title from a parsed page.
+     *
+     * @param document parsed page
+     * @return the title; may be {@code null} or blank
+     */
+    @Nullable
+    String getTitle(Document document);
+
+    /**
+     * Extracts the description from a parsed page.
+     *
+     * @param document parsed page
+     * @return the description; may be {@code null} or blank
+     */
+    @Nullable
+    String getDescription(Document document);
+
+    /**
+     * Extracts the image (site logo or preview picture) of a parsed page.
+     *
+     * @param url      URL of the page
+     * @param document parsed page
+     * @return the image URL; may be {@code null} or blank
+     */
+    @Nullable
+    String getImage(String url, Document document);
+
+    /**
+     * Manual smoke test: resolves the URLs of a sample text with a {@link PrioritizedUrlDiscover}
+     * and prints the resulting map followed by the elapsed time in milliseconds.
+     *
+     * @param args unused
+     */
+    public static void main(String[] args) {
+        StopWatch stopWatch = new StopWatch();
+        stopWatch.start();
+        String longStr = "其中包含一个URL www.baidu.com,一个带有端口号的URL http://www.jd.com:80, 一个带有路径的URL http://mallchat.cn, 还有美团技术文章https://mp.weixin.qq.com/s/hwTf4bDck9_tlFpgVDeIKg ";
+        PrioritizedUrlDiscover discover = new PrioritizedUrlDiscover();
+        final Map<String, UrlInfo> map = discover.getUrlContentMap(longStr);
+        System.out.println(map);
+        stopWatch.stop();
+        long cost = stopWatch.getTotalTimeMillis();
+        System.out.println(cost);
+    }
+}
diff --git a/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/discover/UrlTitleDiscover.java b/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/discover/UrlTitleDiscover.java
deleted file mode 100644
index e2fac68..0000000
--- a/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/discover/UrlTitleDiscover.java
+++ /dev/null
@@ -1,35 +0,0 @@
-package com.abin.mallchat.common.common.utils.discover;
-
-import cn.hutool.core.date.StopWatch;
-import org.jsoup.nodes.Document;
-
-import javax.annotation.Nullable;
-import java.util.Map;
-
-public interface UrlTitleDiscover {
-
-
- @Nullable
- Map getContentTitleMap(String content);
-
-
- @Nullable
- String getUrlTitle(String url);
-
- @Nullable
- String getDocTitle(Document document);
-
- public static void main(String[] args) {//用异步多任务查询并合并 974 //串行访问的速度1349 1291 1283 1559
- StopWatch stopWatch = new StopWatch();
- stopWatch.start();
- String longStr = "这是一个很长的字符串再来 www.github.com,其中包含一个URL www.baidu.com,, 一个带有端口号的URL http://www.jd.com:80, 一个带有路径的URL http://mallchat.cn, 还有美团技术文章https://mp.weixin.qq.com/s/hwTf4bDck9_tlFpgVDeIKg ";
- PrioritizedUrlTitleDiscover discover = new PrioritizedUrlTitleDiscover();
- Map contentTitleMap = discover.getContentTitleMap(longStr);
- System.out.println(contentTitleMap);
-//
-// Jsoup.connect("http:// www.github.com");
- stopWatch.stop();
- long cost = stopWatch.getTotalTimeMillis();
- System.out.println(cost);
- }//{http://mallchat.cn=MallChat, www.baidu.com=百度一下,你就知道, https://mp.weixin.qq.com/s/hwTf4bDck9_tlFpgVDeIKg=超大规模数据库集群保稳系列之二:数据库攻防演练建设实践, http://www.jd.com:80=京东(JD.COM)-正品低价、品质保障、配送及时、轻松购物!}
-}
diff --git a/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/discover/WxUrlDiscover.java b/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/discover/WxUrlDiscover.java
new file mode 100644
index 0000000..a5bb330
--- /dev/null
+++ b/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/discover/WxUrlDiscover.java
@@ -0,0 +1,38 @@
+package com.abin.mallchat.common.common.utils.discover;
+
+import org.jetbrains.annotations.Nullable;
+import org.jsoup.nodes.Document;
+
+/**
+ * Description: URL discoverer for WeChat official-account articles; reads the og:* properties of the page
+ * Author: abin
+ * Date: 2023-05-27
+ */
+public class WxUrlDiscover extends AbstractUrlDiscover {
+
+    /** Returns the {@code content} of the {@code og:title} property. */
+    @Nullable
+    @Override
+    public String getTitle(Document document) {
+        return readProperty(document, "og:title");
+    }
+
+    /** Returns the {@code content} of the {@code og:description} property. */
+    @Nullable
+    @Override
+    public String getDescription(Document document) {
+        return readProperty(document, "og:description");
+    }
+
+    /** Returns the {@code content} of the {@code og:image} property; {@code url} is not consulted. */
+    @Nullable
+    @Override
+    public String getImage(String url, Document document) {
+        return readProperty(document, "og:image");
+    }
+
+    /** Selects the elements whose {@code property} attribute equals the given value and reads their {@code content} attribute. */
+    private static String readProperty(Document document, String property) {
+        return document.getElementsByAttributeValue("property", property).attr("content");
+    }
+}
diff --git a/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/discover/WxUrlTitleDiscover.java b/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/discover/WxUrlTitleDiscover.java
deleted file mode 100644
index 29b1172..0000000
--- a/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/discover/WxUrlTitleDiscover.java
+++ /dev/null
@@ -1,15 +0,0 @@
-package com.abin.mallchat.common.common.utils.discover;
-
-import org.jsoup.nodes.Document;
-
-/**
- * Description: 针对微信公众号文章的标题获取类
- * Author: abin
- * Date: 2023-05-27
- */
-public class WxUrlTitleDiscover extends AbstractUrlTitleDiscover {
- @Override
- public String getDocTitle(Document document) {
- return document.getElementsByAttributeValue("property", "og:title").attr("content");
- }
-}
diff --git a/mallchat-custom-server/src/main/java/com/abin/mallchat/custom/chat/service/strategy/msg/TextMsgHandler.java b/mallchat-custom-server/src/main/java/com/abin/mallchat/custom/chat/service/strategy/msg/TextMsgHandler.java
index d8e347a..6b63a31 100644
--- a/mallchat-custom-server/src/main/java/com/abin/mallchat/custom/chat/service/strategy/msg/TextMsgHandler.java
+++ b/mallchat-custom-server/src/main/java/com/abin/mallchat/custom/chat/service/strategy/msg/TextMsgHandler.java
@@ -11,7 +11,7 @@ import com.abin.mallchat.common.chat.service.cache.MsgCache;
import com.abin.mallchat.common.common.domain.enums.YesOrNoEnum;
import com.abin.mallchat.common.common.utils.AssertUtil;
import com.abin.mallchat.common.common.utils.SensitiveWordUtils;
-import com.abin.mallchat.common.common.utils.discover.PrioritizedUrlTitleDiscover;
+import com.abin.mallchat.common.common.utils.discover.PrioritizedUrlDiscover;
import com.abin.mallchat.common.user.domain.entity.User;
import com.abin.mallchat.common.user.domain.enums.RoleEnum;
import com.abin.mallchat.common.user.service.IRoleService;
@@ -47,7 +47,7 @@ public class TextMsgHandler extends AbstractMsgHandler {
@Autowired
private IRoleService iRoleService;
- private static final PrioritizedUrlTitleDiscover URL_TITLE_DISCOVER = new PrioritizedUrlTitleDiscover();
+ private static final PrioritizedUrlDiscover URL_TITLE_DISCOVER = new PrioritizedUrlDiscover();
@Override
MessageTypeEnum getMsgTypeEnum() {
From 27a7709f08e2da30cfa00edc7faa3e78095c55cb Mon Sep 17 00:00:00 2001
From: zhaoqichao <1416537683@qq.com>
Date: Wed, 5 Jul 2023 09:42:04 +0800
Subject: [PATCH 2/6] =?UTF-8?q?Url=E8=A7=A3=E6=9E=90=E7=BB=84=E4=BB=B6?=
=?UTF-8?q?=E9=87=8D=E6=9E=84=E5=8F=8A=E4=BC=98=E5=8C=96?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
MallChat | 1 +
.../common/utils/discover/domain/UrlInfo.java | 32 +++++++++++++++++++
2 files changed, 33 insertions(+)
create mode 160000 MallChat
create mode 100644 mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/discover/domain/UrlInfo.java
diff --git a/MallChat b/MallChat
new file mode 160000
index 0000000..c47b487
--- /dev/null
+++ b/MallChat
@@ -0,0 +1 @@
+Subproject commit c47b48760dd1eaaed6cf1c62930c65032ed66752
diff --git a/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/discover/domain/UrlInfo.java b/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/discover/domain/UrlInfo.java
new file mode 100644
index 0000000..8f02b7c
--- /dev/null
+++ b/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/discover/domain/UrlInfo.java
@@ -0,0 +1,32 @@
+package com.abin.mallchat.common.common.utils.discover.domain;
+
+import lombok.AllArgsConstructor;
+import lombok.Builder;
+import lombok.Data;
+import lombok.NoArgsConstructor;
+
+import java.io.Serializable;
+
+/**
+ * Preview information resolved for a URL; Serializable because it is held by the Serializable MessageExtra.
+ *
+ * @author zhaoqichao
+ * @date 2023/7/3 16:12
+ */
+@Data
+@Builder
+@AllArgsConstructor
+@NoArgsConstructor
+public class UrlInfo implements Serializable {
+
+    private static final long serialVersionUID = 1L;
+
+    /** Page title. */
+    private String title;
+
+    /** Page description. */
+    private String description;
+
+    /** Site logo or preview image URL. */
+    private String image;
+}
From b516e51bc931bc96ffa08acdfa2dfe1f13db13bf Mon Sep 17 00:00:00 2001
From: zhaoqichao <1416537683@qq.com>
Date: Wed, 5 Jul 2023 18:12:17 +0800
Subject: [PATCH 3/6] =?UTF-8?q?Url=E8=A7=A3=E6=9E=90=E7=BB=84=E4=BB=B6?=
=?UTF-8?q?=E4=BC=98=E5=8C=96=EF=BC=8Cserver=E7=AB=AF=E6=98=A0=E5=B0=84?=
=?UTF-8?q?=E8=B0=83=E6=95=B4?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
.../common/chat/domain/entity/msg/MessageExtra.java | 3 ++-
.../common/utils/discover/AbstractUrlDiscover.java | 4 ++--
.../utils/discover/PrioritizedUrlDiscover.java | 12 ++++++------
.../chat/domain/vo/response/ChatMessageResp.java | 3 ++-
.../chat/domain/vo/response/msg/TextMsgResp.java | 3 ++-
.../custom/chat/service/adapter/MessageAdapter.java | 2 +-
.../chat/service/strategy/msg/TextMsgHandler.java | 7 ++++---
7 files changed, 19 insertions(+), 15 deletions(-)
diff --git a/mallchat-common/src/main/java/com/abin/mallchat/common/chat/domain/entity/msg/MessageExtra.java b/mallchat-common/src/main/java/com/abin/mallchat/common/chat/domain/entity/msg/MessageExtra.java
index 9a68c74..fa3af00 100644
--- a/mallchat-common/src/main/java/com/abin/mallchat/common/chat/domain/entity/msg/MessageExtra.java
+++ b/mallchat-common/src/main/java/com/abin/mallchat/common/chat/domain/entity/msg/MessageExtra.java
@@ -1,5 +1,6 @@
package com.abin.mallchat.common.chat.domain.entity.msg;
+import com.abin.mallchat.common.common.utils.discover.domain.UrlInfo;
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
import lombok.AllArgsConstructor;
import lombok.Builder;
@@ -23,7 +24,7 @@ import java.util.Map;
public class MessageExtra implements Serializable {
private static final long serialVersionUID = 1L;
//url跳转链接
- private Map urlTitleMap;
+ private Map urlContentMap;
//消息撤回详情
private MsgRecall recall;
//艾特的uid
diff --git a/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/discover/AbstractUrlDiscover.java b/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/discover/AbstractUrlDiscover.java
index 80d7ab8..4da24e9 100644
--- a/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/discover/AbstractUrlDiscover.java
+++ b/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/discover/AbstractUrlDiscover.java
@@ -5,7 +5,6 @@ import cn.hutool.core.util.StrUtil;
import com.abin.mallchat.common.common.utils.FutureUtils;
import com.abin.mallchat.common.common.utils.discover.domain.UrlInfo;
import lombok.extern.slf4j.Slf4j;
-import org.apache.commons.lang3.StringUtils;
import org.jetbrains.annotations.Nullable;
import org.jsoup.Connection;
import org.jsoup.Jsoup;
@@ -30,7 +29,7 @@ public abstract class AbstractUrlDiscover implements UrlDiscover {
private static final Pattern PATTERN = Pattern.compile("((http|https)://)?(www.)?([\\w_-]+(?:(?:\\.[\\w_-]+)+))([\\w.,@?^=%&:/~+#-]*[\\w@?^=%&/~+#-])?");
- @javax.annotation.Nullable
+ @Nullable
@Override
public Map getUrlContentMap(String content) {
@@ -83,4 +82,5 @@ public abstract class AbstractUrlDiscover implements UrlDiscover {
}
return null;
}
+
}
diff --git a/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/discover/PrioritizedUrlDiscover.java b/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/discover/PrioritizedUrlDiscover.java
index ab57334..106f598 100644
--- a/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/discover/PrioritizedUrlDiscover.java
+++ b/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/discover/PrioritizedUrlDiscover.java
@@ -14,18 +14,18 @@ import java.util.List;
*/
public class PrioritizedUrlDiscover extends AbstractUrlDiscover {
- private final List urlTitleDiscovers = new ArrayList<>(2);
+ private final List urlDiscovers = new ArrayList<>(2);
public PrioritizedUrlDiscover() {
- urlTitleDiscovers.add(new CommonUrlDiscover());
- urlTitleDiscovers.add(new WxUrlDiscover());
+ urlDiscovers.add(new CommonUrlDiscover());
+ urlDiscovers.add(new WxUrlDiscover());
}
@Nullable
@Override
public String getTitle(Document document) {
- for (UrlDiscover urlDiscover : urlTitleDiscovers) {
+ for (UrlDiscover urlDiscover : urlDiscovers) {
String urlTitle = urlDiscover.getTitle(document);
if (StrUtil.isNotBlank(urlTitle)) {
return urlTitle;
@@ -37,7 +37,7 @@ public class PrioritizedUrlDiscover extends AbstractUrlDiscover {
@Nullable
@Override
public String getDescription(Document document) {
- for (UrlDiscover urlDiscover : urlTitleDiscovers) {
+ for (UrlDiscover urlDiscover : urlDiscovers) {
String urlDescription = urlDiscover.getDescription(document);
if (StrUtil.isNotBlank(urlDescription)) {
return urlDescription;
@@ -49,7 +49,7 @@ public class PrioritizedUrlDiscover extends AbstractUrlDiscover {
@Nullable
@Override
public String getImage(String url, Document document) {
- for (UrlDiscover urlDiscover : urlTitleDiscovers) {
+ for (UrlDiscover urlDiscover : urlDiscovers) {
String urlImage = urlDiscover.getImage(url,document);
if (StrUtil.isNotBlank(urlImage)) {
return urlImage;
diff --git a/mallchat-custom-server/src/main/java/com/abin/mallchat/custom/chat/domain/vo/response/ChatMessageResp.java b/mallchat-custom-server/src/main/java/com/abin/mallchat/custom/chat/domain/vo/response/ChatMessageResp.java
index 968aae9..2cfed96 100644
--- a/mallchat-custom-server/src/main/java/com/abin/mallchat/custom/chat/domain/vo/response/ChatMessageResp.java
+++ b/mallchat-custom-server/src/main/java/com/abin/mallchat/custom/chat/domain/vo/response/ChatMessageResp.java
@@ -1,5 +1,6 @@
package com.abin.mallchat.custom.chat.domain.vo.response;
+import com.abin.mallchat.common.common.utils.discover.domain.UrlInfo;
import io.swagger.annotations.ApiModelProperty;
import lombok.AllArgsConstructor;
import lombok.Builder;
@@ -50,7 +51,7 @@ public class ChatMessageResp {
private String content;
@ApiModelProperty("消息链接映射-废弃")
@Deprecated
- private Map urlTitleMap;
+ private Map urlContentMap;
@ApiModelProperty("消息类型 1正常文本 2.撤回消息")
private Integer type;
@ApiModelProperty("消息内容不同的消息类型,内容体不同,见https://www.yuque.com/snab/mallcaht/rkb2uz5k1qqdmcmd")
diff --git a/mallchat-custom-server/src/main/java/com/abin/mallchat/custom/chat/domain/vo/response/msg/TextMsgResp.java b/mallchat-custom-server/src/main/java/com/abin/mallchat/custom/chat/domain/vo/response/msg/TextMsgResp.java
index 34db6c5..17a7b48 100644
--- a/mallchat-custom-server/src/main/java/com/abin/mallchat/custom/chat/domain/vo/response/msg/TextMsgResp.java
+++ b/mallchat-custom-server/src/main/java/com/abin/mallchat/custom/chat/domain/vo/response/msg/TextMsgResp.java
@@ -1,5 +1,6 @@
package com.abin.mallchat.custom.chat.domain.vo.response.msg;
+import com.abin.mallchat.common.common.utils.discover.domain.UrlInfo;
import io.swagger.annotations.ApiModelProperty;
import lombok.AllArgsConstructor;
import lombok.Builder;
@@ -22,7 +23,7 @@ public class TextMsgResp {
@ApiModelProperty("消息内容")
private String content;
@ApiModelProperty("消息链接映射")
- private Map urlTitleMap;
+ private Map urlContentMap;
@ApiModelProperty("艾特的uid")
private List atUidList;
@ApiModelProperty("父消息,如果没有父消息,返回的是null")
diff --git a/mallchat-custom-server/src/main/java/com/abin/mallchat/custom/chat/service/adapter/MessageAdapter.java b/mallchat-custom-server/src/main/java/com/abin/mallchat/custom/chat/service/adapter/MessageAdapter.java
index 53e10fa..a751ad6 100644
--- a/mallchat-custom-server/src/main/java/com/abin/mallchat/custom/chat/service/adapter/MessageAdapter.java
+++ b/mallchat-custom-server/src/main/java/com/abin/mallchat/custom/chat/service/adapter/MessageAdapter.java
@@ -56,7 +56,7 @@ public class MessageAdapter {
messageVO.setSendTime(message.getCreateTime());
AbstractMsgHandler msgHandler = MsgHandlerFactory.getStrategyNoNull(message.getType());
messageVO.setBody(msgHandler.showMsg(message));
- messageVO.setUrlTitleMap(Optional.ofNullable(message.getExtra()).map(MessageExtra::getUrlTitleMap).orElse(null));
+ messageVO.setUrlContentMap(Optional.ofNullable(message.getExtra()).map(MessageExtra::getUrlContentMap).orElse(null));
Message replyMessage = replyMap.get(message.getReplyMsgId());
//回复消息
diff --git a/mallchat-custom-server/src/main/java/com/abin/mallchat/custom/chat/service/strategy/msg/TextMsgHandler.java b/mallchat-custom-server/src/main/java/com/abin/mallchat/custom/chat/service/strategy/msg/TextMsgHandler.java
index 6b63a31..1cc3f25 100644
--- a/mallchat-custom-server/src/main/java/com/abin/mallchat/custom/chat/service/strategy/msg/TextMsgHandler.java
+++ b/mallchat-custom-server/src/main/java/com/abin/mallchat/custom/chat/service/strategy/msg/TextMsgHandler.java
@@ -12,6 +12,7 @@ import com.abin.mallchat.common.common.domain.enums.YesOrNoEnum;
import com.abin.mallchat.common.common.utils.AssertUtil;
import com.abin.mallchat.common.common.utils.SensitiveWordUtils;
import com.abin.mallchat.common.common.utils.discover.PrioritizedUrlDiscover;
+import com.abin.mallchat.common.common.utils.discover.domain.UrlInfo;
import com.abin.mallchat.common.user.domain.entity.User;
import com.abin.mallchat.common.user.domain.enums.RoleEnum;
import com.abin.mallchat.common.user.service.IRoleService;
@@ -91,8 +92,8 @@ public class TextMsgHandler extends AbstractMsgHandler {
}
//判断消息url跳转
- Map urlTitleMap = URL_TITLE_DISCOVER.getContentTitleMap(body.getContent());
- extra.setUrlTitleMap(urlTitleMap);
+ Map urlContentMap = URL_TITLE_DISCOVER.getUrlContentMap(body.getContent());
+ extra.setUrlContentMap(urlContentMap);
//艾特功能
if (CollectionUtil.isNotEmpty(body.getAtUidList())) {
extra.setAtUidList(body.getAtUidList());
@@ -106,7 +107,7 @@ public class TextMsgHandler extends AbstractMsgHandler {
public Object showMsg(Message msg) {
TextMsgResp resp = new TextMsgResp();
resp.setContent(msg.getContent());
- resp.setUrlTitleMap(Optional.ofNullable(msg.getExtra()).map(MessageExtra::getUrlTitleMap).orElse(null));
+ resp.setUrlContentMap(Optional.ofNullable(msg.getExtra()).map(MessageExtra::getUrlContentMap).orElse(null));
resp.setAtUidList(Optional.ofNullable(msg.getExtra()).map(MessageExtra::getAtUidList).orElse(null));
//回复消息
Optional reply = Optional.ofNullable(msg.getReplyMsgId())
From 21ea09cd4cb3e24941a5d1ee1e6e414dd3c8f3e3 Mon Sep 17 00:00:00 2001
From: zhaoqichao <1416537683@qq.com>
Date: Thu, 6 Jul 2023 10:23:44 +0800
Subject: [PATCH 4/6] =?UTF-8?q?Url=E8=A7=A3=E6=9E=90=E7=BB=84=E4=BB=B6?=
=?UTF-8?q?=E6=A8=A1=E5=BC=8F=E9=87=8D=E6=9E=84=EF=BC=88=E4=B8=8D=E5=BD=B1?=
=?UTF-8?q?=E5=93=8D=E6=97=A7=E7=89=88=E6=9C=AC=E4=BD=BF=E7=94=A8=EF=BC=89?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
.../common/utils/chain/Application.java | 22 +++++
.../common/utils/chain/FactoryUrlHandler.java | 96 +++++++++++++++++++
.../utils/chain/PrioritizedUrlHandler.java | 39 ++++++++
.../common/common/utils/chain/UrlHandler.java | 25 +++++
.../common/utils/chain/WxUrlHandler.java | 32 +++++++
.../common/utils/chain/dto/UrlInfo.java | 32 +++++++
6 files changed, 246 insertions(+)
create mode 100644 mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/chain/Application.java
create mode 100644 mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/chain/FactoryUrlHandler.java
create mode 100644 mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/chain/PrioritizedUrlHandler.java
create mode 100644 mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/chain/UrlHandler.java
create mode 100644 mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/chain/WxUrlHandler.java
create mode 100644 mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/chain/dto/UrlInfo.java
diff --git a/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/chain/Application.java b/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/chain/Application.java
new file mode 100644
index 0000000..b2c5d70
--- /dev/null
+++ b/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/chain/Application.java
@@ -0,0 +1,22 @@
+package com.abin.mallchat.common.common.utils.chain;
+
+import com.abin.mallchat.common.common.utils.discover.domain.UrlInfo;
+import org.jetbrains.annotations.Nullable;
+import org.jsoup.nodes.Document;
+
+import java.util.Map;
+
+/**
+ * Description: manual smoke test for the chain-style URL handlers
+ * Author: achao
+ * Date: 2023/7/6 9:29
+ */
+public class Application {
+    public static void main(String[] args) {
+        PrioritizedUrlHandler handler = new PrioritizedUrlHandler();
+        String longStr = "其中包含一个URL www.baidu.com,一个带有端口号的URL http://www.jd.com:80, 一个带有路径的URL http://mallchat.cn, 还有美团技术文章https://mp.weixin.qq.com/s/hwTf4bDck9_tlFpgVDeIKg ";
+        // Keyed by each link matched in the text; the handler performs real HTTP requests to fill the values.
+        Map<String, UrlInfo> urlContentMap = handler.getUrlContentMap(longStr);
+        System.out.println(urlContentMap);
+    }
+}
diff --git a/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/chain/FactoryUrlHandler.java b/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/chain/FactoryUrlHandler.java
new file mode 100644
index 0000000..bd8d0e8
--- /dev/null
+++ b/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/chain/FactoryUrlHandler.java
@@ -0,0 +1,96 @@
+package com.abin.mallchat.common.common.utils.chain;
+
+import cn.hutool.core.util.ReUtil;
+import cn.hutool.core.util.StrUtil;
+import com.abin.mallchat.common.common.utils.FutureUtils;
+import com.abin.mallchat.common.common.utils.discover.domain.UrlInfo;
+import lombok.extern.slf4j.Slf4j;
+import org.jsoup.Connection;
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
+import org.springframework.data.util.Pair;
+
+import javax.annotation.Nullable;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
+import java.util.concurrent.CompletableFuture;
+import java.util.regex.Pattern;
+import java.util.stream.Collectors;
+
+/**
+ * Description: base class that finds the links in a text, downloads each page and builds its UrlInfo
+ * Author: achao
+ * Date: 2023/7/6 9:12
+ */
+@Slf4j
+public abstract class FactoryUrlHandler extends UrlHandler {
+    // Regex that recognises links; the scheme and the literal "www." prefix are both optional.
+    private static final Pattern PATTERN = Pattern.compile("((http|https)://)?(www\\.)?([\\w_-]+(?:(?:\\.[\\w_-]+)+))([\\w.,@?^=%&:/~+#-]*[\\w@?^=%&/~+#-])?");
+
+    /** Looks up every link in {@code content}; a blank text yields an empty map. */
+    @Override
+    @Nullable
+    public Map<String, UrlInfo> getUrlContentMap(String content) {
+        if (StrUtil.isBlank(content)) {
+            return new HashMap<>();
+        }
+        List<String> matchList = ReUtil.findAll(PATTERN, content, 0);
+        // Fetch in parallel; a null pair marks a failed link (sequenceNonNull presumably drops nulls - confirm in FutureUtils).
+        List<CompletableFuture<Pair<String, UrlInfo>>> futures = matchList.stream().map(match -> CompletableFuture.supplyAsync(() -> {
+            UrlInfo urlInfo = getContent(match);
+            return Objects.isNull(urlInfo) ? null : Pair.of(match, urlInfo);
+        })).collect(Collectors.toList());
+        CompletableFuture<List<Pair<String, UrlInfo>>> future = FutureUtils.sequenceNonNull(futures);
+        // Assemble the result; when the same link occurs twice the first entry wins.
+        return future.join().stream().collect(Collectors.toMap(Pair::getFirst, Pair::getSecond, (a, b) -> a));
+    }
+
+    /** Builds the UrlInfo of one link ("http://" is added unless a full scheme is present); null if the page cannot be loaded. */
+    private UrlInfo getContent(String url) {
+        String target = StrUtil.startWith(url, "http://") || StrUtil.startWith(url, "https://") ? url : "http://" + url;
+        Document document = getUrlDocument(target);
+        return Objects.isNull(document) ? null : UrlInfo.builder()
+                .title(getTitle(document))
+                .description(getDescription(document))
+                .image(getImage(target, document)).build();
+    }
+
+    /** Downloads and parses the page (2000 ms timeout); any failure is logged and reported as null. */
+    protected Document getUrlDocument(String matchUrl) {
+        try {
+            Connection connect = Jsoup.connect(matchUrl);
+            connect.timeout(2000);
+            return connect.get();
+        } catch (Exception e) {
+            log.error("find error:url:{}", matchUrl, e);
+        }
+        return null;
+    }
+
+    /**
+     * Gets the title of the link.
+     * @param document parsed page of the link
+     * @return the title
+     */
+    @Nullable
+    abstract String getTitle(Document document);
+
+    /**
+     * Gets the description of the link.
+     * @param document parsed page of the link
+     * @return the description
+     */
+    @Nullable
+    abstract String getDescription(Document document);
+
+    /**
+     * Gets the logo of the link.
+     * @param url address the page was requested from
+     * @param document parsed page of the link
+     * @return the logo address
+     */
+    @Nullable
+    abstract String getImage(String url, Document document);
+}
diff --git a/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/chain/PrioritizedUrlHandler.java b/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/chain/PrioritizedUrlHandler.java
new file mode 100644
index 0000000..b06ad32
--- /dev/null
+++ b/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/chain/PrioritizedUrlHandler.java
@@ -0,0 +1,39 @@
+package com.abin.mallchat.common.common.utils.chain;
+
+import cn.hutool.core.util.StrUtil;
+import com.abin.mallchat.common.common.utils.discover.UrlDiscover;
+import com.abin.mallchat.common.common.utils.discover.domain.UrlInfo;
+import org.jetbrains.annotations.Nullable;
+import org.jsoup.nodes.Document;
+
+import java.util.List;
+import java.util.Map;
+
+/**
+ * Description: handler that prefers the WxUrlHandler result and falls back to CommonUrlHandler
+ * Author: achao
+ * Date: 2023/7/6 9:36
+ */
+public class PrioritizedUrlHandler extends FactoryUrlHandler {
+    // Every getter asks wxUrlHandler exactly once and consults commonUrlHandler only when that result is blank.
+    private final FactoryUrlHandler commonUrlHandler = new CommonUrlHandler();
+    private final FactoryUrlHandler wxUrlHandler = new WxUrlHandler();
+
+    @Nullable
+    @Override
+    String getTitle(Document document) {
+        return java.util.Optional.ofNullable(wxUrlHandler.getTitle(document)).filter(StrUtil::isNotBlank).orElseGet(() -> commonUrlHandler.getTitle(document));
+    }
+
+    @Nullable
+    @Override
+    String getDescription(Document document) {
+        return java.util.Optional.ofNullable(wxUrlHandler.getDescription(document)).filter(StrUtil::isNotBlank).orElseGet(() -> commonUrlHandler.getDescription(document));
+    }
+
+    @Nullable
+    @Override
+    String getImage(String url, Document document) {
+        return java.util.Optional.ofNullable(wxUrlHandler.getImage(url, document)).filter(StrUtil::isNotBlank).orElseGet(() -> commonUrlHandler.getImage(url, document));
+    }
+}
diff --git a/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/chain/UrlHandler.java b/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/chain/UrlHandler.java
new file mode 100644
index 0000000..c9a9a3d
--- /dev/null
+++ b/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/chain/UrlHandler.java
@@ -0,0 +1,25 @@
+package com.abin.mallchat.common.common.utils.chain;
+
+import com.abin.mallchat.common.common.utils.discover.domain.UrlInfo;
+import org.jsoup.nodes.Document;
+
+import javax.annotation.Nullable;
+import java.util.Map;
+import java.util.regex.Pattern;
+
+/**
+ * Description: abstract contract for handlers that process the set of links contained in a message
+ * Author: achao
+ * Date: 2023/7/6 8:58
+ */
+public abstract class UrlHandler {
+
+    /**
+     * Extracts all links in the message and assembles them into a map.
+     * @param content message text to scan
+     * @return the links found, each mapped to its UrlInfo
+     */
+    @Nullable
+    abstract Map<String, UrlInfo> getUrlContentMap(String content);
+
+}
diff --git a/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/chain/WxUrlHandler.java b/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/chain/WxUrlHandler.java
new file mode 100644
index 0000000..f8356d9
--- /dev/null
+++ b/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/chain/WxUrlHandler.java
@@ -0,0 +1,32 @@
+package com.abin.mallchat.common.common.utils.chain;
+
+import cn.hutool.core.util.StrUtil;
+import org.jetbrains.annotations.Nullable;
+import org.jsoup.nodes.Document;
+
+/**
+ * Description: reads link metadata from the page's og:title / og:description / og:image properties
+ * Author: achao
+ * Date: 2023/7/6 9:34
+ */
+public class WxUrlHandler extends FactoryUrlHandler {
+    @Nullable
+    @Override
+    public String getTitle(Document document) {
+        return document.getElementsByAttributeValue("property", "og:title").attr("content");
+    }
+
+    @Nullable
+    @Override
+    public String getDescription(Document document) {
+        String description = document.getElementsByAttributeValue("property", "og:description").attr("content");
+        // Keep only the first sentence; without a "。" the text is returned whole (indexOf would be -1 and substring would throw).
+        return StrUtil.isNotBlank(description) && description.contains("。") ? description.substring(0, description.indexOf("。")) : description;
+    }
+
+    @Nullable
+    @Override
+    public String getImage(String url, Document document) {
+        return document.getElementsByAttributeValue("property", "og:image").attr("content");
+    }
+}
diff --git a/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/chain/dto/UrlInfo.java b/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/chain/dto/UrlInfo.java
new file mode 100644
index 0000000..ad1050a
--- /dev/null
+++ b/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/chain/dto/UrlInfo.java
@@ -0,0 +1,32 @@
+package com.abin.mallchat.common.common.utils.chain.dto;
+
+import lombok.AllArgsConstructor;
+import lombok.Builder;
+import lombok.Data;
+import lombok.NoArgsConstructor;
+
+/**
+ * Description: holder for the information extracted from a link
+ * Author: achao
+ * Date: 2023/7/6 8:54
+ */
+@Builder
+@Data
+@NoArgsConstructor
+@AllArgsConstructor
+public class UrlInfo {
+    /**
+     * Title
+     **/
+    String title;
+
+    /**
+     * Description
+     **/
+    String description;
+
+    /**
+     * Website logo
+     **/
+    String image;
+}
From a30d5f27661174788bb13f8759cd175eeb2d8bc6 Mon Sep 17 00:00:00 2001
From: zhaoqichao <1416537683@qq.com>
Date: Thu, 6 Jul 2023 10:26:45 +0800
Subject: [PATCH 5/6] =?UTF-8?q?Url=E8=A7=A3=E6=9E=90=E7=BB=84=E4=BB=B6?=
=?UTF-8?q?=E4=BC=98=E5=85=88=E7=BA=A7=E8=B0=83=E6=95=B4?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
.../common/common/utils/discover/CommonUrlDiscover.java | 4 ----
.../common/common/utils/discover/PrioritizedUrlDiscover.java | 2 +-
2 files changed, 1 insertion(+), 5 deletions(-)
diff --git a/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/discover/CommonUrlDiscover.java b/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/discover/CommonUrlDiscover.java
index b73c81d..2f3b842 100644
--- a/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/discover/CommonUrlDiscover.java
+++ b/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/discover/CommonUrlDiscover.java
@@ -31,10 +31,6 @@ public class CommonUrlDiscover extends AbstractUrlDiscover {
@Nullable
@Override
public String getImage(String url, Document document) {
- //如果包含og则是微信链接
- if(StrUtil.isNotBlank(document.getElementsByAttributeValue("property", "og:title").attr("content"))){
- return null;
- }
String image = document.select("link[type=image/x-icon]").attr("href");
//如果没有去匹配含有icon属性的logo
String href = StrUtil.isEmpty(image) ? document.select("link[rel$=icon]").attr("href") : image;
diff --git a/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/discover/PrioritizedUrlDiscover.java b/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/discover/PrioritizedUrlDiscover.java
index 106f598..eef3c11 100644
--- a/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/discover/PrioritizedUrlDiscover.java
+++ b/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/discover/PrioritizedUrlDiscover.java
@@ -17,8 +17,8 @@ public class PrioritizedUrlDiscover extends AbstractUrlDiscover {
private final List urlDiscovers = new ArrayList<>(2);
public PrioritizedUrlDiscover() {
- urlDiscovers.add(new CommonUrlDiscover());
urlDiscovers.add(new WxUrlDiscover());
+ urlDiscovers.add(new CommonUrlDiscover());
}
From eddee5647fa893626197d16d062cc481d6c0c459 Mon Sep 17 00:00:00 2001
From: zhaoqichao <1416537683@qq.com>
Date: Thu, 6 Jul 2023 10:28:04 +0800
Subject: [PATCH 6/6] =?UTF-8?q?Url=E8=A7=A3=E6=9E=90=E7=BB=84=E4=BB=B6?=
=?UTF-8?q?=E6=A8=A1=E5=BC=8F=E9=87=8D=E6=9E=84=EF=BC=88=E4=B8=8D=E5=BD=B1?=
=?UTF-8?q?=E5=93=8D=E6=97=A7=E7=89=88=E6=9C=AC=E4=BD=BF=E7=94=A8=EF=BC=89?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
.../common/utils/chain/CommonUrlHandler.java | 51 +++++++++++++++++++
1 file changed, 51 insertions(+)
create mode 100644 mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/chain/CommonUrlHandler.java
diff --git a/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/chain/CommonUrlHandler.java b/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/chain/CommonUrlHandler.java
new file mode 100644
index 0000000..5d85f00
--- /dev/null
+++ b/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/chain/CommonUrlHandler.java
@@ -0,0 +1,51 @@
+package com.abin.mallchat.common.common.utils.chain;
+
+import cn.hutool.core.util.StrUtil;
+import org.jetbrains.annotations.Nullable;
+import org.jsoup.nodes.Document;
+
+/**
+ * Description: reads link metadata from the page title, meta description/keywords and icon links
+ * Author: achao
+ * Date: 2023/7/6 9:25
+ */
+public class CommonUrlHandler extends FactoryUrlHandler {
+
+    @Nullable
+    @Override
+    public String getTitle(Document document) {
+        return document.title();
+    }
+
+    @Nullable
+    @Override
+    public String getDescription(Document document) {
+        String description = document.head().select("meta[name=description]").attr("content");
+        String keywords = document.head().select("meta[name=keywords]").attr("content");
+        String content = StrUtil.isNotBlank(description) ? description : keywords;
+        // Keep only the first sentence; without a "。" the text is returned whole (indexOf would be -1 and substring would throw).
+        return StrUtil.isNotBlank(content) && content.contains("。") ? content.substring(0, content.indexOf("。")) : content;
+    }
+
+    @Nullable
+    @Override
+    public String getImage(String url, Document document) {
+        String image = document.select("link[type=image/x-icon]").attr("href");
+        // When there is no x-icon link, fall back to a link whose rel ends with "icon".
+        String href = StrUtil.isEmpty(image) ? document.select("link[rel$=icon]").attr("href") : image;
+        // href without slashes and "favicon.ico"; if the page url contains it, href already carries part of the domain.
+        String hrefHost = StrUtil.removeAny(StrUtil.removeAny(href, "/"), "favicon.ico");
+        if (StrUtil.isNotBlank(hrefHost) && StrUtil.containsAny(StrUtil.removePrefix(url, "http://"), hrefHost)) {
+            return "http://" + href.replaceFirst("^/+", "");
+        }
+        // The url itself already points at the logo.
+        if (StrUtil.containsAny(url, "favicon")) {
+            return url;
+        }
+        // href already contains an address of its own (the "http" test also covers "https").
+        if (StrUtil.containsAny(href, "http")) {
+            return href;
+        }
+        return StrUtil.format("{}/{}", url, StrUtil.removePrefix(href, "/"));
+    }
+}