From 3c77ed3c5fd5386c7d2c5ebc31ac3b9dc698d73a Mon Sep 17 00:00:00 2001 From: zhaoqichao <1416537683@qq.com> Date: Wed, 5 Jul 2023 09:29:21 +0800 Subject: [PATCH 1/6] =?UTF-8?q?Url=E8=A7=A3=E6=9E=90=E7=BB=84=E4=BB=B6?= =?UTF-8?q?=E9=87=8D=E6=9E=84=E5=8F=8A=E4=BC=98=E5=8C=96?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ...Discover.java => AbstractUrlDiscover.java} | 38 +++++++----- .../utils/discover/CommonUrlDiscover.java | 56 +++++++++++++++++ .../discover/CommonUrlTitleDiscover.java | 15 ----- .../discover/PrioritizedUrlDiscover.java | 60 +++++++++++++++++++ .../discover/PrioritizedUrlTitleDiscover.java | 33 ---------- .../common/utils/discover/UrlDiscover.java | 46 ++++++++++++++ .../utils/discover/UrlTitleDiscover.java | 35 ----------- .../common/utils/discover/WxUrlDiscover.java | 30 ++++++++++ .../utils/discover/WxUrlTitleDiscover.java | 15 ----- .../service/strategy/msg/TextMsgHandler.java | 4 +- 10 files changed, 218 insertions(+), 114 deletions(-) rename mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/discover/{AbstractUrlTitleDiscover.java => AbstractUrlDiscover.java} (64%) create mode 100644 mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/discover/CommonUrlDiscover.java delete mode 100644 mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/discover/CommonUrlTitleDiscover.java create mode 100644 mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/discover/PrioritizedUrlDiscover.java delete mode 100644 mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/discover/PrioritizedUrlTitleDiscover.java create mode 100644 mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/discover/UrlDiscover.java delete mode 100644 mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/discover/UrlTitleDiscover.java create mode 100644 
mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/discover/WxUrlDiscover.java delete mode 100644 mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/discover/WxUrlTitleDiscover.java diff --git a/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/discover/AbstractUrlTitleDiscover.java b/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/discover/AbstractUrlDiscover.java similarity index 64% rename from mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/discover/AbstractUrlTitleDiscover.java rename to mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/discover/AbstractUrlDiscover.java index 79cbefe..80d7ab8 100644 --- a/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/discover/AbstractUrlTitleDiscover.java +++ b/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/discover/AbstractUrlDiscover.java @@ -3,14 +3,15 @@ package com.abin.mallchat.common.common.utils.discover; import cn.hutool.core.util.ReUtil; import cn.hutool.core.util.StrUtil; import com.abin.mallchat.common.common.utils.FutureUtils; +import com.abin.mallchat.common.common.utils.discover.domain.UrlInfo; import lombok.extern.slf4j.Slf4j; import org.apache.commons.lang3.StringUtils; +import org.jetbrains.annotations.Nullable; import org.jsoup.Connection; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.springframework.data.util.Pair; -import javax.annotation.Nullable; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -20,46 +21,55 @@ import java.util.regex.Pattern; import java.util.stream.Collectors; /** - * Description: urlTitle查询抽象类 - * Author: abin - * Date: 2023-05-27 + * @author zhaoqichao + * @date 2023/7/3 16:38 */ @Slf4j -public abstract class AbstractUrlTitleDiscover implements UrlTitleDiscover { +public abstract class AbstractUrlDiscover implements UrlDiscover { //链接识别的正则 private static final Pattern PATTERN = 
Pattern.compile("((http|https)://)?(www.)?([\\w_-]+(?:(?:\\.[\\w_-]+)+))([\\w.,@?^=%&:/~+#-]*[\\w@?^=%&/~+#-])?"); - @Nullable + + @javax.annotation.Nullable @Override - public Map getContentTitleMap(String content) { + public Map getUrlContentMap(String content) { + if (StrUtil.isBlank(content)) { return new HashMap<>(); } List matchList = ReUtil.findAll(PATTERN, content, 0); + //并行请求 - List>> futures = matchList.stream().map(match -> CompletableFuture.supplyAsync(() -> { - String title = getUrlTitle(match); - return StringUtils.isNotEmpty(title) ? Pair.of(match, title) : null; + List>> futures = matchList.stream().map(match -> CompletableFuture.supplyAsync(() -> { + UrlInfo urlInfo = getContent(match); + return Objects.isNull(urlInfo) ? null : Pair.of(match, urlInfo); })).collect(Collectors.toList()); - CompletableFuture>> future = FutureUtils.sequenceNonNull(futures); + CompletableFuture>> future = FutureUtils.sequenceNonNull(futures); //结果组装 return future.join().stream().collect(Collectors.toMap(Pair::getFirst, Pair::getSecond, (a, b) -> a)); } @Nullable @Override - public String getUrlTitle(String url) { + public UrlInfo getContent(String url) { Document document = getUrlDocument(assemble(url)); if (Objects.isNull(document)) { return null; } - return getDocTitle(document); + + return UrlInfo.builder() + .title(getTitle(document)) + .description(getDescription(document)) + .image(getImage(assemble(url),document)).build(); } + private String assemble(String url) { + if (!StrUtil.startWith(url, "http")) { return "http://" + url; } + return url; } @@ -69,7 +79,7 @@ public abstract class AbstractUrlTitleDiscover implements UrlTitleDiscover { connect.timeout(2000); return connect.get(); } catch (Exception e) { - log.error("find title error:url:{}", matchUrl, e); + log.error("find error:url:{}", matchUrl, e); } return null; } diff --git a/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/discover/CommonUrlDiscover.java 
b/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/discover/CommonUrlDiscover.java new file mode 100644 index 0000000..b73c81d --- /dev/null +++ b/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/discover/CommonUrlDiscover.java @@ -0,0 +1,56 @@ +package com.abin.mallchat.common.common.utils.discover; + +import cn.hutool.core.util.StrUtil; +import io.jsonwebtoken.lang.Objects; +import org.apache.commons.lang3.StringUtils; +import org.jetbrains.annotations.Nullable; +import org.jsoup.nodes.Document; +import org.jsoup.select.Elements; + +/** + * @author zhaoqichao + * @date 2023/7/3 16:54 + */ +public class CommonUrlDiscover extends AbstractUrlDiscover { + @Nullable + @Override + public String getTitle(Document document) { + return document.title(); + } + + @Nullable + @Override + public String getDescription(Document document) { + String description = document.head().select("meta[name=description]").attr("content"); + String keywords = document.head().select("meta[name=keywords]").attr("content"); + String content = StrUtil.isNotBlank(description) ? description : keywords; + //只保留一句话的描述 + return StrUtil.isNotBlank(content) ? content.substring(0, content.indexOf("。")) : content; + } + + @Nullable + @Override + public String getImage(String url, Document document) { + //如果包含og则是微信链接 + if(StrUtil.isNotBlank(document.getElementsByAttributeValue("property", "og:title").attr("content"))){ + return null; + } + String image = document.select("link[type=image/x-icon]").attr("href"); + //如果没有去匹配含有icon属性的logo + String href = StrUtil.isEmpty(image) ? 
document.select("link[rel$=icon]").attr("href") : image; + //如果icon中已经包含了url部分域名 + if (StrUtil.isNotBlank(StrUtil.removeAny(StrUtil.removeAny(href, "/"), "favicon.ico")) && + StrUtil.containsAny(StrUtil.removePrefix(url, "http://"), StrUtil.removeAny(StrUtil.removeAny(href, "/"), "favicon.ico"))) { + return "http://" + StrUtil.removePrefix(href, "/"); + } + //如果url已经包含了logo + if (StrUtil.containsAny(url, "favicon")) { + return url; + } + //如果logo中有url + if (StrUtil.containsAny(href, "http") || StrUtil.containsAny(href, "https")) { + return href; + } + return StrUtil.format("{}/{}", url, StrUtil.removePrefix(href, "/")); + } +} diff --git a/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/discover/CommonUrlTitleDiscover.java b/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/discover/CommonUrlTitleDiscover.java deleted file mode 100644 index 6471610..0000000 --- a/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/discover/CommonUrlTitleDiscover.java +++ /dev/null @@ -1,15 +0,0 @@ -package com.abin.mallchat.common.common.utils.discover; - -import org.jsoup.nodes.Document; - -/** - * Description: 通用的标题解析类 - * Author: abin - * Date: 2023-05-27 - */ -public class CommonUrlTitleDiscover extends AbstractUrlTitleDiscover { - @Override - public String getDocTitle(Document document) { - return document.title(); - } -} diff --git a/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/discover/PrioritizedUrlDiscover.java b/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/discover/PrioritizedUrlDiscover.java new file mode 100644 index 0000000..ab57334 --- /dev/null +++ b/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/discover/PrioritizedUrlDiscover.java @@ -0,0 +1,60 @@ +package com.abin.mallchat.common.common.utils.discover; + +import cn.hutool.core.util.StrUtil; +import org.jetbrains.annotations.Nullable; +import org.jsoup.nodes.Document; + +import 
java.util.ArrayList; +import java.util.List; + +/** + * Description: 具有优先级的title查询器 + * Author: abin + * Date: 2023-05-27 + */ +public class PrioritizedUrlDiscover extends AbstractUrlDiscover { + + private final List urlTitleDiscovers = new ArrayList<>(2); + + public PrioritizedUrlDiscover() { + urlTitleDiscovers.add(new CommonUrlDiscover()); + urlTitleDiscovers.add(new WxUrlDiscover()); + } + + + @Nullable + @Override + public String getTitle(Document document) { + for (UrlDiscover urlDiscover : urlTitleDiscovers) { + String urlTitle = urlDiscover.getTitle(document); + if (StrUtil.isNotBlank(urlTitle)) { + return urlTitle; + } + } + return null; + } + + @Nullable + @Override + public String getDescription(Document document) { + for (UrlDiscover urlDiscover : urlTitleDiscovers) { + String urlDescription = urlDiscover.getDescription(document); + if (StrUtil.isNotBlank(urlDescription)) { + return urlDescription; + } + } + return null; + } + + @Nullable + @Override + public String getImage(String url, Document document) { + for (UrlDiscover urlDiscover : urlTitleDiscovers) { + String urlImage = urlDiscover.getImage(url,document); + if (StrUtil.isNotBlank(urlImage)) { + return urlImage; + } + } + return null; + } +} diff --git a/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/discover/PrioritizedUrlTitleDiscover.java b/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/discover/PrioritizedUrlTitleDiscover.java deleted file mode 100644 index 8c7fb4d..0000000 --- a/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/discover/PrioritizedUrlTitleDiscover.java +++ /dev/null @@ -1,33 +0,0 @@ -package com.abin.mallchat.common.common.utils.discover; - -import cn.hutool.core.util.StrUtil; -import org.jsoup.nodes.Document; - -import java.util.ArrayList; -import java.util.List; - -/** - * Description: 具有优先级的title查询器 - * Author: abin - * Date: 2023-05-27 - */ -public class PrioritizedUrlTitleDiscover extends 
AbstractUrlTitleDiscover { - - private final List urlTitleDiscovers = new ArrayList<>(2); - - public PrioritizedUrlTitleDiscover() { - urlTitleDiscovers.add(new CommonUrlTitleDiscover()); - urlTitleDiscovers.add(new WxUrlTitleDiscover()); - } - - @Override - public String getDocTitle(Document document) { - for (UrlTitleDiscover urlTitleDiscover : urlTitleDiscovers) { - String urlTitle = urlTitleDiscover.getDocTitle(document); - if (StrUtil.isNotBlank(urlTitle)) { - return urlTitle; - } - } - return null; - } -} diff --git a/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/discover/UrlDiscover.java b/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/discover/UrlDiscover.java new file mode 100644 index 0000000..3515c28 --- /dev/null +++ b/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/discover/UrlDiscover.java @@ -0,0 +1,46 @@ +package com.abin.mallchat.common.common.utils.discover; + +import cn.hutool.core.date.StopWatch; +import cn.hutool.core.util.StrUtil; +import com.abin.mallchat.common.common.utils.discover.domain.UrlInfo; +import org.jsoup.nodes.Document; + +import javax.annotation.Nullable; +import java.util.Map; + +/** + * @author zhaoqichao + * @date 2023/7/3 16:34 + */ +public interface UrlDiscover { + + + @Nullable + Map getUrlContentMap(String content); + + @Nullable + UrlInfo getContent(String url); + + @Nullable + String getTitle(Document document); + + @Nullable + String getDescription(Document document); + + @Nullable + String getImage(String url, Document document); + + public static void main(String[] args) { + StopWatch stopWatch = new StopWatch(); + stopWatch.start(); + String longStr = "其中包含一个URL www.baidu.com,一个带有端口号的URL http://www.jd.com:80, 一个带有路径的URL http://mallchat.cn, 还有美团技术文章https://mp.weixin.qq.com/s/hwTf4bDck9_tlFpgVDeIKg "; +// String longStr = "一个带有端口号的URL http://www.jd.com:80,"; +// String longStr = "一个带有路径的URL http://mallchat.cn"; + PrioritizedUrlDiscover discover = new 
PrioritizedUrlDiscover(); + final Map map = discover.getUrlContentMap(longStr); + System.out.println(map); + stopWatch.stop(); + long cost = stopWatch.getTotalTimeMillis(); + System.out.println(cost); + } +} diff --git a/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/discover/UrlTitleDiscover.java b/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/discover/UrlTitleDiscover.java deleted file mode 100644 index e2fac68..0000000 --- a/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/discover/UrlTitleDiscover.java +++ /dev/null @@ -1,35 +0,0 @@ -package com.abin.mallchat.common.common.utils.discover; - -import cn.hutool.core.date.StopWatch; -import org.jsoup.nodes.Document; - -import javax.annotation.Nullable; -import java.util.Map; - -public interface UrlTitleDiscover { - - - @Nullable - Map getContentTitleMap(String content); - - - @Nullable - String getUrlTitle(String url); - - @Nullable - String getDocTitle(Document document); - - public static void main(String[] args) {//用异步多任务查询并合并 974 //串行访问的速度1349 1291 1283 1559 - StopWatch stopWatch = new StopWatch(); - stopWatch.start(); - String longStr = "这是一个很长的字符串再来 www.github.com,其中包含一个URL www.baidu.com,, 一个带有端口号的URL http://www.jd.com:80, 一个带有路径的URL http://mallchat.cn, 还有美团技术文章https://mp.weixin.qq.com/s/hwTf4bDck9_tlFpgVDeIKg "; - PrioritizedUrlTitleDiscover discover = new PrioritizedUrlTitleDiscover(); - Map contentTitleMap = discover.getContentTitleMap(longStr); - System.out.println(contentTitleMap); -// -// Jsoup.connect("http:// www.github.com"); - stopWatch.stop(); - long cost = stopWatch.getTotalTimeMillis(); - System.out.println(cost); - }//{http://mallchat.cn=MallChat, www.baidu.com=百度一下,你就知道, https://mp.weixin.qq.com/s/hwTf4bDck9_tlFpgVDeIKg=超大规模数据库集群保稳系列之二:数据库攻防演练建设实践, http://www.jd.com:80=京东(JD.COM)-正品低价、品质保障、配送及时、轻松购物!} -} diff --git a/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/discover/WxUrlDiscover.java 
b/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/discover/WxUrlDiscover.java new file mode 100644 index 0000000..a5bb330 --- /dev/null +++ b/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/discover/WxUrlDiscover.java @@ -0,0 +1,30 @@ +package com.abin.mallchat.common.common.utils.discover; + +import org.jetbrains.annotations.Nullable; +import org.jsoup.nodes.Document; + +/** + * Description: 针对微信公众号文章的标题获取类 + * Author: abin + * Date: 2023-05-27 + */ +public class WxUrlDiscover extends AbstractUrlDiscover { + + @Nullable + @Override + public String getTitle(Document document) { + return document.getElementsByAttributeValue("property", "og:title").attr("content"); + } + + @Nullable + @Override + public String getDescription(Document document) { + return document.getElementsByAttributeValue("property", "og:description").attr("content"); + } + + @Nullable + @Override + public String getImage(String url, Document document) { + return document.getElementsByAttributeValue("property", "og:image").attr("content"); + } +} diff --git a/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/discover/WxUrlTitleDiscover.java b/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/discover/WxUrlTitleDiscover.java deleted file mode 100644 index 29b1172..0000000 --- a/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/discover/WxUrlTitleDiscover.java +++ /dev/null @@ -1,15 +0,0 @@ -package com.abin.mallchat.common.common.utils.discover; - -import org.jsoup.nodes.Document; - -/** - * Description: 针对微信公众号文章的标题获取类 - * Author: abin - * Date: 2023-05-27 - */ -public class WxUrlTitleDiscover extends AbstractUrlTitleDiscover { - @Override - public String getDocTitle(Document document) { - return document.getElementsByAttributeValue("property", "og:title").attr("content"); - } -} diff --git a/mallchat-custom-server/src/main/java/com/abin/mallchat/custom/chat/service/strategy/msg/TextMsgHandler.java 
b/mallchat-custom-server/src/main/java/com/abin/mallchat/custom/chat/service/strategy/msg/TextMsgHandler.java index d8e347a..6b63a31 100644 --- a/mallchat-custom-server/src/main/java/com/abin/mallchat/custom/chat/service/strategy/msg/TextMsgHandler.java +++ b/mallchat-custom-server/src/main/java/com/abin/mallchat/custom/chat/service/strategy/msg/TextMsgHandler.java @@ -11,7 +11,7 @@ import com.abin.mallchat.common.chat.service.cache.MsgCache; import com.abin.mallchat.common.common.domain.enums.YesOrNoEnum; import com.abin.mallchat.common.common.utils.AssertUtil; import com.abin.mallchat.common.common.utils.SensitiveWordUtils; -import com.abin.mallchat.common.common.utils.discover.PrioritizedUrlTitleDiscover; +import com.abin.mallchat.common.common.utils.discover.PrioritizedUrlDiscover; import com.abin.mallchat.common.user.domain.entity.User; import com.abin.mallchat.common.user.domain.enums.RoleEnum; import com.abin.mallchat.common.user.service.IRoleService; @@ -47,7 +47,7 @@ public class TextMsgHandler extends AbstractMsgHandler { @Autowired private IRoleService iRoleService; - private static final PrioritizedUrlTitleDiscover URL_TITLE_DISCOVER = new PrioritizedUrlTitleDiscover(); + private static final PrioritizedUrlDiscover URL_TITLE_DISCOVER = new PrioritizedUrlDiscover(); @Override MessageTypeEnum getMsgTypeEnum() { From 27a7709f08e2da30cfa00edc7faa3e78095c55cb Mon Sep 17 00:00:00 2001 From: zhaoqichao <1416537683@qq.com> Date: Wed, 5 Jul 2023 09:42:04 +0800 Subject: [PATCH 2/6] =?UTF-8?q?Url=E8=A7=A3=E6=9E=90=E7=BB=84=E4=BB=B6?= =?UTF-8?q?=E9=87=8D=E6=9E=84=E5=8F=8A=E4=BC=98=E5=8C=96?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- MallChat | 1 + .../common/utils/discover/domain/UrlInfo.java | 32 +++++++++++++++++++ 2 files changed, 33 insertions(+) create mode 160000 MallChat create mode 100644 mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/discover/domain/UrlInfo.java diff --git a/MallChat 
b/MallChat new file mode 160000 index 0000000..c47b487 --- /dev/null +++ b/MallChat @@ -0,0 +1 @@ +Subproject commit c47b48760dd1eaaed6cf1c62930c65032ed66752 diff --git a/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/discover/domain/UrlInfo.java b/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/discover/domain/UrlInfo.java new file mode 100644 index 0000000..8f02b7c --- /dev/null +++ b/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/discover/domain/UrlInfo.java @@ -0,0 +1,32 @@ +package com.abin.mallchat.common.common.utils.discover.domain; + +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +/** + * @author zhaoqichao + * @date 2023/7/3 16:12 + */ +@Data +@Builder +@AllArgsConstructor +@NoArgsConstructor +public class UrlInfo { + /** + * 标题 + **/ + String title; + + /** + * 描述 + **/ + String description; + + /** + * 网站LOGO + **/ + String image; + +} From b516e51bc931bc96ffa08acdfa2dfe1f13db13bf Mon Sep 17 00:00:00 2001 From: zhaoqichao <1416537683@qq.com> Date: Wed, 5 Jul 2023 18:12:17 +0800 Subject: [PATCH 3/6] =?UTF-8?q?Url=E8=A7=A3=E6=9E=90=E7=BB=84=E4=BB=B6?= =?UTF-8?q?=E4=BC=98=E5=8C=96=EF=BC=8Cserver=E7=AB=AF=E6=98=A0=E5=B0=84?= =?UTF-8?q?=E8=B0=83=E6=95=B4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../common/chat/domain/entity/msg/MessageExtra.java | 3 ++- .../common/utils/discover/AbstractUrlDiscover.java | 4 ++-- .../utils/discover/PrioritizedUrlDiscover.java | 12 ++++++------ .../chat/domain/vo/response/ChatMessageResp.java | 3 ++- .../chat/domain/vo/response/msg/TextMsgResp.java | 3 ++- .../custom/chat/service/adapter/MessageAdapter.java | 2 +- .../chat/service/strategy/msg/TextMsgHandler.java | 7 ++++--- 7 files changed, 19 insertions(+), 15 deletions(-) diff --git a/mallchat-common/src/main/java/com/abin/mallchat/common/chat/domain/entity/msg/MessageExtra.java 
b/mallchat-common/src/main/java/com/abin/mallchat/common/chat/domain/entity/msg/MessageExtra.java index 9a68c74..fa3af00 100644 --- a/mallchat-common/src/main/java/com/abin/mallchat/common/chat/domain/entity/msg/MessageExtra.java +++ b/mallchat-common/src/main/java/com/abin/mallchat/common/chat/domain/entity/msg/MessageExtra.java @@ -1,5 +1,6 @@ package com.abin.mallchat.common.chat.domain.entity.msg; +import com.abin.mallchat.common.common.utils.discover.domain.UrlInfo; import com.fasterxml.jackson.annotation.JsonIgnoreProperties; import lombok.AllArgsConstructor; import lombok.Builder; @@ -23,7 +24,7 @@ import java.util.Map; public class MessageExtra implements Serializable { private static final long serialVersionUID = 1L; //url跳转链接 - private Map urlTitleMap; + private Map urlContentMap; //消息撤回详情 private MsgRecall recall; //艾特的uid diff --git a/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/discover/AbstractUrlDiscover.java b/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/discover/AbstractUrlDiscover.java index 80d7ab8..4da24e9 100644 --- a/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/discover/AbstractUrlDiscover.java +++ b/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/discover/AbstractUrlDiscover.java @@ -5,7 +5,6 @@ import cn.hutool.core.util.StrUtil; import com.abin.mallchat.common.common.utils.FutureUtils; import com.abin.mallchat.common.common.utils.discover.domain.UrlInfo; import lombok.extern.slf4j.Slf4j; -import org.apache.commons.lang3.StringUtils; import org.jetbrains.annotations.Nullable; import org.jsoup.Connection; import org.jsoup.Jsoup; @@ -30,7 +29,7 @@ public abstract class AbstractUrlDiscover implements UrlDiscover { private static final Pattern PATTERN = Pattern.compile("((http|https)://)?(www.)?([\\w_-]+(?:(?:\\.[\\w_-]+)+))([\\w.,@?^=%&:/~+#-]*[\\w@?^=%&/~+#-])?"); - @javax.annotation.Nullable + @Nullable @Override public Map getUrlContentMap(String 
content) { @@ -83,4 +82,5 @@ public abstract class AbstractUrlDiscover implements UrlDiscover { } return null; } + } diff --git a/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/discover/PrioritizedUrlDiscover.java b/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/discover/PrioritizedUrlDiscover.java index ab57334..106f598 100644 --- a/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/discover/PrioritizedUrlDiscover.java +++ b/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/discover/PrioritizedUrlDiscover.java @@ -14,18 +14,18 @@ import java.util.List; */ public class PrioritizedUrlDiscover extends AbstractUrlDiscover { - private final List urlTitleDiscovers = new ArrayList<>(2); + private final List urlDiscovers = new ArrayList<>(2); public PrioritizedUrlDiscover() { - urlTitleDiscovers.add(new CommonUrlDiscover()); - urlTitleDiscovers.add(new WxUrlDiscover()); + urlDiscovers.add(new CommonUrlDiscover()); + urlDiscovers.add(new WxUrlDiscover()); } @Nullable @Override public String getTitle(Document document) { - for (UrlDiscover urlDiscover : urlTitleDiscovers) { + for (UrlDiscover urlDiscover : urlDiscovers) { String urlTitle = urlDiscover.getTitle(document); if (StrUtil.isNotBlank(urlTitle)) { return urlTitle; @@ -37,7 +37,7 @@ public class PrioritizedUrlDiscover extends AbstractUrlDiscover { @Nullable @Override public String getDescription(Document document) { - for (UrlDiscover urlDiscover : urlTitleDiscovers) { + for (UrlDiscover urlDiscover : urlDiscovers) { String urlDescription = urlDiscover.getDescription(document); if (StrUtil.isNotBlank(urlDescription)) { return urlDescription; @@ -49,7 +49,7 @@ public class PrioritizedUrlDiscover extends AbstractUrlDiscover { @Nullable @Override public String getImage(String url, Document document) { - for (UrlDiscover urlDiscover : urlTitleDiscovers) { + for (UrlDiscover urlDiscover : urlDiscovers) { String urlImage = 
urlDiscover.getImage(url,document); if (StrUtil.isNotBlank(urlImage)) { return urlImage; diff --git a/mallchat-custom-server/src/main/java/com/abin/mallchat/custom/chat/domain/vo/response/ChatMessageResp.java b/mallchat-custom-server/src/main/java/com/abin/mallchat/custom/chat/domain/vo/response/ChatMessageResp.java index 968aae9..2cfed96 100644 --- a/mallchat-custom-server/src/main/java/com/abin/mallchat/custom/chat/domain/vo/response/ChatMessageResp.java +++ b/mallchat-custom-server/src/main/java/com/abin/mallchat/custom/chat/domain/vo/response/ChatMessageResp.java @@ -1,5 +1,6 @@ package com.abin.mallchat.custom.chat.domain.vo.response; +import com.abin.mallchat.common.common.utils.discover.domain.UrlInfo; import io.swagger.annotations.ApiModelProperty; import lombok.AllArgsConstructor; import lombok.Builder; @@ -50,7 +51,7 @@ public class ChatMessageResp { private String content; @ApiModelProperty("消息链接映射-废弃") @Deprecated - private Map urlTitleMap; + private Map urlContentMap; @ApiModelProperty("消息类型 1正常文本 2.撤回消息") private Integer type; @ApiModelProperty("消息内容不同的消息类型,内容体不同,见https://www.yuque.com/snab/mallcaht/rkb2uz5k1qqdmcmd") diff --git a/mallchat-custom-server/src/main/java/com/abin/mallchat/custom/chat/domain/vo/response/msg/TextMsgResp.java b/mallchat-custom-server/src/main/java/com/abin/mallchat/custom/chat/domain/vo/response/msg/TextMsgResp.java index 34db6c5..17a7b48 100644 --- a/mallchat-custom-server/src/main/java/com/abin/mallchat/custom/chat/domain/vo/response/msg/TextMsgResp.java +++ b/mallchat-custom-server/src/main/java/com/abin/mallchat/custom/chat/domain/vo/response/msg/TextMsgResp.java @@ -1,5 +1,6 @@ package com.abin.mallchat.custom.chat.domain.vo.response.msg; +import com.abin.mallchat.common.common.utils.discover.domain.UrlInfo; import io.swagger.annotations.ApiModelProperty; import lombok.AllArgsConstructor; import lombok.Builder; @@ -22,7 +23,7 @@ public class TextMsgResp { @ApiModelProperty("消息内容") private String content; 
@ApiModelProperty("消息链接映射") - private Map urlTitleMap; + private Map urlContentMap; @ApiModelProperty("艾特的uid") private List atUidList; @ApiModelProperty("父消息,如果没有父消息,返回的是null") diff --git a/mallchat-custom-server/src/main/java/com/abin/mallchat/custom/chat/service/adapter/MessageAdapter.java b/mallchat-custom-server/src/main/java/com/abin/mallchat/custom/chat/service/adapter/MessageAdapter.java index 53e10fa..a751ad6 100644 --- a/mallchat-custom-server/src/main/java/com/abin/mallchat/custom/chat/service/adapter/MessageAdapter.java +++ b/mallchat-custom-server/src/main/java/com/abin/mallchat/custom/chat/service/adapter/MessageAdapter.java @@ -56,7 +56,7 @@ public class MessageAdapter { messageVO.setSendTime(message.getCreateTime()); AbstractMsgHandler msgHandler = MsgHandlerFactory.getStrategyNoNull(message.getType()); messageVO.setBody(msgHandler.showMsg(message)); - messageVO.setUrlTitleMap(Optional.ofNullable(message.getExtra()).map(MessageExtra::getUrlTitleMap).orElse(null)); + messageVO.setUrlContentMap(Optional.ofNullable(message.getExtra()).map(MessageExtra::getUrlContentMap).orElse(null)); Message replyMessage = replyMap.get(message.getReplyMsgId()); //回复消息 diff --git a/mallchat-custom-server/src/main/java/com/abin/mallchat/custom/chat/service/strategy/msg/TextMsgHandler.java b/mallchat-custom-server/src/main/java/com/abin/mallchat/custom/chat/service/strategy/msg/TextMsgHandler.java index 6b63a31..1cc3f25 100644 --- a/mallchat-custom-server/src/main/java/com/abin/mallchat/custom/chat/service/strategy/msg/TextMsgHandler.java +++ b/mallchat-custom-server/src/main/java/com/abin/mallchat/custom/chat/service/strategy/msg/TextMsgHandler.java @@ -12,6 +12,7 @@ import com.abin.mallchat.common.common.domain.enums.YesOrNoEnum; import com.abin.mallchat.common.common.utils.AssertUtil; import com.abin.mallchat.common.common.utils.SensitiveWordUtils; import com.abin.mallchat.common.common.utils.discover.PrioritizedUrlDiscover; +import 
com.abin.mallchat.common.common.utils.discover.domain.UrlInfo; import com.abin.mallchat.common.user.domain.entity.User; import com.abin.mallchat.common.user.domain.enums.RoleEnum; import com.abin.mallchat.common.user.service.IRoleService; @@ -91,8 +92,8 @@ public class TextMsgHandler extends AbstractMsgHandler { } //判断消息url跳转 - Map urlTitleMap = URL_TITLE_DISCOVER.getContentTitleMap(body.getContent()); - extra.setUrlTitleMap(urlTitleMap); + Map urlContentMap = URL_TITLE_DISCOVER.getUrlContentMap(body.getContent()); + extra.setUrlContentMap(urlContentMap); //艾特功能 if (CollectionUtil.isNotEmpty(body.getAtUidList())) { extra.setAtUidList(body.getAtUidList()); @@ -106,7 +107,7 @@ public class TextMsgHandler extends AbstractMsgHandler { public Object showMsg(Message msg) { TextMsgResp resp = new TextMsgResp(); resp.setContent(msg.getContent()); - resp.setUrlTitleMap(Optional.ofNullable(msg.getExtra()).map(MessageExtra::getUrlTitleMap).orElse(null)); + resp.setUrlContentMap(Optional.ofNullable(msg.getExtra()).map(MessageExtra::getUrlContentMap).orElse(null)); resp.setAtUidList(Optional.ofNullable(msg.getExtra()).map(MessageExtra::getAtUidList).orElse(null)); //回复消息 Optional reply = Optional.ofNullable(msg.getReplyMsgId()) From 21ea09cd4cb3e24941a5d1ee1e6e414dd3c8f3e3 Mon Sep 17 00:00:00 2001 From: zhaoqichao <1416537683@qq.com> Date: Thu, 6 Jul 2023 10:23:44 +0800 Subject: [PATCH 4/6] =?UTF-8?q?Url=E8=A7=A3=E6=9E=90=E7=BB=84=E4=BB=B6?= =?UTF-8?q?=E6=A8=A1=E5=BC=8F=E9=87=8D=E6=9E=84=EF=BC=88=E4=B8=8D=E5=BD=B1?= =?UTF-8?q?=E5=93=8D=E6=97=A7=E7=89=88=E6=9C=AC=E4=BD=BF=E7=94=A8=EF=BC=89?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../common/utils/chain/Application.java | 22 +++++ .../common/utils/chain/FactoryUrlHandler.java | 96 +++++++++++++++++++ .../utils/chain/PrioritizedUrlHandler.java | 39 ++++++++ .../common/common/utils/chain/UrlHandler.java | 25 +++++ .../common/utils/chain/WxUrlHandler.java | 32 +++++++ 
.../common/utils/chain/dto/UrlInfo.java | 32 +++++++
 6 files changed, 246 insertions(+)
 create mode 100644 mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/chain/Application.java
 create mode 100644 mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/chain/FactoryUrlHandler.java
 create mode 100644 mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/chain/PrioritizedUrlHandler.java
 create mode 100644 mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/chain/UrlHandler.java
 create mode 100644 mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/chain/WxUrlHandler.java
 create mode 100644 mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/chain/dto/UrlInfo.java

diff --git a/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/chain/Application.java b/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/chain/Application.java
new file mode 100644
index 0000000..b2c5d70
--- /dev/null
+++ b/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/chain/Application.java
@@ -0,0 +1,22 @@
+package com.abin.mallchat.common.common.utils.chain;
+
+import com.abin.mallchat.common.common.utils.discover.domain.UrlInfo;
+import org.jetbrains.annotations.Nullable;
+import org.jsoup.nodes.Document;
+
+import java.util.Map;
+
+/**
+ * Description: manual smoke test that runs PrioritizedUrlHandler over a sample message and prints the result
+ * Author: achao
+ * Date: 2023/7/6 9:29
+ */
+public class Application {
+    public static void main(String[] args) {
+        PrioritizedUrlHandler handler = new PrioritizedUrlHandler();
+        String longStr = "其中包含一个URL www.baidu.com,一个带有端口号的URL http://www.jd.com:80, 一个带有路径的URL http://mallchat.cn, 还有美团技术文章https://mp.weixin.qq.com/s/hwTf4bDck9_tlFpgVDeIKg ";
+
+        Map<String, UrlInfo> urlContentMap = handler.getUrlContentMap(longStr);
+        System.out.println(urlContentMap);
+    }
+}
diff --git a/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/chain/FactoryUrlHandler.java
b/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/chain/FactoryUrlHandler.java
new file mode 100644
index 0000000..bd8d0e8
--- /dev/null
+++ b/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/chain/FactoryUrlHandler.java
@@ -0,0 +1,96 @@
+package com.abin.mallchat.common.common.utils.chain;
+
+import cn.hutool.core.util.ReUtil;
+import cn.hutool.core.util.StrUtil;
+import com.abin.mallchat.common.common.utils.FutureUtils;
+import com.abin.mallchat.common.common.utils.discover.domain.UrlInfo;
+import lombok.extern.slf4j.Slf4j;
+import org.jsoup.Connection;
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
+import org.springframework.data.util.Pair;
+
+import javax.annotation.Nullable;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
+import java.util.concurrent.CompletableFuture;
+import java.util.regex.Pattern;
+import java.util.stream.Collectors;
+
+/**
+ * Description: base link handler; finds the links in a message, fetches each page and builds its UrlInfo
+ * Author: achao
+ * Date: 2023/7/6 9:12
+ */
+@Slf4j
+public abstract class FactoryUrlHandler extends UrlHandler {
+
+    // Regex that recognises links inside message text
+    private static final Pattern PATTERN = Pattern.compile("((http|https)://)?(www.)?([\\w_-]+(?:(?:\\.[\\w_-]+)+))([\\w.,@?^=%&:/~+#-]*[\\w@?^=%&/~+#-])?");
+
+    @Override
+    @Nullable
+    public Map<String, UrlInfo> getUrlContentMap(String content) {
+        if (StrUtil.isBlank(content)) {
+            return new HashMap<>();
+        }
+        List<String> matchList = ReUtil.findAll(PATTERN, content, 0);
+
+        // Fetch every matched link in parallel; a link whose page could not be fetched maps to null
+        List<CompletableFuture<Pair<String, UrlInfo>>> futures = matchList.stream().map(match -> CompletableFuture.supplyAsync(() -> {
+            UrlInfo urlInfo = getContent(match);
+            return Objects.isNull(urlInfo) ? null : Pair.of(match, urlInfo);
+        })).collect(Collectors.toList());
+        CompletableFuture<List<Pair<String, UrlInfo>>> future = FutureUtils.sequenceNonNull(futures);
+        // Assemble the result; when the same link text matches more than once the first entry is kept
+        return future.join().stream().collect(Collectors.toMap(Pair::getFirst, Pair::getSecond, (a, b) -> a));
+    }
+
+    private UrlInfo getContent(String url) {
+        url = !StrUtil.startWith(url, "http") ? "http://" + url : url;
+        Document document = getUrlDocument(url);
+        // getUrlDocument returns null when the fetch fails; skip the link instead of hitting an NPE in the getters
+        return Objects.isNull(document) ? null : UrlInfo.builder()
+                .title(getTitle(document))
+                .description(getDescription(document))
+                .image(getImage(url, document)).build();
+    }
+
+    protected Document getUrlDocument(String matchUrl) {
+        try {
+            Connection connect = Jsoup.connect(matchUrl);
+            connect.timeout(2000); // milliseconds
+            return connect.get();
+        } catch (Exception e) {
+            log.error("find error:url:{}", matchUrl, e);
+        }
+        return null; // fetch failed; the error has already been logged above
+    }
+
+    /**
+     * Get the title of the link.
+     * @param document the fetched page; getContent only passes non-null documents
+     * @return the title text, may be null or blank when the page does not provide one
+     */
+    @Nullable
+    abstract String getTitle(Document document);
+
+    /**
+     * Get the description of the link.
+     * @param document the fetched page; getContent only passes non-null documents
+     * @return the description text, may be null or blank when the page does not provide one
+     */
+    @Nullable
+    abstract String getDescription(Document document);
+
+    /**
+     * Get the logo of the link.
+     * @param url the link that was fetched (already prefixed with a scheme by getContent)
+     * @param document the fetched page; getContent only passes non-null documents
+     * @return the logo address, may be null or blank when none can be determined
+     */
+    @Nullable
+    abstract String getImage(String url, Document document);
+}
diff --git a/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/chain/PrioritizedUrlHandler.java b/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/chain/PrioritizedUrlHandler.java
new file mode 100644
index 0000000..b06ad32
--- /dev/null
+++ b/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/chain/PrioritizedUrlHandler.java
@@ -0,0 +1,39 @@
+package com.abin.mallchat.common.common.utils.chain;
+
+import cn.hutool.core.util.StrUtil;
+import com.abin.mallchat.common.common.utils.discover.UrlDiscover;
+import com.abin.mallchat.common.common.utils.discover.domain.UrlInfo;
+import org.jetbrains.annotations.Nullable;
+import org.jsoup.nodes.Document;
+
+import java.util.List;
+import java.util.Map;
+
+/**
+ * Description: prioritized link handler; prefers the WxUrlHandler result and falls back to CommonUrlHandler when it is blank
+ * Author: achao
+ * Date: 2023/7/6 9:36
+ */
+public class PrioritizedUrlHandler extends FactoryUrlHandler {
+
+    private final FactoryUrlHandler commonUrlHandler = new CommonUrlHandler();
+    private final FactoryUrlHandler wxUrlHandler = new WxUrlHandler();
+
+    @Nullable
+    @Override
+    String getTitle(Document document) {
+        return StrUtil.isBlank(wxUrlHandler.getTitle(document)) ? commonUrlHandler.getTitle(document) : wxUrlHandler.getTitle(document);
+    }
+
+    @Nullable
+    @Override
+    String getDescription(Document document) {
+        return StrUtil.isBlank(wxUrlHandler.getDescription(document)) ? commonUrlHandler.getDescription(document) : wxUrlHandler.getDescription(document);
+    }
+
+    @Nullable
+    @Override
+    String getImage(String url, Document document) {
+        return StrUtil.isBlank(wxUrlHandler.getImage(url, document)) ? commonUrlHandler.getImage(url, document) : wxUrlHandler.getImage(url, document);
+    }
+}
diff --git a/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/chain/UrlHandler.java b/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/chain/UrlHandler.java
new file mode 100644
index 0000000..c9a9a3d
--- /dev/null
+++ b/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/chain/UrlHandler.java
@@ -0,0 +1,25 @@
+package com.abin.mallchat.common.common.utils.chain;
+
+import com.abin.mallchat.common.common.utils.discover.domain.UrlInfo;
+import org.jsoup.nodes.Document;
+
+import javax.annotation.Nullable;
+import java.util.Map;
+import java.util.regex.Pattern;
+
+/**
+ * Description: abstract contract for turning the links of a message into a map
+ * Author: achao
+ * Date: 2023/7/6 8:58
+ */
+public abstract class UrlHandler {
+
+    /**
+     * Extract all links in the message and assemble them into a map.
+     * @param content the message text to scan
+     * @return a map keyed by the matched link text, with the information extracted for that link as value
+     */
+    @Nullable
+    abstract Map<String, UrlInfo> getUrlContentMap(String content);
+
+}
diff --git a/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/chain/WxUrlHandler.java b/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/chain/WxUrlHandler.java
new file mode 100644
index 0000000..f8356d9
--- /dev/null
+++ b/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/chain/WxUrlHandler.java
@@ -0,0 +1,32 @@
+package com.abin.mallchat.common.common.utils.chain;
+
+import cn.hutool.core.util.StrUtil;
+import org.jetbrains.annotations.Nullable;
+import org.jsoup.nodes.Document;
+
+/**
+ * Description: link handler for WeChat-style pages; reads the og:title / og:description / og:image meta properties
+ * Author: achao
+ * Date: 2023/7/6 9:34
+ */
+public class WxUrlHandler extends FactoryUrlHandler {
+    @Nullable
+    @Override
+    public String getTitle(Document document) {
+        return document.getElementsByAttributeValue("property", "og:title").attr("content");
+    }
+
+    @Nullable
+    @Override
+    public String getDescription(Document document) {
+        String description = document.getElementsByAttributeValue("property", "og:description").attr("content");
+        // Keep only the first sentence; subBefore returns the text as-is when it has no "。" (substring(0, -1) would throw)
+        return StrUtil.subBefore(description, "。", false);
+    }
+
+    @Nullable
+    @Override
+    public String getImage(String url, Document document) {
+        return document.getElementsByAttributeValue("property", "og:image").attr("content");
+    }
+}
diff --git a/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/chain/dto/UrlInfo.java b/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/chain/dto/UrlInfo.java
new file mode 100644
index 0000000..ad1050a
--- /dev/null
+++ b/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/chain/dto/UrlInfo.java
@@ -0,0 +1,32 @@
+package com.abin.mallchat.common.common.utils.chain.dto;
+
+import lombok.AllArgsConstructor;
+import lombok.Builder;
+import lombok.Data;
+import lombok.NoArgsConstructor;
+
+/**
+ * Description: information extracted from a link
+ * Author: achao
+ * Date: 2023/7/6 8:54
+ */
+@Data
+@Builder
+@AllArgsConstructor
+@NoArgsConstructor
+public class UrlInfo {
+    /**
+     * Title
+     **/
+    String title;
+
+    /**
+     * Description
+     **/
+    String description;
+
+    /**
+     * Site logo
+     **/
+    String image;
+}

From a30d5f27661174788bb13f8759cd175eeb2d8bc6 Mon Sep 17 00:00:00 2001
From: zhaoqichao <1416537683@qq.com>
Date: Thu, 6 Jul 2023 10:26:45 +0800
Subject: [PATCH 5/6] =?UTF-8?q?Url=E8=A7=A3=E6=9E=90=E7=BB=84=E4=BB=B6?=
 =?UTF-8?q?=E4=BC=98=E5=85=88=E7=BA=A7=E8=B0=83=E6=95=B4?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
.../common/common/utils/discover/CommonUrlDiscover.java | 4 ---- .../common/common/utils/discover/PrioritizedUrlDiscover.java | 2 +- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/discover/CommonUrlDiscover.java b/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/discover/CommonUrlDiscover.java index b73c81d..2f3b842 100644 --- a/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/discover/CommonUrlDiscover.java +++ b/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/discover/CommonUrlDiscover.java @@ -31,10 +31,6 @@ public class CommonUrlDiscover extends AbstractUrlDiscover { @Nullable @Override public String getImage(String url, Document document) { - //如果包含og则是微信链接 - if(StrUtil.isNotBlank(document.getElementsByAttributeValue("property", "og:title").attr("content"))){ - return null; - } String image = document.select("link[type=image/x-icon]").attr("href"); //如果没有去匹配含有icon属性的logo String href = StrUtil.isEmpty(image) ? 
document.select("link[rel$=icon]").attr("href") : image; diff --git a/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/discover/PrioritizedUrlDiscover.java b/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/discover/PrioritizedUrlDiscover.java index 106f598..eef3c11 100644 --- a/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/discover/PrioritizedUrlDiscover.java +++ b/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/discover/PrioritizedUrlDiscover.java @@ -17,8 +17,8 @@ public class PrioritizedUrlDiscover extends AbstractUrlDiscover { private final List urlDiscovers = new ArrayList<>(2); public PrioritizedUrlDiscover() { - urlDiscovers.add(new CommonUrlDiscover()); urlDiscovers.add(new WxUrlDiscover()); + urlDiscovers.add(new CommonUrlDiscover()); } From eddee5647fa893626197d16d062cc481d6c0c459 Mon Sep 17 00:00:00 2001 From: zhaoqichao <1416537683@qq.com> Date: Thu, 6 Jul 2023 10:28:04 +0800 Subject: [PATCH 6/6] =?UTF-8?q?Url=E8=A7=A3=E6=9E=90=E7=BB=84=E4=BB=B6?= =?UTF-8?q?=E6=A8=A1=E5=BC=8F=E9=87=8D=E6=9E=84=EF=BC=88=E4=B8=8D=E5=BD=B1?= =?UTF-8?q?=E5=93=8D=E6=97=A7=E7=89=88=E6=9C=AC=E4=BD=BF=E7=94=A8=EF=BC=89?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../common/utils/chain/CommonUrlHandler.java | 51 +++++++++++++++++++ 1 file changed, 51 insertions(+) create mode 100644 mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/chain/CommonUrlHandler.java diff --git a/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/chain/CommonUrlHandler.java b/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/chain/CommonUrlHandler.java new file mode 100644 index 0000000..5d85f00 --- /dev/null +++ b/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/chain/CommonUrlHandler.java @@ -0,0 +1,51 @@ +package com.abin.mallchat.common.common.utils.chain; + +import cn.hutool.core.util.StrUtil; 
+import org.jetbrains.annotations.Nullable;
+import org.jsoup.nodes.Document;
+
+/**
+ * Description: generic link handler; reads the page title, the description/keywords meta tags and the favicon link
+ * Author: achao
+ * Date: 2023/7/6 9:25
+ */
+public class CommonUrlHandler extends FactoryUrlHandler {
+
+    @Nullable
+    @Override
+    public String getTitle(Document document) {
+        return document.title();
+    }
+
+    @Nullable
+    @Override
+    public String getDescription(Document document) {
+        String description = document.head().select("meta[name=description]").attr("content");
+        String keywords = document.head().select("meta[name=keywords]").attr("content");
+        String content = StrUtil.isNotBlank(description) ? description : keywords;
+        // Keep only the first sentence; subBefore returns the text as-is when it has no "。" (substring(0, -1) would throw)
+        return StrUtil.subBefore(content, "。", false);
+    }
+
+    @Nullable
+    @Override
+    public String getImage(String url, Document document) {
+        String image = document.select("link[type=image/x-icon]").attr("href");
+        // If there is no x-icon link, fall back to a link whose rel attribute ends with "icon"
+        String href = StrUtil.isEmpty(image) ? document.select("link[rel$=icon]").attr("href") : image;
+        // If the icon href already contains part of the url's domain
+        if (StrUtil.isNotBlank(StrUtil.removeAny(StrUtil.removeAny(href, "/"), "favicon.ico")) &&
+                StrUtil.containsAny(StrUtil.removePrefix(url, "http://"), StrUtil.removeAny(StrUtil.removeAny(href, "/"), "favicon.ico"))) {
+            return "http://" + StrUtil.removePrefix(href, "/");
+        }
+        // If the url itself already points at the logo
+        if (StrUtil.containsAny(url, "favicon")) {
+            return url;
+        }
+        // If the icon href already carries a full url
+        if (StrUtil.containsAny(href, "http") || StrUtil.containsAny(href, "https")) {
+            return href;
+        }
+        return StrUtil.format("{}/{}", url, StrUtil.removePrefix(href, "/"));
+    }
+}