diff --git a/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/chain/Application.java b/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/chain/Application.java
new file mode 100644
index 0000000..b2c5d70
--- /dev/null
+++ b/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/chain/Application.java
@@ -0,0 +1,28 @@
+package com.abin.mallchat.common.common.utils.chain;
+
+import com.abin.mallchat.common.common.utils.discover.domain.UrlInfo;
+import org.jetbrains.annotations.Nullable;
+import org.jsoup.nodes.Document;
+
+import java.util.Map;
+
+/**
+ * Description: Manual smoke test for the URL handler chain.
+ * Author: achao
+ * Date: 2023/7/6 9:29
+ */
+public class Application {
+    /**
+     * Runs link extraction over a hard-coded sample message and prints the resulting map.
+     * The handler fetches every detected link, so this performs real HTTP requests.
+     *
+     * @param args unused
+     */
+    public static void main(String[] args) {
+        PrioritizedUrlHandler handler = new PrioritizedUrlHandler();
+        String longStr = "其中包含一个URL www.baidu.com,一个带有端口号的URL http://www.jd.com:80, 一个带有路径的URL http://mallchat.cn, 还有美团技术文章https://mp.weixin.qq.com/s/hwTf4bDck9_tlFpgVDeIKg ";
+
+        Map<String, UrlInfo> urlContentMap = handler.getUrlContentMap(longStr);
+        System.out.println(urlContentMap);
+    }
+}
diff --git a/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/chain/FactoryUrlHandler.java b/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/chain/FactoryUrlHandler.java
new file mode 100644
index 0000000..bd8d0e8
--- /dev/null
+++ b/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/chain/FactoryUrlHandler.java
@@ -0,0 +1,130 @@
+package com.abin.mallchat.common.common.utils.chain;
+
+import cn.hutool.core.util.ReUtil;
+import cn.hutool.core.util.StrUtil;
+import com.abin.mallchat.common.common.utils.FutureUtils;
+import com.abin.mallchat.common.common.utils.discover.domain.UrlInfo;
+import lombok.extern.slf4j.Slf4j;
+import org.jsoup.Connection;
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
+import org.springframework.data.util.Pair;
+
+import javax.annotation.Nullable;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
+import java.util.concurrent.CompletableFuture;
+import java.util.regex.Pattern;
+import java.util.stream.Collectors;
+
+/**
+ * Description: Base link handler that finds the links in a message, fetches each page and
+ * leaves title/description/image extraction to subclasses.
+ * Author: achao
+ * Date: 2023/7/6 9:12
+ */
+@Slf4j
+public abstract class FactoryUrlHandler extends UrlHandler {
+
+    // Regex that recognises links; the scheme and the "www." prefix are both optional.
+    private static final Pattern PATTERN = Pattern.compile("((http|https)://)?(www\\.)?([\\w_-]+(?:(?:\\.[\\w_-]+)+))([\\w.,@?^=%&:/~+#-]*[\\w@?^=%&/~+#-])?");
+
+    /**
+     * Extracts every link from the message and maps it to the metadata of the page behind it.
+     *
+     * @param content message text, may be blank
+     * @return map from each link, exactly as written in the message, to its metadata; empty
+     *         when the message is blank
+     */
+    @Override
+    @Nullable
+    public Map<String, UrlInfo> getUrlContentMap(String content) {
+
+        if (StrUtil.isBlank(content)) {
+            return new HashMap<>();
+        }
+        List<String> matchList = ReUtil.findAll(PATTERN, content, 0);
+
+        // Fetch the links in parallel; a link repeated in the message is fetched only once.
+        // NOTE(review): supplyAsync without an executor runs this blocking I/O on the common pool.
+        List<CompletableFuture<Pair<String, UrlInfo>>> futures = matchList.stream().distinct().map(match -> CompletableFuture.supplyAsync(() -> {
+            UrlInfo urlInfo = getContent(match);
+            return Objects.isNull(urlInfo) ? null : Pair.of(match, urlInfo);
+        })).collect(Collectors.toList());
+        // NOTE(review): presumably sequenceNonNull drops the null results of failed fetches - confirm.
+        CompletableFuture<List<Pair<String, UrlInfo>>> future = FutureUtils.sequenceNonNull(futures);
+        // Assemble the result map.
+        return future.join().stream().collect(Collectors.toMap(Pair::getFirst, Pair::getSecond, (a, b) -> a));
+    }
+
+    /**
+     * Fetches the page behind a link and extracts its metadata.
+     *
+     * @param url link as matched in the message, with or without a scheme
+     * @return the page metadata, or {@code null} when the page could not be fetched
+     */
+    @Nullable
+    private UrlInfo getContent(String url) {
+        // Require the full scheme prefix: a bare host such as "httpbin.org" still needs one.
+        boolean hasScheme = url.startsWith("http://") || url.startsWith("https://");
+        String requestUrl = hasScheme ? url : "http://" + url;
+        Document document = getUrlDocument(requestUrl);
+        if (Objects.isNull(document)) {
+            // The failure is already logged; the extractors cannot handle a null document.
+            return null;
+        }
+        return UrlInfo.builder()
+                .title(getTitle(document))
+                .description(getDescription(document))
+                .image(getImage(requestUrl, document)).build();
+    }
+
+    /**
+     * Downloads and parses the page at the given URL with a 2000 ms timeout.
+     *
+     * @param matchUrl absolute URL including the scheme
+     * @return the parsed document, or {@code null} when the request failed (the error is logged)
+     */
+    @Nullable
+    protected Document getUrlDocument(String matchUrl) {
+        try {
+            Connection connect = Jsoup.connect(matchUrl);
+            connect.timeout(2000);
+            return connect.get();
+        } catch (Exception e) {
+            log.error("find error:url:{}", matchUrl, e);
+        }
+        return null;
+    }
+
+    /**
+     * Extracts the title of the linked page.
+     *
+     * @param document parsed page; must not be {@code null}
+     * @return the title; {@code null} or blank when the page has none
+     */
+    @Nullable
+    abstract String getTitle(Document document);
+
+    /**
+     * Extracts the description of the linked page.
+     *
+     * @param document parsed page; must not be {@code null}
+     * @return the description; {@code null} or blank when the page has none
+     */
+    @Nullable
+    abstract String getDescription(Document document);
+
+    /**
+     * Extracts the logo/image of the linked page.
+     *
+     * @param url      URL the document was fetched from
+     * @param document parsed page; must not be {@code null}
+     * @return the image URL; {@code null} or blank when the page has none
+     */
+    @Nullable
+    abstract String getImage(String url, Document document);
+
+}
diff --git a/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/chain/PrioritizedUrlHandler.java b/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/chain/PrioritizedUrlHandler.java
new file mode 100644
index 0000000..b06ad32
--- /dev/null
+++ b/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/chain/PrioritizedUrlHandler.java
@@ -0,0 +1,63 @@
+package com.abin.mallchat.common.common.utils.chain;
+
+import cn.hutool.core.util.StrUtil;
+import com.abin.mallchat.common.common.utils.discover.UrlDiscover;
+import com.abin.mallchat.common.common.utils.discover.domain.UrlInfo;
+import org.jetbrains.annotations.Nullable;
+import org.jsoup.nodes.Document;
+
+import java.util.List;
+import java.util.Map;
+
+/**
+ * Description: Link handler that combines extractors by priority: WxUrlHandler is asked
+ * first and CommonUrlHandler supplies the value whenever WxUrlHandler returns a blank one.
+ * Author: achao
+ * Date: 2023/7/6 9:36
+ */
+public class PrioritizedUrlHandler extends FactoryUrlHandler {
+
+    private final FactoryUrlHandler commonUrlHandler = new CommonUrlHandler();
+    private final FactoryUrlHandler wxUrlHandler = new WxUrlHandler();
+
+    /**
+     * Returns the title found by the preferred extractor, or the fallback's when that is blank.
+     *
+     * @param document parsed page; must not be {@code null}
+     * @return the title, possibly {@code null} or blank when neither extractor finds one
+     */
+    @Nullable
+    @Override
+    String getTitle(Document document) {
+        // Query the preferred extractor once instead of twice.
+        String wxTitle = wxUrlHandler.getTitle(document);
+        return StrUtil.isBlank(wxTitle) ? commonUrlHandler.getTitle(document) : wxTitle;
+    }
+
+    /**
+     * Returns the description found by the preferred extractor, or the fallback's when that is blank.
+     *
+     * @param document parsed page; must not be {@code null}
+     * @return the description, possibly {@code null} or blank when neither extractor finds one
+     */
+    @Nullable
+    @Override
+    String getDescription(Document document) {
+        String wxDescription = wxUrlHandler.getDescription(document);
+        return StrUtil.isBlank(wxDescription) ? commonUrlHandler.getDescription(document) : wxDescription;
+    }
+
+    /**
+     * Returns the image found by the preferred extractor, or the fallback's when that is blank.
+     *
+     * @param url      URL the document was fetched from
+     * @param document parsed page; must not be {@code null}
+     * @return the image URL, possibly {@code null} or blank when neither extractor finds one
+     */
+    @Nullable
+    @Override
+    String getImage(String url, Document document) {
+        String wxImage = wxUrlHandler.getImage(url, document);
+        return StrUtil.isBlank(wxImage) ? commonUrlHandler.getImage(url, document) : wxImage;
+    }
+}
diff --git a/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/chain/UrlHandler.java b/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/chain/UrlHandler.java
new file mode 100644
index 0000000..c9a9a3d
--- /dev/null
+++ b/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/chain/UrlHandler.java
@@ -0,0 +1,26 @@
+package com.abin.mallchat.common.common.utils.chain;
+
+import com.abin.mallchat.common.common.utils.discover.domain.UrlInfo;
+import org.jsoup.nodes.Document;
+
+import javax.annotation.Nullable;
+import java.util.Map;
+import java.util.regex.Pattern;
+
+/**
+ * Description: Abstract contract for handlers that process the links contained in a message.
+ * Author: achao
+ * Date: 2023/7/6 8:58
+ */
+public abstract class UrlHandler {
+
+    /**
+     * Extracts all links from the message and assembles them into a map.
+     *
+     * @param content message text
+     * @return map keyed by link, holding the information extracted for each link
+     */
+    @Nullable
+    abstract Map<String, UrlInfo> getUrlContentMap(String content);
+
+}
diff --git a/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/chain/WxUrlHandler.java b/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/chain/WxUrlHandler.java
new file mode 100644
index 0000000..f8356d9
--- /dev/null
+++ b/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/chain/WxUrlHandler.java
@@ -0,0 +1,57 @@
+package com.abin.mallchat.common.common.utils.chain;
+
+import cn.hutool.core.util.StrUtil;
+import org.jetbrains.annotations.Nullable;
+import org.jsoup.nodes.Document;
+
+/**
+ * Description: Extractor that reads title, description and image from a page's og:* meta properties.
+ * Author: achao
+ * Date: 2023/7/6 9:34
+ */
+public class WxUrlHandler extends FactoryUrlHandler {
+
+    /**
+     * Reads the {@code content} of the element whose {@code property} is {@code og:title}.
+     *
+     * @param document parsed page; must not be {@code null}
+     * @return the title, or an empty string when the page declares none
+     */
+    @Nullable
+    @Override
+    public String getTitle(Document document) {
+        return document.getElementsByAttributeValue("property", "og:title").attr("content");
+    }
+
+    /**
+     * Reads the {@code og:description} content and keeps only the text before the first "。".
+     *
+     * @param document parsed page; must not be {@code null}
+     * @return the first sentence of the description, the whole description when it contains
+     *         no "。", or an empty string when the page declares none
+     */
+    @Nullable
+    @Override
+    public String getDescription(Document document) {
+        String description = document.getElementsByAttributeValue("property", "og:description").attr("content");
+        if (StrUtil.isBlank(description)) {
+            return description;
+        }
+        // indexOf yields -1 when there is no full stop; substring(0, -1) would throw.
+        int firstStop = description.indexOf("。");
+        return firstStop < 0 ? description : description.substring(0, firstStop);
+    }
+
+    /**
+     * Reads the {@code content} of the element whose {@code property} is {@code og:image}.
+     *
+     * @param url      URL the document was fetched from; not used by this extractor
+     * @param document parsed page; must not be {@code null}
+     * @return the image URL, or an empty string when the page declares none
+     */
+    @Nullable
+    @Override
+    public String getImage(String url, Document document) {
+        return document.getElementsByAttributeValue("property", "og:image").attr("content");
+    }
+}
diff --git a/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/chain/dto/UrlInfo.java b/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/chain/dto/UrlInfo.java
new file mode 100644
index 0000000..ad1050a
--- /dev/null
+++ b/mallchat-common/src/main/java/com/abin/mallchat/common/common/utils/chain/dto/UrlInfo.java
@@ -0,0 +1,34 @@
+package com.abin.mallchat.common.common.utils.chain.dto;
+
+import lombok.AllArgsConstructor;
+import lombok.Builder;
+import lombok.Data;
+import lombok.NoArgsConstructor;
+
+/**
+ * Description: Information extracted from a link.
+ * NOTE(review): the handlers in the chain package import
+ * com.abin.mallchat.common.common.utils.discover.domain.UrlInfo, not this class - confirm which is intended.
+ * Author: achao
+ * Date: 2023/7/6 8:54
+ */
+@Data
+@Builder
+@AllArgsConstructor
+@NoArgsConstructor
+public class UrlInfo {
+    /**
+     * Title
+     **/
+    String title;
+
+    /**
+     * Description
+     **/
+    String description;
+
+    /**
+     * Site logo
+     **/
+    String image;
+}