mirror of
https://github.com/zongzibinbin/MallChat.git
synced 2025-12-26 04:47:53 +08:00
Merge pull request #90 from BigDataQiChao/main
Url解析组件重构;server端UrlMap映射调整;优先级调整;新增Url解析组件模式(不影响旧版本使用)
This commit is contained in:
commit
3a4192aa68
1
MallChat
Submodule
1
MallChat
Submodule
@ -0,0 +1 @@
|
||||
Subproject commit c47b48760dd1eaaed6cf1c62930c65032ed66752
|
||||
@ -1,5 +1,6 @@
|
||||
package com.abin.mallchat.common.chat.domain.entity.msg;
|
||||
|
||||
import com.abin.mallchat.common.common.utils.discover.domain.UrlInfo;
|
||||
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
@ -23,7 +24,7 @@ import java.util.Map;
|
||||
public class MessageExtra implements Serializable {
|
||||
private static final long serialVersionUID = 1L;
|
||||
//url跳转链接
|
||||
private Map<String, String> urlTitleMap;
|
||||
private Map<String, UrlInfo> urlContentMap;
|
||||
//消息撤回详情
|
||||
private MsgRecall recall;
|
||||
//艾特的uid
|
||||
|
||||
@ -0,0 +1,22 @@
|
||||
package com.abin.mallchat.common.common.utils.chain;
|
||||
|
||||
import com.abin.mallchat.common.common.utils.discover.domain.UrlInfo;
|
||||
import org.jetbrains.annotations.Nullable;
|
||||
import org.jsoup.nodes.Document;
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* Description: 测试
|
||||
* Author: achao
|
||||
* Date: 2023/7/6 9:29
|
||||
*/
|
||||
public class Application {
|
||||
public static void main(String[] args) {
|
||||
PrioritizedUrlHandler handler = new PrioritizedUrlHandler();
|
||||
String longStr = "其中包含一个URL www.baidu.com,一个带有端口号的URL http://www.jd.com:80, 一个带有路径的URL http://mallchat.cn, 还有美团技术文章https://mp.weixin.qq.com/s/hwTf4bDck9_tlFpgVDeIKg ";
|
||||
|
||||
Map<String, UrlInfo> urlContentMap = handler.getUrlContentMap(longStr);
|
||||
System.out.println(urlContentMap);
|
||||
}
|
||||
}
|
||||
@ -0,0 +1,51 @@
|
||||
package com.abin.mallchat.common.common.utils.chain;
|
||||
|
||||
import cn.hutool.core.util.StrUtil;
|
||||
import org.jetbrains.annotations.Nullable;
|
||||
import org.jsoup.nodes.Document;
|
||||
|
||||
/**
|
||||
* Description:
|
||||
* Author: achao
|
||||
* Date: 2023/7/6 9:25
|
||||
*/
|
||||
public class CommonUrlHandler extends FactoryUrlHandler {
|
||||
|
||||
@Nullable
|
||||
@Override
|
||||
public String getTitle(Document document) {
|
||||
return document.title();
|
||||
}
|
||||
|
||||
@Nullable
|
||||
@Override
|
||||
public String getDescription(Document document) {
|
||||
String description = document.head().select("meta[name=description]").attr("content");
|
||||
String keywords = document.head().select("meta[name=keywords]").attr("content");
|
||||
String content = StrUtil.isNotBlank(description) ? description : keywords;
|
||||
//只保留一句话的描述
|
||||
return StrUtil.isNotBlank(content) ? content.substring(0, content.indexOf("。")) : content;
|
||||
}
|
||||
|
||||
@Nullable
|
||||
@Override
|
||||
public String getImage(String url, Document document) {
|
||||
String image = document.select("link[type=image/x-icon]").attr("href");
|
||||
//如果没有去匹配含有icon属性的logo
|
||||
String href = StrUtil.isEmpty(image) ? document.select("link[rel$=icon]").attr("href") : image;
|
||||
//如果icon中已经包含了url部分域名
|
||||
if (StrUtil.isNotBlank(StrUtil.removeAny(StrUtil.removeAny(href, "/"), "favicon.ico")) &&
|
||||
StrUtil.containsAny(StrUtil.removePrefix(url, "http://"), StrUtil.removeAny(StrUtil.removeAny(href, "/"), "favicon.ico"))) {
|
||||
return "http://" + StrUtil.removePrefix(href, "/");
|
||||
}
|
||||
//如果url已经包含了logo
|
||||
if (StrUtil.containsAny(url, "favicon")) {
|
||||
return url;
|
||||
}
|
||||
//如果logo中有url
|
||||
if (StrUtil.containsAny(href, "http") || StrUtil.containsAny(href, "https")) {
|
||||
return href;
|
||||
}
|
||||
return StrUtil.format("{}/{}", url, StrUtil.removePrefix(href, "/"));
|
||||
}
|
||||
}
|
||||
@ -0,0 +1,96 @@
|
||||
package com.abin.mallchat.common.common.utils.chain;
|
||||
|
||||
import cn.hutool.core.util.ReUtil;
|
||||
import cn.hutool.core.util.StrUtil;
|
||||
import com.abin.mallchat.common.common.utils.FutureUtils;
|
||||
import com.abin.mallchat.common.common.utils.discover.domain.UrlInfo;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.jsoup.Connection;
|
||||
import org.jsoup.Jsoup;
|
||||
import org.jsoup.nodes.Document;
|
||||
import org.springframework.data.util.Pair;
|
||||
|
||||
import javax.annotation.Nullable;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
import java.util.concurrent.CompletableFuture;
|
||||
import java.util.regex.Pattern;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
/**
|
||||
* Description: 链接处理工厂
|
||||
* Author: achao
|
||||
* Date: 2023/7/6 9:12
|
||||
*/
|
||||
@Slf4j
|
||||
public abstract class FactoryUrlHandler extends UrlHandler{
|
||||
|
||||
//链接识别的正则
|
||||
private static final Pattern PATTERN = Pattern.compile("((http|https)://)?(www.)?([\\w_-]+(?:(?:\\.[\\w_-]+)+))([\\w.,@?^=%&:/~+#-]*[\\w@?^=%&/~+#-])?");
|
||||
|
||||
@Override
|
||||
@Nullable
|
||||
public Map<String, UrlInfo> getUrlContentMap(String content) {
|
||||
|
||||
if (StrUtil.isBlank(content)) {
|
||||
return new HashMap<>();
|
||||
}
|
||||
List<String> matchList = ReUtil.findAll(PATTERN, content, 0);
|
||||
|
||||
//并行请求
|
||||
List<CompletableFuture<Pair<String, UrlInfo>>> futures = matchList.stream().map(match -> CompletableFuture.supplyAsync(() -> {
|
||||
UrlInfo urlInfo = getContent(match);
|
||||
return Objects.isNull(urlInfo) ? null : Pair.of(match, urlInfo);
|
||||
})).collect(Collectors.toList());
|
||||
CompletableFuture<List<Pair<String, UrlInfo>>> future = FutureUtils.sequenceNonNull(futures);
|
||||
//结果组装
|
||||
return future.join().stream().collect(Collectors.toMap(Pair::getFirst, Pair::getSecond, (a, b) -> a));
|
||||
}
|
||||
|
||||
private UrlInfo getContent(String url){
|
||||
url = !StrUtil.startWith(url, "http") ? "http://" + url : url;
|
||||
Document document = getUrlDocument(url);
|
||||
return UrlInfo.builder()
|
||||
.title(getTitle(document))
|
||||
.description(getDescription(document))
|
||||
.image(getImage(url,document)).build();
|
||||
}
|
||||
|
||||
protected Document getUrlDocument(String matchUrl) {
|
||||
try {
|
||||
Connection connect = Jsoup.connect(matchUrl);
|
||||
connect.timeout(2000);
|
||||
return connect.get();
|
||||
} catch (Exception e) {
|
||||
log.error("find error:url:{}", matchUrl, e);
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* 获取链接的标题
|
||||
* @param document
|
||||
* @return
|
||||
*/
|
||||
@Nullable
|
||||
abstract String getTitle(Document document);
|
||||
|
||||
/**
|
||||
* 获取链接的描述
|
||||
* @param document
|
||||
* @return
|
||||
*/
|
||||
@Nullable
|
||||
abstract String getDescription(Document document);
|
||||
|
||||
/**
|
||||
* 获取链接的LOGO
|
||||
* @param document
|
||||
* @return
|
||||
*/
|
||||
@Nullable
|
||||
abstract String getImage(String url, Document document);
|
||||
|
||||
}
|
||||
@ -0,0 +1,39 @@
|
||||
package com.abin.mallchat.common.common.utils.chain;
|
||||
|
||||
import cn.hutool.core.util.StrUtil;
|
||||
import com.abin.mallchat.common.common.utils.discover.UrlDiscover;
|
||||
import com.abin.mallchat.common.common.utils.discover.domain.UrlInfo;
|
||||
import org.jetbrains.annotations.Nullable;
|
||||
import org.jsoup.nodes.Document;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* Description: 优先级链接统一处理扩展类
|
||||
* Author: achao
|
||||
* Date: 2023/7/6 9:36
|
||||
*/
|
||||
public class PrioritizedUrlHandler extends FactoryUrlHandler {
|
||||
|
||||
private final FactoryUrlHandler commonUrlHandler = new CommonUrlHandler();
|
||||
private final FactoryUrlHandler wxUrlHandler = new WxUrlHandler();
|
||||
|
||||
@Nullable
|
||||
@Override
|
||||
String getTitle(Document document) {
|
||||
return StrUtil.isBlank(wxUrlHandler.getTitle(document)) ? commonUrlHandler.getTitle(document) : wxUrlHandler.getTitle(document);
|
||||
}
|
||||
|
||||
@Nullable
|
||||
@Override
|
||||
String getDescription(Document document) {
|
||||
return StrUtil.isBlank(wxUrlHandler.getDescription(document)) ? commonUrlHandler.getDescription(document) : wxUrlHandler.getDescription(document);
|
||||
}
|
||||
|
||||
@Nullable
|
||||
@Override
|
||||
String getImage(String url, Document document) {
|
||||
return StrUtil.isBlank(wxUrlHandler.getImage(url, document)) ? commonUrlHandler.getImage(url, document) : wxUrlHandler.getImage(url, document);
|
||||
}
|
||||
}
|
||||
@ -0,0 +1,25 @@
|
||||
package com.abin.mallchat.common.common.utils.chain;
|
||||
|
||||
import com.abin.mallchat.common.common.utils.discover.domain.UrlInfo;
|
||||
import org.jsoup.nodes.Document;
|
||||
|
||||
import javax.annotation.Nullable;
|
||||
import java.util.Map;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
/**
|
||||
* Description: url集合处理抽象接口定义类
|
||||
* Author: achao
|
||||
* Date: 2023/7/6 8:58
|
||||
*/
|
||||
public abstract class UrlHandler {
|
||||
|
||||
/**
|
||||
* 提取消息中的所有链接,并组装Map
|
||||
* @param content
|
||||
* @return
|
||||
*/
|
||||
@Nullable
|
||||
abstract Map<String,UrlInfo> getUrlContentMap(String content);
|
||||
|
||||
}
|
||||
@ -0,0 +1,32 @@
|
||||
package com.abin.mallchat.common.common.utils.chain;
|
||||
|
||||
import cn.hutool.core.util.StrUtil;
|
||||
import org.jetbrains.annotations.Nullable;
|
||||
import org.jsoup.nodes.Document;
|
||||
|
||||
/**
|
||||
* Description:
|
||||
* Author: achao
|
||||
* Date: 2023/7/6 9:34
|
||||
*/
|
||||
public class WxUrlHandler extends FactoryUrlHandler {
|
||||
|
||||
@Nullable
|
||||
@Override
|
||||
public String getTitle(Document document) {
|
||||
return document.getElementsByAttributeValue("property", "og:title").attr("content");
|
||||
}
|
||||
|
||||
@Nullable
|
||||
@Override
|
||||
public String getDescription(Document document) {
|
||||
String description = document.getElementsByAttributeValue("property", "og:description").attr("content");
|
||||
return StrUtil.isNotBlank(description) ? description.substring(0, description.indexOf("。")) : description;
|
||||
}
|
||||
|
||||
@Nullable
|
||||
@Override
|
||||
public String getImage(String url, Document document) {
|
||||
return document.getElementsByAttributeValue("property", "og:image").attr("content");
|
||||
}
|
||||
}
|
||||
@ -0,0 +1,32 @@
|
||||
package com.abin.mallchat.common.common.utils.chain.dto;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
import lombok.NoArgsConstructor;
|
||||
|
||||
/**
|
||||
* Description: 链接信息提取类
|
||||
* Author: achao
|
||||
* Date: 2023/7/6 8:54
|
||||
*/
|
||||
@Data
|
||||
@Builder
|
||||
@AllArgsConstructor
|
||||
@NoArgsConstructor
|
||||
public class UrlInfo {
|
||||
/**
|
||||
* 标题
|
||||
**/
|
||||
String title;
|
||||
|
||||
/**
|
||||
* 描述
|
||||
**/
|
||||
String description;
|
||||
|
||||
/**
|
||||
* 网站LOGO
|
||||
**/
|
||||
String image;
|
||||
}
|
||||
@ -3,14 +3,14 @@ package com.abin.mallchat.common.common.utils.discover;
|
||||
import cn.hutool.core.util.ReUtil;
|
||||
import cn.hutool.core.util.StrUtil;
|
||||
import com.abin.mallchat.common.common.utils.FutureUtils;
|
||||
import com.abin.mallchat.common.common.utils.discover.domain.UrlInfo;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.jetbrains.annotations.Nullable;
|
||||
import org.jsoup.Connection;
|
||||
import org.jsoup.Jsoup;
|
||||
import org.jsoup.nodes.Document;
|
||||
import org.springframework.data.util.Pair;
|
||||
|
||||
import javax.annotation.Nullable;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
@ -20,46 +20,55 @@ import java.util.regex.Pattern;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
/**
|
||||
* Description: urlTitle查询抽象类
|
||||
* Author: <a href="https://github.com/zongzibinbin">abin</a>
|
||||
* Date: 2023-05-27
|
||||
* @author zhaoqichao
|
||||
* @date 2023/7/3 16:38
|
||||
*/
|
||||
@Slf4j
|
||||
public abstract class AbstractUrlTitleDiscover implements UrlTitleDiscover {
|
||||
public abstract class AbstractUrlDiscover implements UrlDiscover {
|
||||
//链接识别的正则
|
||||
private static final Pattern PATTERN = Pattern.compile("((http|https)://)?(www.)?([\\w_-]+(?:(?:\\.[\\w_-]+)+))([\\w.,@?^=%&:/~+#-]*[\\w@?^=%&/~+#-])?");
|
||||
|
||||
|
||||
@Nullable
|
||||
@Override
|
||||
public Map<String, String> getContentTitleMap(String content) {
|
||||
public Map<String, UrlInfo> getUrlContentMap(String content) {
|
||||
|
||||
if (StrUtil.isBlank(content)) {
|
||||
return new HashMap<>();
|
||||
}
|
||||
List<String> matchList = ReUtil.findAll(PATTERN, content, 0);
|
||||
|
||||
//并行请求
|
||||
List<CompletableFuture<Pair<String, String>>> futures = matchList.stream().map(match -> CompletableFuture.supplyAsync(() -> {
|
||||
String title = getUrlTitle(match);
|
||||
return StringUtils.isNotEmpty(title) ? Pair.of(match, title) : null;
|
||||
List<CompletableFuture<Pair<String, UrlInfo>>> futures = matchList.stream().map(match -> CompletableFuture.supplyAsync(() -> {
|
||||
UrlInfo urlInfo = getContent(match);
|
||||
return Objects.isNull(urlInfo) ? null : Pair.of(match, urlInfo);
|
||||
})).collect(Collectors.toList());
|
||||
CompletableFuture<List<Pair<String, String>>> future = FutureUtils.sequenceNonNull(futures);
|
||||
CompletableFuture<List<Pair<String, UrlInfo>>> future = FutureUtils.sequenceNonNull(futures);
|
||||
//结果组装
|
||||
return future.join().stream().collect(Collectors.toMap(Pair::getFirst, Pair::getSecond, (a, b) -> a));
|
||||
}
|
||||
|
||||
@Nullable
|
||||
@Override
|
||||
public String getUrlTitle(String url) {
|
||||
public UrlInfo getContent(String url) {
|
||||
Document document = getUrlDocument(assemble(url));
|
||||
if (Objects.isNull(document)) {
|
||||
return null;
|
||||
}
|
||||
return getDocTitle(document);
|
||||
|
||||
return UrlInfo.builder()
|
||||
.title(getTitle(document))
|
||||
.description(getDescription(document))
|
||||
.image(getImage(assemble(url),document)).build();
|
||||
}
|
||||
|
||||
|
||||
private String assemble(String url) {
|
||||
|
||||
if (!StrUtil.startWith(url, "http")) {
|
||||
return "http://" + url;
|
||||
}
|
||||
|
||||
return url;
|
||||
}
|
||||
|
||||
@ -69,8 +78,9 @@ public abstract class AbstractUrlTitleDiscover implements UrlTitleDiscover {
|
||||
connect.timeout(2000);
|
||||
return connect.get();
|
||||
} catch (Exception e) {
|
||||
log.error("find title error:url:{}", matchUrl, e);
|
||||
log.error("find error:url:{}", matchUrl, e);
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
}
|
||||
@ -0,0 +1,52 @@
|
||||
package com.abin.mallchat.common.common.utils.discover;
|
||||
|
||||
import cn.hutool.core.util.StrUtil;
|
||||
import io.jsonwebtoken.lang.Objects;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.jetbrains.annotations.Nullable;
|
||||
import org.jsoup.nodes.Document;
|
||||
import org.jsoup.select.Elements;
|
||||
|
||||
/**
|
||||
* @author zhaoqichao
|
||||
* @date 2023/7/3 16:54
|
||||
*/
|
||||
public class CommonUrlDiscover extends AbstractUrlDiscover {
|
||||
@Nullable
|
||||
@Override
|
||||
public String getTitle(Document document) {
|
||||
return document.title();
|
||||
}
|
||||
|
||||
@Nullable
|
||||
@Override
|
||||
public String getDescription(Document document) {
|
||||
String description = document.head().select("meta[name=description]").attr("content");
|
||||
String keywords = document.head().select("meta[name=keywords]").attr("content");
|
||||
String content = StrUtil.isNotBlank(description) ? description : keywords;
|
||||
//只保留一句话的描述
|
||||
return StrUtil.isNotBlank(content) ? content.substring(0, content.indexOf("。")) : content;
|
||||
}
|
||||
|
||||
@Nullable
|
||||
@Override
|
||||
public String getImage(String url, Document document) {
|
||||
String image = document.select("link[type=image/x-icon]").attr("href");
|
||||
//如果没有去匹配含有icon属性的logo
|
||||
String href = StrUtil.isEmpty(image) ? document.select("link[rel$=icon]").attr("href") : image;
|
||||
//如果icon中已经包含了url部分域名
|
||||
if (StrUtil.isNotBlank(StrUtil.removeAny(StrUtil.removeAny(href, "/"), "favicon.ico")) &&
|
||||
StrUtil.containsAny(StrUtil.removePrefix(url, "http://"), StrUtil.removeAny(StrUtil.removeAny(href, "/"), "favicon.ico"))) {
|
||||
return "http://" + StrUtil.removePrefix(href, "/");
|
||||
}
|
||||
//如果url已经包含了logo
|
||||
if (StrUtil.containsAny(url, "favicon")) {
|
||||
return url;
|
||||
}
|
||||
//如果logo中有url
|
||||
if (StrUtil.containsAny(href, "http") || StrUtil.containsAny(href, "https")) {
|
||||
return href;
|
||||
}
|
||||
return StrUtil.format("{}/{}", url, StrUtil.removePrefix(href, "/"));
|
||||
}
|
||||
}
|
||||
@ -1,15 +0,0 @@
|
||||
package com.abin.mallchat.common.common.utils.discover;
|
||||
|
||||
import org.jsoup.nodes.Document;
|
||||
|
||||
/**
|
||||
* Description: 通用的标题解析类
|
||||
* Author: <a href="https://github.com/zongzibinbin">abin</a>
|
||||
* Date: 2023-05-27
|
||||
*/
|
||||
public class CommonUrlTitleDiscover extends AbstractUrlTitleDiscover {
|
||||
@Override
|
||||
public String getDocTitle(Document document) {
|
||||
return document.title();
|
||||
}
|
||||
}
|
||||
@ -0,0 +1,60 @@
|
||||
package com.abin.mallchat.common.common.utils.discover;
|
||||
|
||||
import cn.hutool.core.util.StrUtil;
|
||||
import org.jetbrains.annotations.Nullable;
|
||||
import org.jsoup.nodes.Document;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Description: 具有优先级的title查询器
|
||||
* Author: <a href="https://github.com/zongzibinbin">abin</a>
|
||||
* Date: 2023-05-27
|
||||
*/
|
||||
public class PrioritizedUrlDiscover extends AbstractUrlDiscover {
|
||||
|
||||
private final List<UrlDiscover> urlDiscovers = new ArrayList<>(2);
|
||||
|
||||
public PrioritizedUrlDiscover() {
|
||||
urlDiscovers.add(new WxUrlDiscover());
|
||||
urlDiscovers.add(new CommonUrlDiscover());
|
||||
}
|
||||
|
||||
|
||||
@Nullable
|
||||
@Override
|
||||
public String getTitle(Document document) {
|
||||
for (UrlDiscover urlDiscover : urlDiscovers) {
|
||||
String urlTitle = urlDiscover.getTitle(document);
|
||||
if (StrUtil.isNotBlank(urlTitle)) {
|
||||
return urlTitle;
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
@Nullable
|
||||
@Override
|
||||
public String getDescription(Document document) {
|
||||
for (UrlDiscover urlDiscover : urlDiscovers) {
|
||||
String urlDescription = urlDiscover.getDescription(document);
|
||||
if (StrUtil.isNotBlank(urlDescription)) {
|
||||
return urlDescription;
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
@Nullable
|
||||
@Override
|
||||
public String getImage(String url, Document document) {
|
||||
for (UrlDiscover urlDiscover : urlDiscovers) {
|
||||
String urlImage = urlDiscover.getImage(url,document);
|
||||
if (StrUtil.isNotBlank(urlImage)) {
|
||||
return urlImage;
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
}
|
||||
@ -1,33 +0,0 @@
|
||||
package com.abin.mallchat.common.common.utils.discover;
|
||||
|
||||
import cn.hutool.core.util.StrUtil;
|
||||
import org.jsoup.nodes.Document;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Description: 具有优先级的title查询器
|
||||
* Author: <a href="https://github.com/zongzibinbin">abin</a>
|
||||
* Date: 2023-05-27
|
||||
*/
|
||||
public class PrioritizedUrlTitleDiscover extends AbstractUrlTitleDiscover {
|
||||
|
||||
private final List<UrlTitleDiscover> urlTitleDiscovers = new ArrayList<>(2);
|
||||
|
||||
public PrioritizedUrlTitleDiscover() {
|
||||
urlTitleDiscovers.add(new CommonUrlTitleDiscover());
|
||||
urlTitleDiscovers.add(new WxUrlTitleDiscover());
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getDocTitle(Document document) {
|
||||
for (UrlTitleDiscover urlTitleDiscover : urlTitleDiscovers) {
|
||||
String urlTitle = urlTitleDiscover.getDocTitle(document);
|
||||
if (StrUtil.isNotBlank(urlTitle)) {
|
||||
return urlTitle;
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
}
|
||||
@ -0,0 +1,46 @@
|
||||
package com.abin.mallchat.common.common.utils.discover;
|
||||
|
||||
import cn.hutool.core.date.StopWatch;
|
||||
import cn.hutool.core.util.StrUtil;
|
||||
import com.abin.mallchat.common.common.utils.discover.domain.UrlInfo;
|
||||
import org.jsoup.nodes.Document;
|
||||
|
||||
import javax.annotation.Nullable;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* @author zhaoqichao
|
||||
* @date 2023/7/3 16:34
|
||||
*/
|
||||
public interface UrlDiscover {
|
||||
|
||||
|
||||
@Nullable
|
||||
Map<String,UrlInfo> getUrlContentMap(String content);
|
||||
|
||||
@Nullable
|
||||
UrlInfo getContent(String url);
|
||||
|
||||
@Nullable
|
||||
String getTitle(Document document);
|
||||
|
||||
@Nullable
|
||||
String getDescription(Document document);
|
||||
|
||||
@Nullable
|
||||
String getImage(String url, Document document);
|
||||
|
||||
public static void main(String[] args) {
|
||||
StopWatch stopWatch = new StopWatch();
|
||||
stopWatch.start();
|
||||
String longStr = "其中包含一个URL www.baidu.com,一个带有端口号的URL http://www.jd.com:80, 一个带有路径的URL http://mallchat.cn, 还有美团技术文章https://mp.weixin.qq.com/s/hwTf4bDck9_tlFpgVDeIKg ";
|
||||
// String longStr = "一个带有端口号的URL http://www.jd.com:80,";
|
||||
// String longStr = "一个带有路径的URL http://mallchat.cn";
|
||||
PrioritizedUrlDiscover discover = new PrioritizedUrlDiscover();
|
||||
final Map<String, UrlInfo> map = discover.getUrlContentMap(longStr);
|
||||
System.out.println(map);
|
||||
stopWatch.stop();
|
||||
long cost = stopWatch.getTotalTimeMillis();
|
||||
System.out.println(cost);
|
||||
}
|
||||
}
|
||||
@ -1,35 +0,0 @@
|
||||
package com.abin.mallchat.common.common.utils.discover;
|
||||
|
||||
import cn.hutool.core.date.StopWatch;
|
||||
import org.jsoup.nodes.Document;
|
||||
|
||||
import javax.annotation.Nullable;
|
||||
import java.util.Map;
|
||||
|
||||
public interface UrlTitleDiscover {
|
||||
|
||||
|
||||
@Nullable
|
||||
Map<String, String> getContentTitleMap(String content);
|
||||
|
||||
|
||||
@Nullable
|
||||
String getUrlTitle(String url);
|
||||
|
||||
@Nullable
|
||||
String getDocTitle(Document document);
|
||||
|
||||
public static void main(String[] args) {//用异步多任务查询并合并 974 //串行访问的速度1349 1291 1283 1559
|
||||
StopWatch stopWatch = new StopWatch();
|
||||
stopWatch.start();
|
||||
String longStr = "这是一个很长的字符串再来 www.github.com,其中包含一个URL www.baidu.com,, 一个带有端口号的URL http://www.jd.com:80, 一个带有路径的URL http://mallchat.cn, 还有美团技术文章https://mp.weixin.qq.com/s/hwTf4bDck9_tlFpgVDeIKg ";
|
||||
PrioritizedUrlTitleDiscover discover = new PrioritizedUrlTitleDiscover();
|
||||
Map<String, String> contentTitleMap = discover.getContentTitleMap(longStr);
|
||||
System.out.println(contentTitleMap);
|
||||
//
|
||||
// Jsoup.connect("http:// www.github.com");
|
||||
stopWatch.stop();
|
||||
long cost = stopWatch.getTotalTimeMillis();
|
||||
System.out.println(cost);
|
||||
}//{http://mallchat.cn=MallChat, www.baidu.com=百度一下,你就知道, https://mp.weixin.qq.com/s/hwTf4bDck9_tlFpgVDeIKg=超大规模数据库集群保稳系列之二:数据库攻防演练建设实践, http://www.jd.com:80=京东(JD.COM)-正品低价、品质保障、配送及时、轻松购物!}
|
||||
}
|
||||
@ -0,0 +1,30 @@
|
||||
package com.abin.mallchat.common.common.utils.discover;
|
||||
|
||||
import org.jetbrains.annotations.Nullable;
|
||||
import org.jsoup.nodes.Document;
|
||||
|
||||
/**
|
||||
* Description: 针对微信公众号文章的标题获取类
|
||||
* Author: <a href="https://github.com/zongzibinbin">abin</a>
|
||||
* Date: 2023-05-27
|
||||
*/
|
||||
public class WxUrlDiscover extends AbstractUrlDiscover {
|
||||
|
||||
@Nullable
|
||||
@Override
|
||||
public String getTitle(Document document) {
|
||||
return document.getElementsByAttributeValue("property", "og:title").attr("content");
|
||||
}
|
||||
|
||||
@Nullable
|
||||
@Override
|
||||
public String getDescription(Document document) {
|
||||
return document.getElementsByAttributeValue("property", "og:description").attr("content");
|
||||
}
|
||||
|
||||
@Nullable
|
||||
@Override
|
||||
public String getImage(String url, Document document) {
|
||||
return document.getElementsByAttributeValue("property", "og:image").attr("content");
|
||||
}
|
||||
}
|
||||
@ -1,15 +0,0 @@
|
||||
package com.abin.mallchat.common.common.utils.discover;
|
||||
|
||||
import org.jsoup.nodes.Document;
|
||||
|
||||
/**
|
||||
* Description: 针对微信公众号文章的标题获取类
|
||||
* Author: <a href="https://github.com/zongzibinbin">abin</a>
|
||||
* Date: 2023-05-27
|
||||
*/
|
||||
public class WxUrlTitleDiscover extends AbstractUrlTitleDiscover {
|
||||
@Override
|
||||
public String getDocTitle(Document document) {
|
||||
return document.getElementsByAttributeValue("property", "og:title").attr("content");
|
||||
}
|
||||
}
|
||||
@ -0,0 +1,32 @@
|
||||
package com.abin.mallchat.common.common.utils.discover.domain;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
import lombok.NoArgsConstructor;
|
||||
|
||||
/**
|
||||
* @author zhaoqichao
|
||||
* @date 2023/7/3 16:12
|
||||
*/
|
||||
@Data
|
||||
@Builder
|
||||
@AllArgsConstructor
|
||||
@NoArgsConstructor
|
||||
public class UrlInfo {
|
||||
/**
|
||||
* 标题
|
||||
**/
|
||||
String title;
|
||||
|
||||
/**
|
||||
* 描述
|
||||
**/
|
||||
String description;
|
||||
|
||||
/**
|
||||
* 网站LOGO
|
||||
**/
|
||||
String image;
|
||||
|
||||
}
|
||||
@ -1,5 +1,6 @@
|
||||
package com.abin.mallchat.custom.chat.domain.vo.response;
|
||||
|
||||
import com.abin.mallchat.common.common.utils.discover.domain.UrlInfo;
|
||||
import io.swagger.annotations.ApiModelProperty;
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
@ -50,7 +51,7 @@ public class ChatMessageResp {
|
||||
private String content;
|
||||
@ApiModelProperty("消息链接映射-废弃")
|
||||
@Deprecated
|
||||
private Map<String, String> urlTitleMap;
|
||||
private Map<String, UrlInfo> urlContentMap;
|
||||
@ApiModelProperty("消息类型 1正常文本 2.撤回消息")
|
||||
private Integer type;
|
||||
@ApiModelProperty("消息内容不同的消息类型,内容体不同,见https://www.yuque.com/snab/mallcaht/rkb2uz5k1qqdmcmd")
|
||||
|
||||
@ -1,5 +1,6 @@
|
||||
package com.abin.mallchat.custom.chat.domain.vo.response.msg;
|
||||
|
||||
import com.abin.mallchat.common.common.utils.discover.domain.UrlInfo;
|
||||
import io.swagger.annotations.ApiModelProperty;
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
@ -22,7 +23,7 @@ public class TextMsgResp {
|
||||
@ApiModelProperty("消息内容")
|
||||
private String content;
|
||||
@ApiModelProperty("消息链接映射")
|
||||
private Map<String, String> urlTitleMap;
|
||||
private Map<String, UrlInfo> urlContentMap;
|
||||
@ApiModelProperty("艾特的uid")
|
||||
private List<Long> atUidList;
|
||||
@ApiModelProperty("父消息,如果没有父消息,返回的是null")
|
||||
|
||||
@ -56,7 +56,7 @@ public class MessageAdapter {
|
||||
messageVO.setSendTime(message.getCreateTime());
|
||||
AbstractMsgHandler msgHandler = MsgHandlerFactory.getStrategyNoNull(message.getType());
|
||||
messageVO.setBody(msgHandler.showMsg(message));
|
||||
messageVO.setUrlTitleMap(Optional.ofNullable(message.getExtra()).map(MessageExtra::getUrlTitleMap).orElse(null));
|
||||
messageVO.setUrlContentMap(Optional.ofNullable(message.getExtra()).map(MessageExtra::getUrlContentMap).orElse(null));
|
||||
Message replyMessage = replyMap.get(message.getReplyMsgId());
|
||||
|
||||
//回复消息
|
||||
|
||||
@ -11,7 +11,8 @@ import com.abin.mallchat.common.chat.service.cache.MsgCache;
|
||||
import com.abin.mallchat.common.common.domain.enums.YesOrNoEnum;
|
||||
import com.abin.mallchat.common.common.utils.AssertUtil;
|
||||
import com.abin.mallchat.common.common.utils.SensitiveWordUtils;
|
||||
import com.abin.mallchat.common.common.utils.discover.PrioritizedUrlTitleDiscover;
|
||||
import com.abin.mallchat.common.common.utils.discover.PrioritizedUrlDiscover;
|
||||
import com.abin.mallchat.common.common.utils.discover.domain.UrlInfo;
|
||||
import com.abin.mallchat.common.user.domain.entity.User;
|
||||
import com.abin.mallchat.common.user.domain.enums.RoleEnum;
|
||||
import com.abin.mallchat.common.user.service.IRoleService;
|
||||
@ -47,7 +48,7 @@ public class TextMsgHandler extends AbstractMsgHandler {
|
||||
@Autowired
|
||||
private IRoleService iRoleService;
|
||||
|
||||
private static final PrioritizedUrlTitleDiscover URL_TITLE_DISCOVER = new PrioritizedUrlTitleDiscover();
|
||||
private static final PrioritizedUrlDiscover URL_TITLE_DISCOVER = new PrioritizedUrlDiscover();
|
||||
|
||||
@Override
|
||||
MessageTypeEnum getMsgTypeEnum() {
|
||||
@ -91,8 +92,8 @@ public class TextMsgHandler extends AbstractMsgHandler {
|
||||
|
||||
}
|
||||
//判断消息url跳转
|
||||
Map<String, String> urlTitleMap = URL_TITLE_DISCOVER.getContentTitleMap(body.getContent());
|
||||
extra.setUrlTitleMap(urlTitleMap);
|
||||
Map<String, UrlInfo> urlContentMap = URL_TITLE_DISCOVER.getUrlContentMap(body.getContent());
|
||||
extra.setUrlContentMap(urlContentMap);
|
||||
//艾特功能
|
||||
if (CollectionUtil.isNotEmpty(body.getAtUidList())) {
|
||||
extra.setAtUidList(body.getAtUidList());
|
||||
@ -106,7 +107,7 @@ public class TextMsgHandler extends AbstractMsgHandler {
|
||||
public Object showMsg(Message msg) {
|
||||
TextMsgResp resp = new TextMsgResp();
|
||||
resp.setContent(msg.getContent());
|
||||
resp.setUrlTitleMap(Optional.ofNullable(msg.getExtra()).map(MessageExtra::getUrlTitleMap).orElse(null));
|
||||
resp.setUrlContentMap(Optional.ofNullable(msg.getExtra()).map(MessageExtra::getUrlContentMap).orElse(null));
|
||||
resp.setAtUidList(Optional.ofNullable(msg.getExtra()).map(MessageExtra::getAtUidList).orElse(null));
|
||||
//回复消息
|
||||
Optional<Message> reply = Optional.ofNullable(msg.getReplyMsgId())
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user