From 0612dbd39f268e0816cd7e1cd5bc95e12938dc51 Mon Sep 17 00:00:00 2001 From: JoeamAmier Date: Mon, 28 Aug 2023 21:39:44 +0800 Subject: [PATCH] =?UTF-8?q?=E6=96=B0=E5=A2=9E=E4=BD=9C=E5=93=81=E9=93=BE?= =?UTF-8?q?=E6=8E=A5=E6=A0=BC=E5=BC=8F=E5=88=A4=E6=96=AD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- main.py | 2 ++ source/__init__.py | 8 ++++++++ 2 files changed, 10 insertions(+) diff --git a/main.py b/main.py index 9a96cea..8b50a71 100644 --- a/main.py +++ b/main.py @@ -4,6 +4,7 @@ from source import XHS def example(): """使用示例""" # 测试链接 + error_demo = "https://www.xiaohongshu.com/explore/" image_demo = "https://www.xiaohongshu.com/explore/64d1b406000000000103ee8d" video_demo = "https://www.xiaohongshu.com/explore/64c05652000000000c0378e7" # 实例对象 @@ -26,6 +27,7 @@ def example(): # 无需区分图文和视频作品 # 返回作品详细数据,包括下载地址 download = True # 启用自动下载作品文件 + print(xhs.extract(error_demo)) # 获取数据失败时返回空字典 print(xhs.extract(image_demo, download=download)) print(xhs.extract(video_demo, download=download)) diff --git a/source/__init__.py b/source/__init__.py index 1b0792d..a155421 100644 --- a/source/__init__.py +++ b/source/__init__.py @@ -1,3 +1,5 @@ +from re import compile + from .Download import Download from .Explore import Explore from .Html import Html @@ -11,6 +13,7 @@ class XHS: "Referer": "https://www.xiaohongshu.com/", "Cookie": "abRequestId=27dafe41-28af-5b33-9f22-fe05d8c4ac2f; xsecappid=xhs-pc-web; a1=18a363d90c9gw7eaz2krqhj4cx2gtwgotul1wur8950000289463; webId=27fb29ed7ff41eadd4bc58197a465b63; websectiga=cffd9dcea65962b05ab048ac76962acee933d26157113bb213105a116241fa6c; sec_poison_id=3a1e34ee-3535-4ee9-8186-4d574da5291e; web_session=030037a3d84590608f6da85793234a9a6588ed; gid=yY0qKqfd2Y9qyY0qKqfj877FSjkEWd0uJTFA1YjxV4SCJy28k9EklE888JYj4Kq82242dKiY; webBuild=3.6.0; cache_feeds=[]", } + links = compile(r"https://www.xiaohongshu.com/explore/[0-9a-z]+") def __init__( self, @@ -44,6 +47,8 @@ class XHS: container["下载地址"] = url def extract(self, url: str, download=False) -> dict: + if not self.check(url): + return {} html = self.html.get_html(url) if not html: return {} @@ -53,3 +58,6 @@ class XHS: else: self.get_image(data, html, download) return data + + def check(self, url: str): + return self.links.match(url)