From 301152e0c7b44105a617960c7ea6dd06c8dfbde5 Mon Sep 17 00:00:00 2001
From: JoeamAmier
Date: Tue, 29 Aug 2023 19:17:27 +0800
Subject: [PATCH] =?UTF-8?q?=E7=A7=BB=E9=99=A4=20Cookie?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 README.md          | 6 +++---
 main.py            | 4 +---
 source/Download.py | 7 ++++---
 source/__init__.py | 9 +--------
 4 files changed, 9 insertions(+), 17 deletions(-)

diff --git a/README.md b/README.md
index 7a7917f..e7cf887 100644
--- a/README.md
+++ b/README.md
@@ -11,6 +11,7 @@

使用示例

 # 测试链接
+error_demo = "https://www.xiaohongshu.com/explore/"
 image_demo = "https://www.xiaohongshu.com/explore/64d1b406000000000103ee8d"
 video_demo = "https://www.xiaohongshu.com/explore/64c05652000000000c0378e7"
 # 实例对象
@@ -21,18 +22,17 @@ headers = {
 }  # 请求头
 proxies = None  # 代理
 timeout = 10  # 网络请求超时限制,默认值:10
-cookie = ""  # 小红书网页 cookie,无需登录,获取数据失败时可以尝试手动设置
 xhs = XHS(
     path=path,
     folder=folder,
     headers=headers,
     proxies=proxies,
-    timeout=timeout,
-    cookie=cookie)  # 使用自定义参数
+    timeout=timeout,)  # 使用自定义参数
 # xhs = XHS()  # 使用默认参数
 # 无需区分图文和视频作品
 # 返回作品详细数据,包括下载地址
 download = True  # 启用自动下载作品文件
+print(xhs.extract(error_demo))  # 获取数据失败时返回空字典
 print(xhs.extract(image_demo, download=download))
 print(xhs.extract(video_demo, download=download))
 
diff --git a/main.py b/main.py index e423ecf..ceeeb39 100644 --- a/main.py +++ b/main.py @@ -15,14 +15,12 @@ def example(): } # 请求头 proxies = None # 代理 timeout = 10 # 网络请求超时限制,默认值:10 - cookie = "" # 小红书网页 cookie,无需登录,获取数据失败时可以尝试手动设置 xhs = XHS( path=path, folder=folder, headers=headers, proxies=proxies, - timeout=timeout, - cookie=cookie) # 使用自定义参数 + timeout=timeout, ) # 使用自定义参数 # xhs = XHS() # 使用默认参数 # 无需区分图文和视频作品 # 返回作品详细数据,包括下载地址 diff --git a/source/Download.py b/source/Download.py index 5b89589..e310fd5 100644 --- a/source/Download.py +++ b/source/Download.py @@ -5,14 +5,14 @@ from requests import get class Download: - chunk = 262144 def __init__( self, path, folder, headers: dict, - proxies=None, ): + proxies=None, + chunk=256 * 1024, ): self.root = self.init_root(path, folder) self.headers = self.init_headers(headers) self.proxies = { @@ -20,6 +20,7 @@ class Download: "https": proxies, "ftp": proxies, } + self.chunk = chunk @staticmethod def init_headers(headers: dict) -> dict: @@ -47,4 +48,4 @@ class Download: f.write(chunk) print(f"{name} 下载成功!") except exceptions.ChunkedEncodingError: - print("网络异常,下载文件失败!") + print(f"网络异常,{name} 下载失败!") diff --git a/source/__init__.py b/source/__init__.py index 1ea88c0..9edf2e6 100644 --- a/source/__init__.py +++ b/source/__init__.py @@ -13,7 +13,6 @@ class XHS: headers = { "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36", "Referer": "https://www.xiaohongshu.com/", - "Cookie": "abRequestId=27dafe41-28af-5b33-9f22-fe05d8c4ac2f; xsecappid=xhs-pc-web; a1=18a363d90c9gw7eaz2krqhj4cx2gtwgotul1wur8950000289463; webId=27fb29ed7ff41eadd4bc58197a465b63; websectiga=cffd9dcea65962b05ab048ac76962acee933d26157113bb213105a116241fa6c; sec_poison_id=3a1e34ee-3535-4ee9-8186-4d574da5291e; web_session=030037a3d84590608f6da85793234a9a6588ed; gid=yY0qKqfd2Y9qyY0qKqfj877FSjkEWd0uJTFA1YjxV4SCJy28k9EklE888JYj4Kq82242dKiY; webBuild=3.6.0; cache_feeds=[]", } links = 
compile(r"https://www.xiaohongshu.com/explore/[0-9a-z]+") @@ -23,19 +22,13 @@ class XHS: folder="Download", headers=None, proxies=None, - timeout=10, - cookie=None): - self.set_cookie(cookie) + timeout=10): self.html = Html(headers or self.headers, proxies, timeout) self.image = Image() self.video = Video() self.explore = Explore() self.download = Download(path, folder, self.html.headers, proxies) - def set_cookie(self, cookie: str): - if cookie: - self.headers["Cookie"] = cookie - def get_image(self, container: dict, html: str, download): urls = self.image.get_image_link(html) if download: