From 139db78853355fac88790f95d8ebcb62709359da Mon Sep 17 00:00:00 2001 From: JoeamAmier Date: Wed, 30 Aug 2023 20:49:42 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BC=98=E5=8C=96=E4=BB=A3=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 10 ++++------ main.py | 10 ++++------ source/Download.py | 6 +----- source/Html.py | 2 +- source/__init__.py | 10 +++++----- 5 files changed, 15 insertions(+), 23 deletions(-) diff --git a/README.md b/README.md index e7cf887..e9f05f9 100644 --- a/README.md +++ b/README.md @@ -17,17 +17,15 @@ video_demo = "https://www.xiaohongshu.com/explore/64c05652000000000c0378e7" # 实例对象 path = "./" # 作品下载储存根路径,默认值:当前路径 folder = "Download" # 作品下载文件夹名称(自动创建),默认值:Download -headers = { - "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36 Edg/115.0.1901.203", -} # 请求头 proxies = None # 代理 -timeout = 10 # 网络请求超时限制,默认值:10 +timeout = 5 # 网络请求超时限制,默认值:10 +chunk = 1024 * 1024 # 下载文件时,每次从服务器获取的数据块大小,单位字节 xhs = XHS( path=path, folder=folder, - headers=headers, proxies=proxies, - timeout=timeout,) # 使用自定义参数 + timeout=timeout, + chunk=chunk, ) # 使用自定义参数 # xhs = XHS() # 使用默认参数 # 无需区分图文和视频作品 # 返回作品详细数据,包括下载地址 diff --git a/main.py b/main.py index ceeeb39..518dec9 100644 --- a/main.py +++ b/main.py @@ -10,17 +10,15 @@ def example(): # 实例对象 path = "./" # 作品下载储存根路径,默认值:当前路径 folder = "Download" # 作品下载文件夹名称(自动创建),默认值:Download - headers = { - "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36 Edg/115.0.1901.203", - } # 请求头 proxies = None # 代理 - timeout = 10 # 网络请求超时限制,默认值:10 + timeout = 5 # 网络请求超时限制,默认值:10 + chunk = 1024 * 1024 # 下载文件时,每次从服务器获取的数据块大小,单位字节 xhs = XHS( path=path, folder=folder, - headers=headers, proxies=proxies, - timeout=timeout, ) # 使用自定义参数 + timeout=timeout, + chunk=chunk, ) # 使用自定义参数 # xhs = XHS() # 使用默认参数 # 无需区分图文和视频作品 # 返回作品详细数据,包括下载地址 diff --git a/source/Download.py b/source/Download.py index e310fd5..4b55088 100644 --- a/source/Download.py +++ b/source/Download.py @@ -14,7 +14,7 @@ class Download: proxies=None, chunk=256 * 1024, ): self.root = self.init_root(path, folder) - self.headers = self.init_headers(headers) + self.headers = headers self.proxies = { "http": proxies, "https": proxies, @@ -22,10 +22,6 @@ class Download: } self.chunk = chunk - @staticmethod - def init_headers(headers: dict) -> dict: - return {"User-Agent": headers["User-Agent"]} - @staticmethod def init_root(path: str, folder: str) -> Path: root = Path(path).joinpath(folder) diff --git a/source/Html.py b/source/Html.py index bf19e73..b0e4377 100644 --- a/source/Html.py +++ b/source/Html.py @@ -10,7 +10,7 @@ class Html: headers: dict, proxies=None, timeout=10, ): - self.headers = headers + self.headers = headers | {"Referer": "https://www.xiaohongshu.com/", } self.proxies = { "http": proxies, "https": proxies, diff --git a/source/__init__.py b/source/__init__.py index 9edf2e6..03b93e8 100644 --- a/source/__init__.py +++ b/source/__init__.py @@ -12,7 +12,6 @@ from .Video import Video class XHS: headers = { "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36", - "Referer": "https://www.xiaohongshu.com/", } links = compile(r"https://www.xiaohongshu.com/explore/[0-9a-z]+") @@ -20,14 +19,15 @@ class XHS: self, path="./", folder="Download", - headers=None, proxies=None, - timeout=10): - self.html = Html(headers or self.headers, proxies, timeout) + timeout=10, + chunk=256 * 1024, + ): + self.html = Html(self.headers, proxies, timeout) self.image = Image() self.video = Video() self.explore = Explore() - self.download = Download(path, folder, self.html.headers, proxies) + self.download = Download(path, folder, self.headers, proxies, chunk) def get_image(self, container: dict, html: str, download): urls = self.image.get_image_link(html)