diff --git a/README.md b/README.md
index b01126d..3a5f585 100644
--- a/README.md
+++ b/README.md
@@ -11,10 +11,11 @@
📑 功能清单
- ✅ 采集小红书图文/视频作品信息
-- ✅ 获取小红书图文/视频作品文件下载地址
+- ✅ 提取小红书图文/视频作品文件下载地址
- ✅ 下载小红书无水印图文/视频作品文件
- ✅ 自动跳过已下载的作品文件
- ✅ 作品文件完整性处理机制
+- ☑️ 采集作品信息储存至文件
📸 程序截图
@@ -25,7 +26,7 @@
https://www.xiaohongshu.com/discovery/item/作品ID
https://xhslink.com/分享码
-可以单次输入多个作品链接,链接之间使用空格分隔。
+支持单次输入多个作品链接,链接之间使用空格分隔。
🪟 关于终端
⭐ 推荐使用 Windows 终端 (Windows 11 自带默认终端)运行程序以便获得最佳显示效果!
@@ -41,7 +42,7 @@
运行 main.py 即可使用
💻 二次开发
-如果想要获取小红书图文/视频作品信息,可以根据 main.py 的注释提示进行代码调用。
+如果需要获取小红书图文/视频作品信息,可以根据 main.py 的注释提示进行代码调用。
# 测试链接
error_demo = "https://github.com/JoeanAmier/XHS_Downloader"
@@ -49,24 +50,26 @@ image_demo = "https://www.xiaohongshu.com/explore/63b275a30000000019020185"
video_demo = "https://www.xiaohongshu.com/explore/64edb460000000001f03cadc"
multiple_demo = f"{image_demo} {video_demo}"
# 实例对象
-path = "D:\\" # 作品下载储存根路径,默认值:当前路径
+path = "" # 作品下载储存根路径,默认值:项目根路径
folder = "Download" # 作品下载文件夹名称(自动创建),默认值:Download
-proxies = None # 网络代理
+user_agent = "" # 请求头 User-Agent
+proxy = None # 网络代理
timeout = 5 # 网络请求超时限制,默认值:10
chunk = 1024 * 1024 # 下载文件时,每次从服务器获取的数据块大小,单位字节
-# with XHS() as xhs:
+# async with XHS() as xhs:
# pass # 使用默认参数
-with XHS(path=path,
- folder=folder,
- proxies=proxies,
- timeout=timeout,
- chunk=chunk) as xhs: # 使用自定义参数
+async with XHS(path=path,
+ folder=folder,
+ user_agent=user_agent,
+ proxy=proxy,
+ timeout=timeout,
+ chunk=chunk) as xhs: # 使用自定义参数
download = True # 是否下载作品文件,默认值:False
# 返回作品详细信息,包括下载地址
- print(xhs.extract(error_demo)) # 获取数据失败时返回空字典
- print(xhs.extract(image_demo, download=download))
- print(xhs.extract(video_demo, download=download))
- print(xhs.extract(multiple_demo, download=download))
+ print(await xhs.extract(error_demo, download=download)) # 获取数据失败时返回空字典
+ print(await xhs.extract(image_demo, download=download))
+ print(await xhs.extract(video_demo, download=download))
+ print(await xhs.extract(multiple_demo, download=download)) # 支持传入多个作品链接
⚙️ 配置文件
项目根目录下的 settings.json 文件,首次运行自动生成,可以自定义部分运行参数。
@@ -83,7 +86,7 @@ with XHS(path=path,
| path |
str |
-作品文件储存根路径 |
+作品数据 / 文件保存根路径 |
项目根路径 |
@@ -93,6 +96,12 @@ with XHS(path=path,
| Download |
+| user_agent |
+str |
+请求头 User-Agent |
+内置 UA |
+
+
| proxy |
str |
设置代理 |
diff --git a/main.py b/main.py
index 46bad1f..2f7d36f 100644
--- a/main.py
+++ b/main.py
@@ -11,24 +11,26 @@ async def example():
video_demo = "https://www.xiaohongshu.com/explore/64edb460000000001f03cadc"
multiple_demo = f"{image_demo} {video_demo}"
# 实例对象
- path = "D:\\" # 作品下载储存根路径,默认值:当前路径
+ path = "" # 作品下载储存根路径,默认值:当前路径
folder = "Download" # 作品下载文件夹名称(自动创建),默认值:Download
- proxies = None # 网络代理
+ user_agent = "" # 请求头 User-Agent
+ proxy = None # 网络代理
timeout = 5 # 网络请求超时限制,默认值:10
chunk = 1024 * 1024 # 下载文件时,每次从服务器获取的数据块大小,单位字节
- async with XHS() as xhs:
- pass # 使用默认参数
+ # async with XHS() as xhs:
+ # pass # 使用默认参数
async with XHS(path=path,
folder=folder,
- proxy=proxies,
+ user_agent=user_agent,
+ proxy=proxy,
timeout=timeout,
chunk=chunk) as xhs: # 使用自定义参数
- download = False # 是否下载作品文件,默认值:False
+ download = True # 是否下载作品文件,默认值:False
# 返回作品详细信息,包括下载地址
- print(await xhs.extract(error_demo)) # 获取数据失败时返回空字典
+ print(await xhs.extract(error_demo, download=download)) # 获取数据失败时返回空字典
print(await xhs.extract(image_demo, download=download))
print(await xhs.extract(video_demo, download=download))
- print(await xhs.extract(multiple_demo, download=download))
+ print(await xhs.extract(multiple_demo, download=download)) # 支持传入多个作品链接
if __name__ == '__main__':
diff --git a/source/Downloader.py b/source/Downloader.py
index c69b31d..4a8f887 100644
--- a/source/Downloader.py
+++ b/source/Downloader.py
@@ -1,11 +1,8 @@
from pathlib import Path
-from aiohttp import ClientConnectionError
-from aiohttp import ClientProxyConnectionError
-from aiohttp import ClientSSLError
from aiohttp import ClientSession
-
-# from aiohttp import ClientTimeout
+from aiohttp import ClientTimeout
+from aiohttp import ServerTimeoutError
__all__ = ['Download']
@@ -26,8 +23,9 @@ class Download:
self.root = self.__init_root(root, path, folder)
self.proxy = proxy
self.chunk = chunk
- # self.timeout = ClientTimeout(total=timeout)
- self.session = ClientSession(headers=manager.headers)
+ self.session = ClientSession(
+ headers=manager.headers,
+ timeout=ClientTimeout(connect=timeout))
def __init_root(self, root: Path, path: str, folder: str) -> Path:
if path and (r := Path(path)).is_dir():
@@ -42,8 +40,10 @@ class Download:
if type_ == 0:
await self.__download(urls[0], f"{name}.mp4", log, bar)
elif type_ == 1:
- for index, url in enumerate(urls):
- await self.__download(url, f"{name}_{index + 1}.png", log, bar)
+ for index, url in enumerate(urls, start=1):
+ await self.__download(url, f"{name}_{index}.png", log, bar)
+ else:
+ raise ValueError
async def __download(self, url: str, name: str, log, bar):
temp = self.temp.joinpath(name)
@@ -52,32 +52,26 @@ class Download:
return
try:
async with self.session.get(url, proxy=self.proxy) as response:
- # self.__create_progress(bar, int(response.headers.get('content-length', 0)))
+ self.__create_progress(
+ bar, int(
+ response.headers.get(
+ 'content-length', 0)) or None)
with temp.open("wb") as f:
async for chunk in response.content.iter_chunked(self.chunk):
f.write(chunk)
- # self.__update_progress(bar, len(chunk))
- # self.__remove_progress(bar)
+ self.__update_progress(bar, len(chunk))
self.manager.move(temp, file)
- except (
- ClientProxyConnectionError,
- ClientSSLError,
- ClientConnectionError,
- TimeoutError,
- ):
+ self.__create_progress(bar, None)
+ except ServerTimeoutError:
self.manager.delete(temp)
- # self.__remove_progress(bar)
+ self.__create_progress(bar, None)
- # @staticmethod
- # def __create_progress(bar, total: int | None):
- # if bar:
- # bar.update(total=total)
- #
- # @staticmethod
- # def __update_progress(bar, advance: int):
- # if bar:
- # bar.advance(advance)
- #
- # @staticmethod
- # def __remove_progress(bar):
- # pass
+ @staticmethod
+ def __create_progress(bar, total: int | None):
+ if bar:
+ bar.update(total=total)
+
+ @staticmethod
+ def __update_progress(bar, advance: int):
+ if bar:
+ bar.advance(advance)
diff --git a/source/Html.py b/source/Html.py
index 8be1848..e84db65 100644
--- a/source/Html.py
+++ b/source/Html.py
@@ -1,9 +1,6 @@
-from aiohttp import ClientConnectionError
-from aiohttp import ClientProxyConnectionError
-from aiohttp import ClientSSLError
from aiohttp import ClientSession
-
-# from aiohttp import ClientTimeout
+from aiohttp import ClientTimeout
+from aiohttp import ServerTimeoutError
__all__ = ['Html']
@@ -18,7 +15,9 @@ class Html:
self.proxy = proxy
self.session = ClientSession(
headers=headers | {
- "Referer": "https://www.xiaohongshu.com/", })
+ "Referer": "https://www.xiaohongshu.com/", },
+ timeout=ClientTimeout(connect=timeout),
+ )
async def request_url(
self,
@@ -30,11 +29,7 @@ class Html:
proxy=self.proxy,
) as response:
return await response.text() if text else response.url
- except (
- ClientProxyConnectionError,
- ClientSSLError,
- ClientConnectionError,
- ):
+ except ServerTimeoutError:
return ""
@staticmethod
diff --git a/source/Manager.py b/source/Manager.py
index 502d921..ee5b30e 100644
--- a/source/Manager.py
+++ b/source/Manager.py
@@ -6,12 +6,11 @@ __all__ = ["Manager"]
class Manager:
- headers = {
- "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) "
- "Chrome/119.0.0.0 Safari/537.36", }
-
- def __init__(self, root: Path):
+ def __init__(self, root: Path, ua: str):
self.temp = root.joinpath("./temp")
+ self.headers = {
+ "User-Agent": ua or "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) "
+ "Chrome/119.0.0.0 Safari/537.36 Edg/119.0.0.0", }
@staticmethod
def is_exists(path: Path) -> bool:
diff --git a/source/Settings.py b/source/Settings.py
index f705536..a8a0b4d 100644
--- a/source/Settings.py
+++ b/source/Settings.py
@@ -9,7 +9,8 @@ class Settings:
default = {
"path": "",
"folder": "Download",
- "proxies": None,
+ "user_agent": "",
+ "proxy": "",
"timeout": 10,
"chunk": 1024 * 1024,
}
diff --git a/source/__init__.py b/source/__init__.py
index b69089d..5a7d280 100644
--- a/source/__init__.py
+++ b/source/__init__.py
@@ -30,20 +30,27 @@ __all__ = ['XHS', 'XHSDownloader']
class XHS:
ROOT = Path(__file__).resolve().parent.parent
- link = compile(r"https://www\.xiaohongshu\.com/explore/[a-z0-9]+")
- share = compile(r"https://www\.xiaohongshu\.com/discovery/item/[a-z0-9]+")
- short = compile(r"https://xhslink\.com/[A-Za-z0-9]+")
+ LINK = compile(r"https://www\.xiaohongshu\.com/explore/[a-z0-9]+")
+ SHARE = compile(r"https://www\.xiaohongshu\.com/discovery/item/[a-z0-9]+")
+ SHORT = compile(r"https://xhslink\.com/[A-Za-z0-9]+")
+ __INSTANCE = None
+
+ def __new__(cls, *args, **kwargs):
+ if not cls.__INSTANCE:
+ cls.__INSTANCE = super().__new__(cls)
+ return cls.__INSTANCE
def __init__(
self,
path="",
folder="Download",
- proxy=None,
+ user_agent: str = None,
+ proxy: str = None,
timeout=10,
chunk=1024 * 1024,
**kwargs,
):
- self.manager = Manager(self.ROOT)
+ self.manager = Manager(self.ROOT, user_agent)
self.html = Html(self.manager.headers, proxy, timeout)
self.image = Image()
self.video = Video()
@@ -81,12 +88,12 @@ class XHS:
async def __deal_links(self, url: str) -> list:
urls = []
for i in url.split():
- if u := self.short.search(i):
+ if u := self.SHORT.search(i):
i = await self.html.request_url(
u.group(), False)
- if u := self.share.search(i):
+ if u := self.SHARE.search(i):
urls.append(u.group())
- elif u := self.link.search(i):
+ elif u := self.LINK.search(i):
urls.append(u.group())
return urls
@@ -118,18 +125,19 @@ class XHS:
await self.html.session.close()
await self.download.session.close()
- def rich_log(self, log, text, style="b bright_green"):
+ @staticmethod
+ def rich_log(log, text, style="b bright_green"):
if log:
log.write(Text(text, style=style))
else:
- self.console.print(text, style=style)
+ print(text)
class XHSDownloader(App):
VERSION = 1.6
BETA = True
ROOT = Path(__file__).resolve().parent.parent
- APP = XHS(**Settings(ROOT).run())
+ # APP = XHS(**Settings(ROOT).run())
CSS_PATH = ROOT.joinpath(
"static/XHS-Downloader.tcss")
BINDINGS = [