diff --git a/README.md b/README.md index b01126d..3a5f585 100644 --- a/README.md +++ b/README.md @@ -11,10 +11,11 @@

📑 功能清单

📸 程序截图


@@ -25,7 +26,7 @@
  • https://www.xiaohongshu.com/discovery/item/作品ID
  • https://xhslink.com/分享码

  • -

    可以单次输入多个作品链接,链接之间使用空格分隔。

    +

    支持单次输入多个作品链接,链接之间使用空格分隔。

    🪟 关于终端

    ⭐ 推荐使用 Windows 终端(Windows 11 自带默认终端)运行程序,以便获得最佳显示效果!

    @@ -41,7 +42,7 @@
  • 运行 main.py 即可使用
  • 💻 二次开发

    -

    如果想要获取小红书图文/视频作品信息,可以根据 main.py 的注释提示进行代码调用。

    +

    如果需要获取小红书图文/视频作品信息,可以根据 main.py 的注释提示进行代码调用。

     # 测试链接
     error_demo = "https://github.com/JoeanAmier/XHS_Downloader"
    @@ -49,24 +50,26 @@ image_demo = "https://www.xiaohongshu.com/explore/63b275a30000000019020185"
     video_demo = "https://www.xiaohongshu.com/explore/64edb460000000001f03cadc"
     multiple_demo = f"{image_demo} {video_demo}"
     # 实例对象
    -path = "D:\\"  # 作品下载储存根路径,默认值:当前路径
    +path = ""  # 作品下载储存根路径,默认值:当前路径
     folder = "Download"  # 作品下载文件夹名称(自动创建),默认值:Download
    -proxies = None  # 网络代理
    +user_agent = ""  # 请求头 User-Agent
    +proxy = None  # 网络代理
     timeout = 5  # 网络请求超时限制,默认值:10
     chunk = 1024 * 1024  # 下载文件时,每次从服务器获取的数据块大小,单位字节
    -# with XHS() as xhs:
    +# async with XHS() as xhs:
     #     pass  # 使用默认参数
    -with XHS(path=path,
    -         folder=folder,
    -         proxies=proxies,
    -         timeout=timeout,
    -         chunk=chunk) as xhs:  # 使用自定义参数
    +async with XHS(path=path,
    +               folder=folder,
    +               user_agent=user_agent,
    +               proxy=proxy,
    +               timeout=timeout,
    +               chunk=chunk) as xhs:  # 使用自定义参数
         download = True  # 是否下载作品文件,默认值:False
         # 返回作品详细信息,包括下载地址
    -    print(xhs.extract(error_demo))  # 获取数据失败时返回空字典
    -    print(xhs.extract(image_demo, download=download))
    -    print(xhs.extract(video_demo, download=download))
    -    print(xhs.extract(multiple_demo, download=download))
    +    print(await xhs.extract(error_demo, download=download))  # 获取数据失败时返回空字典
    +    print(await xhs.extract(image_demo, download=download))
    +    print(await xhs.extract(video_demo, download=download))
    +    print(await xhs.extract(multiple_demo, download=download))  # 支持传入多个作品链接
     

    ⚙️ 配置文件

    项目根目录下的 settings.json 文件,首次运行自动生成,可以自定义部分运行参数。

    @@ -83,7 +86,7 @@ with XHS(path=path, path str -作品文件储存根路径 +作品数据 / 文件保存根路径 项目根路径 @@ -93,6 +96,12 @@ with XHS(path=path, Download +user_agent +str +请求头 User-Agent +内置 UA + + proxy str 设置代理 diff --git a/main.py b/main.py index 46bad1f..2f7d36f 100644 --- a/main.py +++ b/main.py @@ -11,24 +11,26 @@ async def example(): video_demo = "https://www.xiaohongshu.com/explore/64edb460000000001f03cadc" multiple_demo = f"{image_demo} {video_demo}" # 实例对象 - path = "D:\\" # 作品下载储存根路径,默认值:当前路径 + path = "" # 作品下载储存根路径,默认值:当前路径 folder = "Download" # 作品下载文件夹名称(自动创建),默认值:Download - proxies = None # 网络代理 + user_agent = "" # 请求头 User-Agent + proxy = None # 网络代理 timeout = 5 # 网络请求超时限制,默认值:10 chunk = 1024 * 1024 # 下载文件时,每次从服务器获取的数据块大小,单位字节 - async with XHS() as xhs: - pass # 使用默认参数 + # async with XHS() as xhs: + # pass # 使用默认参数 async with XHS(path=path, folder=folder, - proxy=proxies, + user_agent=user_agent, + proxy=proxy, timeout=timeout, chunk=chunk) as xhs: # 使用自定义参数 - download = False # 是否下载作品文件,默认值:False + download = True # 是否下载作品文件,默认值:False # 返回作品详细信息,包括下载地址 - print(await xhs.extract(error_demo)) # 获取数据失败时返回空字典 + print(await xhs.extract(error_demo, download=download)) # 获取数据失败时返回空字典 print(await xhs.extract(image_demo, download=download)) print(await xhs.extract(video_demo, download=download)) - print(await xhs.extract(multiple_demo, download=download)) + print(await xhs.extract(multiple_demo, download=download)) # 支持传入多个作品链接 if __name__ == '__main__': diff --git a/source/Downloader.py b/source/Downloader.py index c69b31d..4a8f887 100644 --- a/source/Downloader.py +++ b/source/Downloader.py @@ -1,11 +1,8 @@ from pathlib import Path -from aiohttp import ClientConnectionError -from aiohttp import ClientProxyConnectionError -from aiohttp import ClientSSLError from aiohttp import ClientSession - -# from aiohttp import ClientTimeout +from aiohttp import ClientTimeout +from aiohttp import ServerTimeoutError __all__ = ['Download'] @@ -26,8 +23,9 @@ class Download: self.root = 
self.__init_root(root, path, folder) self.proxy = proxy self.chunk = chunk - # self.timeout = ClientTimeout(total=timeout) - self.session = ClientSession(headers=manager.headers) + self.session = ClientSession( + headers=manager.headers, + timeout=ClientTimeout(connect=timeout)) def __init_root(self, root: Path, path: str, folder: str) -> Path: if path and (r := Path(path)).is_dir(): @@ -42,8 +40,10 @@ class Download: if type_ == 0: await self.__download(urls[0], f"{name}.mp4", log, bar) elif type_ == 1: - for index, url in enumerate(urls): - await self.__download(url, f"{name}_{index + 1}.png", log, bar) + for index, url in enumerate(urls, start=1): + await self.__download(url, f"{name}_{index}.png", log, bar) + else: + raise ValueError async def __download(self, url: str, name: str, log, bar): temp = self.temp.joinpath(name) @@ -52,32 +52,26 @@ class Download: return try: async with self.session.get(url, proxy=self.proxy) as response: - # self.__create_progress(bar, int(response.headers.get('content-length', 0))) + self.__create_progress( + bar, int( + response.headers.get( + 'content-length', 0)) or None) with temp.open("wb") as f: async for chunk in response.content.iter_chunked(self.chunk): f.write(chunk) - # self.__update_progress(bar, len(chunk)) - # self.__remove_progress(bar) + self.__update_progress(bar, len(chunk)) self.manager.move(temp, file) - except ( - ClientProxyConnectionError, - ClientSSLError, - ClientConnectionError, - TimeoutError, - ): + self.__create_progress(bar, None) + except ServerTimeoutError: self.manager.delete(temp) - # self.__remove_progress(bar) + self.__create_progress(bar, None) - # @staticmethod - # def __create_progress(bar, total: int | None): - # if bar: - # bar.update(total=total) - # - # @staticmethod - # def __update_progress(bar, advance: int): - # if bar: - # bar.advance(advance) - # - # @staticmethod - # def __remove_progress(bar): - # pass + @staticmethod + def __create_progress(bar, total: int | None): + if bar: + 
bar.update(total=total) + + @staticmethod + def __update_progress(bar, advance: int): + if bar: + bar.advance(advance) diff --git a/source/Html.py b/source/Html.py index 8be1848..e84db65 100644 --- a/source/Html.py +++ b/source/Html.py @@ -1,9 +1,6 @@ -from aiohttp import ClientConnectionError -from aiohttp import ClientProxyConnectionError -from aiohttp import ClientSSLError from aiohttp import ClientSession - -# from aiohttp import ClientTimeout +from aiohttp import ClientTimeout +from aiohttp import ServerTimeoutError __all__ = ['Html'] @@ -18,7 +15,9 @@ class Html: self.proxy = proxy self.session = ClientSession( headers=headers | { - "Referer": "https://www.xiaohongshu.com/", }) + "Referer": "https://www.xiaohongshu.com/", }, + timeout=ClientTimeout(connect=timeout), + ) async def request_url( self, @@ -30,11 +29,7 @@ class Html: proxy=self.proxy, ) as response: return await response.text() if text else response.url - except ( - ClientProxyConnectionError, - ClientSSLError, - ClientConnectionError, - ): + except ServerTimeoutError: return "" @staticmethod diff --git a/source/Manager.py b/source/Manager.py index 502d921..ee5b30e 100644 --- a/source/Manager.py +++ b/source/Manager.py @@ -6,12 +6,11 @@ __all__ = ["Manager"] class Manager: - headers = { - "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) " - "Chrome/119.0.0.0 Safari/537.36", } - - def __init__(self, root: Path): + def __init__(self, root: Path, ua: str): self.temp = root.joinpath("./temp") + self.headers = { + "User-Agent": ua or "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) " + "Chrome/119.0.0.0 Safari/537.36 Edg/119.0.0.0", } @staticmethod def is_exists(path: Path) -> bool: diff --git a/source/Settings.py b/source/Settings.py index f705536..a8a0b4d 100644 --- a/source/Settings.py +++ b/source/Settings.py @@ -9,7 +9,8 @@ class Settings: default = { "path": "", "folder": "Download", - "proxies": None, + "user_agent": 
"", + "proxy": "", "timeout": 10, "chunk": 1024 * 1024, } diff --git a/source/__init__.py b/source/__init__.py index b69089d..5a7d280 100644 --- a/source/__init__.py +++ b/source/__init__.py @@ -30,20 +30,27 @@ __all__ = ['XHS', 'XHSDownloader'] class XHS: ROOT = Path(__file__).resolve().parent.parent - link = compile(r"https://www\.xiaohongshu\.com/explore/[a-z0-9]+") - share = compile(r"https://www\.xiaohongshu\.com/discovery/item/[a-z0-9]+") - short = compile(r"https://xhslink\.com/[A-Za-z0-9]+") + LINK = compile(r"https://www\.xiaohongshu\.com/explore/[a-z0-9]+") + SHARE = compile(r"https://www\.xiaohongshu\.com/discovery/item/[a-z0-9]+") + SHORT = compile(r"https://xhslink\.com/[A-Za-z0-9]+") + __INSTANCE = None + + def __new__(cls, *args, **kwargs): + if not cls.__INSTANCE: + cls.__INSTANCE = super().__new__(cls) + return cls.__INSTANCE def __init__( self, path="", folder="Download", - proxy=None, + user_agent: str = None, + proxy: str = None, timeout=10, chunk=1024 * 1024, **kwargs, ): - self.manager = Manager(self.ROOT) + self.manager = Manager(self.ROOT, user_agent) self.html = Html(self.manager.headers, proxy, timeout) self.image = Image() self.video = Video() @@ -81,12 +88,12 @@ class XHS: async def __deal_links(self, url: str) -> list: urls = [] for i in url.split(): - if u := self.short.search(i): + if u := self.SHORT.search(i): i = await self.html.request_url( u.group(), False) - if u := self.share.search(i): + if u := self.SHARE.search(i): urls.append(u.group()) - elif u := self.link.search(i): + elif u := self.LINK.search(i): urls.append(u.group()) return urls @@ -118,18 +125,19 @@ class XHS: await self.html.session.close() await self.download.session.close() - def rich_log(self, log, text, style="b bright_green"): + @staticmethod + def rich_log(log, text, style="b bright_green"): if log: log.write(Text(text, style=style)) else: - self.console.print(text, style=style) + print(text) class XHSDownloader(App): VERSION = 1.6 BETA = True ROOT = 
Path(__file__).resolve().parent.parent - APP = XHS(**Settings(ROOT).run()) + # APP = XHS(**Settings(ROOT).run()) CSS_PATH = ROOT.joinpath( "static/XHS-Downloader.tcss") BINDINGS = [