From 15e412e6aed46bc46fc5230082c585f5809681ac Mon Sep 17 00:00:00 2001 From: JoeanAmier Date: Wed, 21 Aug 2024 19:47:25 +0800 Subject: [PATCH] =?UTF-8?q?feat(download.py):=20=E6=96=B0=E5=A2=9E?= =?UTF-8?q?=E6=96=87=E4=BB=B6=E4=B8=8B=E8=BD=BD=E5=BB=B6=E6=97=B6=E5=A4=84?= =?UTF-8?q?=E7=90=86?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- source/application/app.py | 57 +++++++++++++++++++++++++--------- source/application/download.py | 3 ++ source/module/__init__.py | 1 + source/module/tools.py | 10 ++++++ 4 files changed, 57 insertions(+), 14 deletions(-) diff --git a/source/application/app.py b/source/application/app.py index 47a425d..796947f 100644 --- a/source/application/app.py +++ b/source/application/app.py @@ -150,7 +150,14 @@ class XHS: container["下载地址"] = self.video.get_video_link(data) container["动图地址"] = [None, ] - async def __download_files(self, container: dict, download: bool, index, log, bar): + async def __download_files( + self, + container: dict, + download: bool, + index, + log, + bar, + ): name = self.__naming_rules(container) if (u := container["下载地址"]) and download: if await self.skip_download(i := container["作品ID"]): @@ -178,17 +185,19 @@ class XHS: data["动图地址"] = " ".join(i or "NaN" for i in data["动图地址"]) await self.data_recorder.add(**data) - async def __add_record(self, id_: str, result: tuple) -> None: + async def __add_record(self, id_: str, result: list) -> None: if all(result): await self.id_recorder.add(id_) - async def extract(self, - url: str, - download=False, - index: list | tuple = None, - log=None, - bar=None, - data=True, ) -> list[dict]: + async def extract( + self, + url: str, + download=False, + index: list | tuple = None, + log=None, + bar=None, + data=True, + ) -> list[dict]: # return # 调试代码 urls = await self.__extract_links(url, log) if not urls: @@ -199,13 +208,15 @@ class XHS: # return urls # 调试代码 return [await self.__deal_extract(i, download, index, log, bar, data, ) for i in urls] - async def extract_cli(self, + async def extract_cli( + self, url: str, download=True, index: list | tuple = None, log=None, bar=None, - data=False, ) -> None: + data=False, + ) -> None: url = await self.__extract_links(url, log) if not url: logging(log, self.message("提取小红书作品链接失败"), WARNING) @@ -217,14 +228,25 @@ class XHS: for i in url.split(): if u := self.SHORT.search(i): i = await self.html.request_url( - u.group(), False, log) + u.group(), + False, + log, + ) if u := self.SHARE.search(i): urls.append(u.group()) elif u := self.LINK.search(i): urls.append(u.group()) return urls - async def __deal_extract(self, url: str, download: bool, index: list | tuple | None, log, bar, data: bool, ): + async def __deal_extract( + self, + url: str, + download: bool, + index: list | tuple | None, + log, + bar, + data: bool, + ): if await self.skip_download(i := self.__extract_link_id(url)) and not data: msg = self.message("作品 {0} 存在下载记录,跳过处理").format(i) logging(log, msg) @@ -294,7 +316,14 @@ class XHS: 64, ) or data["作品ID"] - async def monitor(self, delay=1, download=False, log=None, bar=None, data=True, ) -> None: + async def monitor( + self, + delay=1, + download=False, + log=None, + bar=None, + data=True, + ) -> None: logging( None, self.message( diff --git a/source/application/download.py b/source/application/download.py index 1ad6aaf..ee73976 100644 --- a/source/application/download.py +++ b/source/application/download.py @@ -9,6 +9,7 @@ from source.module import ERROR from source.module import Manager from source.module import logging from source.module import retry as re_download +from source.module import sleep_time if TYPE_CHECKING: from httpx import AsyncClient @@ -157,6 +158,7 @@ class Download: self.__update_headers_range(headers, temp, ) try: async with self.client.stream("GET", url, headers=headers, ) as response: + await sleep_time() response.raise_for_status() # self.__create_progress( # bar, @@ -211,6 +213,7 @@ class Download: url, headers=headers, ) + await sleep_time() response.raise_for_status() suffix = self.__extract_type( response.headers.get("Content-Type")) or suffix diff --git a/source/module/__init__.py b/source/module/__init__.py index db29e74..dadc533 100644 --- a/source/module/__init__.py +++ b/source/module/__init__.py @@ -32,5 +32,6 @@ from .static import ( from .tools import ( retry, logging, + sleep_time, ) from .translator import Translate diff --git a/source/module/tools.py b/source/module/tools.py index 5f10731..f29a39c 100644 --- a/source/module/tools.py +++ b/source/module/tools.py @@ -1,3 +1,6 @@ +from asyncio import sleep +from random import uniform + from rich import print from rich.text import Text @@ -24,3 +27,10 @@ def logging(log, text, style=INFO): log.write(string) else: print(string) + + +async def sleep_time( + min_time: int = 1, + max_time: int = 3, +): + await sleep(uniform(min_time, max_time))