mirror of
https://github.com/JoeanAmier/XHS-Downloader.git
synced 2025-12-26 12:56:22 +08:00
1. 移除 sec_ch_ua_platform 参数 2. 移除 sec_ch_ua 参数 3. 优化请求延时间隔 4. 优化并发下载功能 5. 修正英语翻译错误 6. 新增并发下载限制 7. 修正命令行模式错误 8. 简化数据请求头 Closes #86 Closes #87 Closes #93 Closes #98 Closes #105 Closes #109 Closes #110 Closes #140 Closes #152 Closes #154 Closes #157 Closes #159 Closes #160 Closes #162 Closes #164 Closes #165
68 lines
2.0 KiB
Python
68 lines
2.0 KiB
Python
from httpx import HTTPError
|
|
|
|
from source.module import ERROR
|
|
from source.module import Manager
|
|
from source.module import logging
|
|
from source.module import retry
|
|
from source.module import sleep_time
|
|
|
|
# Names exported by a star-import of this module.
__all__ = ["Html"]
|
|
|
|
|
|
class Html:
    """Asynchronous helper that fetches page text or resolves a final URL.

    All network traffic goes through the client object supplied by the
    ``Manager`` instance passed to the constructor.
    """

    def __init__(self, manager: Manager, ):
        """Copy the request-related settings from ``manager``."""
        # NOTE(review): ``self.retry`` is not read in this class; presumably
        # it is consumed by the ``@retry`` decorator -- confirm.
        self.retry = manager.retry
        self.message = manager.message
        self.client = manager.request_client
        self.headers = manager.headers
        self.blank_headers = manager.blank_headers

    @retry
    async def request_url(
            self,
            url: str,
            content=True,
            log=None,
            **kwargs,
    ) -> str:
        """Request ``url`` and return either its body text or its final URL.

        Args:
            url: Address to request.
            content: When truthy, send a GET request and return the response
                text (after ``raise_for_status``). When falsy, send a HEAD
                request and return ``str(response.url)`` without checking the
                status code.
            log: Forwarded as the first argument to ``logging`` when an error
                is reported.
            **kwargs: Extra keyword arguments forwarded to the client call.

        Returns:
            The response text or the response URL, or ``""`` when an
            ``HTTPError`` is raised inside the request.
        """
        headers = self.select_headers(url, )
        try:
            # A plain truthiness test guarantees one of the two branches runs;
            # a ``match`` on the literals True/False would silently fall
            # through (returning None) for any non-bool argument.
            if content:
                response = await self.__request_url_get(url, headers, **kwargs, )
                # The delay is awaited before the status check, so it applies
                # to failed responses as well.
                await sleep_time()
                response.raise_for_status()
                return response.text
            response = await self.__request_url_head(url, headers, **kwargs, )
            await sleep_time()
            return str(response.url)
        except HTTPError as error:
            logging(
                log,
                self.message("网络异常,{0} 请求失败: {1}").format(url, repr(error)),
                ERROR
            )
            return ""

    @staticmethod
    def format_url(url: str) -> str:
        r"""Resolve backslash escapes (e.g. ``\u002F``) embedded in ``url``.

        Characters outside ASCII are preserved unchanged.
        """
        # ``unicode_escape`` interprets raw bytes as Latin-1, so encoding with
        # UTF-8 first would garble every non-ASCII character. Encoding to
        # Latin-1 with ``backslashreplace`` keeps code points <= 0xFF as single
        # bytes and turns the rest into ``\uXXXX``/``\UXXXXXXXX`` escapes that
        # the decoder restores exactly.
        return url.encode("latin-1", "backslashreplace").decode("unicode_escape")

    def select_headers(self, url: str) -> dict:
        """Return ``self.headers`` if ``"explore"`` occurs in ``url``, else ``self.blank_headers``."""
        return self.headers if "explore" in url else self.blank_headers

    async def __request_url_head(self, url: str, headers: dict, **kwargs, ):
        """Send a HEAD request through the shared client and return the response."""
        return await self.client.head(
            url,
            headers=headers,
            **kwargs,
        )

    async def __request_url_get(self, url: str, headers: dict, **kwargs, ):
        """Send a GET request through the shared client and return the response."""
        return await self.client.get(
            url,
            headers=headers,
            **kwargs,
        )
|