mirror of
https://github.com/JoeanAmier/XHS-Downloader.git
synced 2025-12-26 04:48:05 +08:00
fix: 修复项目功能异常
1. 修复 ScannerError 错误 2. 优化作品数据提取逻辑 3. 移除 Cookie 参数 Closes #124 Closes #126
This commit is contained in:
parent
3e67dfb634
commit
01111cc401
Binary file not shown.
@ -226,9 +226,6 @@ msgstr "If XHS-Downloader is helpful to you, please consider giving it Star. Tha
|
||||
msgid "作者的其他开源项目"
|
||||
msgstr "Other open-source projects of the author"
|
||||
|
||||
msgid "文件 {0} 请求失败,响应码 {1}"
|
||||
msgstr "File {0} request failed with response code {1}"
|
||||
|
||||
msgid "视频作品下载功能已关闭,跳过下载"
|
||||
msgstr "The video download function has been turned off, skip download"
|
||||
|
||||
|
||||
@ -226,9 +226,6 @@ msgstr ""
|
||||
msgid "作者的其他开源项目"
|
||||
msgstr ""
|
||||
|
||||
msgid "文件 {0} 请求失败,响应码 {1}"
|
||||
msgstr ""
|
||||
|
||||
msgid "视频作品下载功能已关闭,跳过下载"
|
||||
msgstr ""
|
||||
|
||||
|
||||
@ -117,11 +117,7 @@ class Download:
|
||||
temp = self.temp.joinpath(f"{name}.{format_}")
|
||||
try:
|
||||
async with self.client.stream("GET", url, ) as response:
|
||||
if response.status_code != 200:
|
||||
logging(
|
||||
log, self.message("链接 {0} 请求失败,响应码 {1}").format(
|
||||
url, response.status_code), style=ERROR)
|
||||
return False
|
||||
response.raise_for_status()
|
||||
suffix = self.__extract_type(
|
||||
response.headers.get("Content-Type")) or format_
|
||||
real = path.joinpath(f"{name}.{suffix}")
|
||||
|
||||
@ -27,8 +27,7 @@ class Html:
|
||||
url,
|
||||
**kwargs,
|
||||
)
|
||||
if response.status_code != 200:
|
||||
return ""
|
||||
response.raise_for_status()
|
||||
return response.text if content else str(response.url)
|
||||
except HTTPError as error:
|
||||
logging(log, str(error), ERROR)
|
||||
|
||||
@ -7,7 +7,7 @@ __all__ = ["Converter"]
|
||||
|
||||
|
||||
class Converter:
|
||||
INITIAL_STATE = "(//script)[last()]/text()"
|
||||
INITIAL_STATE = "//script/text()"
|
||||
KEYS_LINK = (
|
||||
"note",
|
||||
"noteDetailMap",
|
||||
@ -16,22 +16,23 @@ class Converter:
|
||||
)
|
||||
|
||||
def run(self, content: str) -> dict:
|
||||
return self.__filter_object(
|
||||
self.__convert_object(
|
||||
self.__extract_object(content)))
|
||||
return self._filter_object(
|
||||
self._convert_object(
|
||||
self._extract_object(content)))
|
||||
|
||||
def __extract_object(self, html: str) -> str:
|
||||
def _extract_object(self, html: str) -> str:
|
||||
if not html:
|
||||
return ""
|
||||
html_tree = HTML(html)
|
||||
return d[0] if (d := html_tree.xpath(self.INITIAL_STATE)) else ""
|
||||
scripts = html_tree.xpath(self.INITIAL_STATE)
|
||||
return self.get_script(scripts)
|
||||
|
||||
@staticmethod
|
||||
def __convert_object(text: str) -> dict:
|
||||
def _convert_object(text: str) -> dict:
    """Parse the ``window.__INITIAL_STATE__=`` script payload into Python data.

    Args:
        text: Script text, normally beginning with the literal assignment
            prefix ``window.__INITIAL_STATE__=``.

    Returns:
        Whatever ``safe_load`` produces for the text that follows the prefix
        (presumably PyYAML's ``safe_load`` -- confirm against the file's
        imports).
    """
    # str.lstrip() treats its argument as a *set of characters*, not a prefix,
    # so it would also eat leading payload characters that happen to be in
    # that set (e.g. a payload of "null" would be cut down to "ull").
    # removeprefix() drops exactly the assignment prefix and nothing else.
    return safe_load(text.removeprefix("window.__INITIAL_STATE__="))
|
||||
|
||||
@classmethod
|
||||
def __filter_object(cls, data: dict) -> dict:
|
||||
def _filter_object(cls, data: dict) -> dict:
|
||||
return cls.deep_get(data, cls.KEYS_LINK) or {}
|
||||
|
||||
@classmethod
|
||||
@ -55,3 +56,11 @@ class Converter:
|
||||
elif isinstance(data, list | tuple | set):
|
||||
return data[index]
|
||||
raise TypeError
|
||||
|
||||
@staticmethod
|
||||
def get_script(scripts: list) -> str:
    """Return the last script text starting with ``window.__INITIAL_STATE__``.

    Args:
        scripts: Script text nodes, in document order.

    Returns:
        The matching entry closest to the end of ``scripts``, or an empty
        string when no entry matches (including when ``scripts`` is empty).
    """
    # Iterate with reversed() instead of calling scripts.reverse(): the search
    # order is the same, but the caller's list is no longer reordered in place
    # as a hidden side effect.
    for script in reversed(scripts):
        if script.startswith("window.__INITIAL_STATE__"):
            return script
    return ""
|
||||
|
||||
@ -77,7 +77,7 @@ class Manager:
|
||||
"Sec-Ch-Ua": sec_ch_ua or SEC_CH_UA,
|
||||
"Sec-Ch-Ua-Platform": sec_ch_ua_platform or SEC_CH_UA_PLATFORM,
|
||||
}
|
||||
self.headers = self.blank_headers | {"Cookie": cookie}
|
||||
self.headers = self.blank_headers | {"Cookie": ""}
|
||||
self.retry = retry
|
||||
self.chunk = chunk
|
||||
self.name_format = self.__check_name_format(name_format)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user