From 01111cc401085b7a53a327e41a1f66fe6aea8c92 Mon Sep 17 00:00:00 2001 From: JoeanAmier Date: Sun, 28 Jul 2024 21:32:00 +0800 Subject: [PATCH] =?UTF-8?q?fix:=20=E4=BF=AE=E5=A4=8D=E9=A1=B9=E7=9B=AE?= =?UTF-8?q?=E5=8A=9F=E8=83=BD=E5=BC=82=E5=B8=B8?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1. 修复 ScannerError 错误 2. 优化作品数据提取逻辑 3. 移除 Cookie 参数 Closes #124 Closes #126 --- locale/en_GB/LC_MESSAGES/xhs.mo | Bin 12092 -> 11989 bytes locale/en_GB/LC_MESSAGES/xhs.po | 3 --- locale/zh_CN/LC_MESSAGES/xhs.po | 3 --- source/application/download.py | 6 +----- source/application/request.py | 3 +-- source/expansion/converter.py | 25 +++++++++++++++++-------- source/module/manager.py | 2 +- 7 files changed, 20 insertions(+), 22 deletions(-) diff --git a/locale/en_GB/LC_MESSAGES/xhs.mo b/locale/en_GB/LC_MESSAGES/xhs.mo index 9450293e3cbfa38792da0b7d49b6afc90fda9830..c8c46d62fd200a9d1194dd3be43299f8dbab3200 100644 GIT binary patch delta 1973 zcmXxkSx8ku9LMp0i%TxKWSK2$Wm##dk*#J|+bmmbvCLG;$cX5nAlifjWwc-mNee;= z_7vGd7h0%?XhG6~h>EDF2$2XXd$OSI`@45K_&=YSIi5K)|CzaWs$Nuj@8Wx(G_ERQ z2ocrG%)?(H+_-*(nGMCC&ctxD>C|&^0B*(Mc+k}^;xOv>a6NY5Vl3)oHW*K14Eph= zK6RghK2RHBmWYkcYnVyB&Do6usi#E}I0N;eYE*k2>H}w;w=jzO3yj8(I10N^-$~?Q zs-~JnK^>G}f86Bio~xg6^~*Ss=dBozzc3qP=v@QyVL5I<^>Y`K@FkAME}Vu*eS@yR zWTpBT1r6Xb(x^MtbrQ)q9& zRD2vy{xz~MG{~5K!Ib8p22_navOSoCM{yk9MGfRDX5fGXvq@Np8o(~p`-e~iX+~xA zimSJy{_j-+`LCn!$vxP@!cC#R8?~$bsFc3MW%v~bVF6ozBvzn0-izb$93H?YxE4!! zSw8+ph2vt!gzCT8OrRLbg6Gj74L_#FAOzuXv;CDEHU zSr#f|W%!gYZpK{dc_V`VzYR69dgPILSKNa(*U*9L;4f#@Yfe}_jfh5?BhTO0W zZ!slZ(LrnxDo@(JN{f$JPE--A2whrW9S7~yQX+(4=i5?3*DQiZZ(cvqD%kL|euxU8S=n8*Q#XK-}8Oscp?^`TjijoN=DlInRBb^W%HYdH8Kp z_vY~Jlp#lrR7OrGr^cFvF)_e}6d!Lk0f#wra5nWt_$)TzL_F^57x8)OH*pI-#1&XN z%xpZK#gX_Oeyy&4rJxQDB$%b)Ip-D3rQYX^8*Vn1dI7$GC8&<7-Tf9+2j`tvF_HQJ zj==kvjk>9Ra?sOM=TT6@qX6^->G=zQBOy8v>Y|_-B^hc)O(LmGmA@Rb{NDW zEJn58gnF+P74dEitHC}BTKoS|52lZbHaHV?e*tPnWvJ)tP#v@)ecBcL0dJxr)5=Bn z&!Yx*$$8D$i$U)9rx1US!jRF?k!3lHF`xTYr~$Pi?^p*;!|(AW{1-Kl>{PQUxB_S4 z`=|l5qu&1(HIP13WCvY6b`0^?gUL+qL-bG&w&BZo0kymTM1?kqEx!h{aU9lT7Pg=o z?!u{f3!8B$FO}m#)bD>G2f`9Gsu;7;VG3#}m7StB$wS_?5>!JKs7ORmA?rcSxF27{ zL|&$7g~%9fHEOeMMMdl|-k`_RIFov9X7u@UsELJpDDayNx*wilfmKgOHBg8e`BE&z zN>oQDa4vQtui67F$27vB0qsRicYeAONIy%P93+UIt_da{IAce$P)Pfu9o0hvKD?id3MJ?apU8& zk2UvS{L*X4Z}jed{?(oK4+hS(-|cMsyZgXEuO{Se*;&C;yt8pqec&mLircOL diff --git a/locale/en_GB/LC_MESSAGES/xhs.po b/locale/en_GB/LC_MESSAGES/xhs.po index 538b330..d2ff7ec 100644 --- a/locale/en_GB/LC_MESSAGES/xhs.po +++ b/locale/en_GB/LC_MESSAGES/xhs.po @@ -226,9 +226,6 @@ msgstr "If XHS-Downloader is helpful to you, please consider giving it Star. Tha msgid "作者的其他开源项目" msgstr "Other open-source projects of the author" -msgid "文件 {0} 请求失败,响应码 {1}" -msgstr "File {0} request failed with response code {1}" - msgid "视频作品下载功能已关闭,跳过下载" msgstr "The video download function has been turned off, skip download" diff --git a/locale/zh_CN/LC_MESSAGES/xhs.po b/locale/zh_CN/LC_MESSAGES/xhs.po index 96143df..b1fc6da 100644 --- a/locale/zh_CN/LC_MESSAGES/xhs.po +++ b/locale/zh_CN/LC_MESSAGES/xhs.po @@ -226,9 +226,6 @@ msgstr "" msgid "作者的其他开源项目" msgstr "" -msgid "文件 {0} 请求失败,响应码 {1}" -msgstr "" - msgid "视频作品下载功能已关闭,跳过下载" msgstr "" diff --git a/source/application/download.py b/source/application/download.py index 5af2b59..62ad406 100644 --- a/source/application/download.py +++ b/source/application/download.py @@ -117,11 +117,7 @@ class Download: temp = self.temp.joinpath(f"{name}.{format_}") try: async with self.client.stream("GET", url, ) as response: - if response.status_code != 200: - logging( - log, self.message("链接 {0} 请求失败,响应码 {1}").format( - url, response.status_code), style=ERROR) - return False + response.raise_for_status() suffix = self.__extract_type( response.headers.get("Content-Type")) or format_ real = path.joinpath(f"{name}.{suffix}") diff --git a/source/application/request.py b/source/application/request.py index 42e1779..1f8001f 100644 --- a/source/application/request.py +++ b/source/application/request.py @@ -27,8 +27,7 @@ class Html: url, **kwargs, ) - if response.status_code != 200: - return "" + response.raise_for_status() return response.text if content else str(response.url) except HTTPError as error: logging(log, str(error), ERROR) diff --git a/source/expansion/converter.py b/source/expansion/converter.py index fcd8f16..578b141 100644 --- a/source/expansion/converter.py +++ b/source/expansion/converter.py @@ -7,7 +7,7 @@ __all__ = ["Converter"] class Converter: - INITIAL_STATE = "(//script)[last()]/text()" + INITIAL_STATE = "//script/text()" KEYS_LINK = ( "note", "noteDetailMap", @@ -16,22 +16,23 @@ class Converter: ) def run(self, content: str) -> dict: - return self.__filter_object( - self.__convert_object( - self.__extract_object(content))) + return self._filter_object( + self._convert_object( + self._extract_object(content))) - def __extract_object(self, html: str) -> str: + def _extract_object(self, html: str) -> str: if not html: return "" html_tree = HTML(html) - return d[0] if (d := html_tree.xpath(self.INITIAL_STATE)) else "" + scripts = html_tree.xpath(self.INITIAL_STATE) + return self.get_script(scripts) @staticmethod - def __convert_object(text: str) -> dict: + def _convert_object(text: str) -> dict: return safe_load(text.lstrip("window.__INITIAL_STATE__=")) @classmethod - def __filter_object(cls, data: dict) -> dict: + def _filter_object(cls, data: dict) -> dict: return cls.deep_get(data, cls.KEYS_LINK) or {} @classmethod @@ -55,3 +56,11 @@ class Converter: elif isinstance(data, list | tuple | set): return data[index] raise TypeError + + @staticmethod + def get_script(scripts: list) -> str: + scripts.reverse() + for script in scripts: + if script.startswith("window.__INITIAL_STATE__"): + return script + return "" diff --git a/source/module/manager.py b/source/module/manager.py index bb05458..99a3511 100644 --- a/source/module/manager.py +++ b/source/module/manager.py @@ -77,7 +77,7 @@ class Manager: "Sec-Ch-Ua": sec_ch_ua or SEC_CH_UA, "Sec-Ch-Ua-Platform": sec_ch_ua_platform or SEC_CH_UA_PLATFORM, } - self.headers = self.blank_headers | {"Cookie": cookie} + self.headers = self.blank_headers | {"Cookie": ""} self.retry = retry self.chunk = chunk self.name_format = self.__check_name_format(name_format)