From ea349048c342ce55710c932ec594c07feacdf062 Mon Sep 17 00:00:00 2001 From: JoeanAmier Date: Sat, 13 Apr 2024 13:25:31 +0800 Subject: [PATCH] =?UTF-8?q?=E6=96=B0=E5=A2=9E=E8=A7=86=E9=A2=91/=E5=9B=BE?= =?UTF-8?q?=E6=96=87=E4=BD=9C=E5=93=81=E6=96=87=E4=BB=B6=E4=B8=8B=E8=BD=BD?= =?UTF-8?q?=E5=BC=80=E5=85=B3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 12 ++++++++++++ locale/en_GB/LC_MESSAGES/xhs.mo | Bin 9996 -> 10846 bytes locale/en_GB/LC_MESSAGES/xhs.po | 20 +++++++++++++++++++- locale/zh_CN/LC_MESSAGES/xhs.po | 18 ++++++++++++++++++ source/TUI/app.py | 12 ++++++++++++ source/TUI/index.py | 2 +- source/TUI/monitor.py | 2 +- source/TUI/setting.py | 13 +++++++++---- source/application/app.py | 29 ++++++++++++++++++++++------- source/application/download.py | 11 ++++++++++- source/module/manager.py | 15 ++++++++++++--- source/module/settings.py | 15 +++++++++++++++ source/module/static.py | 6 +++--- 13 files changed, 134 insertions(+), 21 deletions(-) diff --git a/README.md b/README.md index 3496cd1..eb788c8 100644 --- a/README.md +++ b/README.md @@ -199,6 +199,18 @@ async def example(): PNG +image_download +bool +图文作品文件下载开关 +true + + +video_download +bool +视频作品文件下载开关 +true + + folder_mode bool 是否将每个作品的文件储存至单独的文件夹;文件夹名称与文件名称保持一致 diff --git a/locale/en_GB/LC_MESSAGES/xhs.mo b/locale/en_GB/LC_MESSAGES/xhs.mo index a2ba0e89f7bfb52e4bd6b9bdcb4947823bf050a8..42653b7bb3437eea80727779b8a2de15ca6d459c 100644 GIT binary patch delta 2457 zcmb8veN5F=9LMp4$`wJyK+T85pNW==2x{bGmid&5vT|AcQQqKp;U?Tm+)Kn?R|`cH z6$d_)EY11AS}E%`bPC~GOI>Yc{i#`NT)4D0w&owSx$6D--HUG3A9c2KU+0|P?>v3a zxxa=@ceaG?qzyZ4NCo6a$Sp&R*{*UTKcwp}W5(kRd=!6**M|-_W*YTLScJ=P6Q0D$ z*pI20l4Q&;I2p6hH6rE$oM24IY@?9LgBsL;@1q8|8uvDiq&|S7FfrMfnV62_a7o+( zWI*G=g&4v#{1kKW8fu;aEW{}(>K`)ZEeaE9IDn61D=xvCaYr(H2KBY*On_QYh#xJq zA#N+qqy7bIqPwUKO=Q&`T#7WAQ}{4mz{i;1T%)iU`%x3j8XfDn5;agU>S*dw&(ER; z>Wa62jfzAc>is`aq0i*!8ZJad;vT9!bxf@PRMaL+R=i;`PNlvEE%aax)}bc8gc|53 z=3+O_#qkfZ6I_LJunhC?7;3_AQ2l>I%{PFGTw)sWSHm<$t;K9qJ&e3#uHqv64kuws zx-m0wCTf6&a08yi)%Yvw*3aiZs0F`_Y}VA^Lfnt)_c^|VcX18oGn?9*QJ3&b)RD~P zqUcVAmQzqj3h^1N$1MB`wXgxCk4Yo$+F2gbWS&PIQ8iA%dSs2}94cZrPz(GG|3DY3 z=YE?+!mNJjNFYOI1_kbs$ws}f0$IGtM@?LT3SBd50pFtr=*K)vU^X4aBAkUjT#3ix z-a;Ks1`oBsTy)_Jn4$lFAq6d<2HS8iDnv`U2_dyF$)HJ`dTa|ULx-?2!;VP%ZLQo&1R+Lc$kXJ_WJP2>S&0oh zN2)Rx-bqRPZX~ZJ>-ZG01?14P{5(hAL{`$J*pQ+ ztnc{l?#`A-)6V#_y(gmYbVe?I!W(@@Kkn~nh+YgwyXs<3!%4}964#yE-FNEHfA3_X z|LowbAbd3S)r8Ql{k@&%dydp~Uum}jb};Dml?1Y15BPmnZ^z|G+o4F;uISmFz3q^|o zi!Hy$lVt_Adn@k08-63>2UnrSc+*>K`|r2^pE1mfPSsj754EAx2E?7v0Z8?ef~&Sov2t!=w2Ype=)rMuh?+LeK9tDwww2iT@vMuQb( zOlK6guh^=rVvjTsuo600?h?0`7Qf}L3i``)nX|}UR#sz`*gl&T#CjLo9(Pq)@LyIo JttmP)^Dh%(=g$BD delta 1696 zcmX}sTWm~09LMqhR=cY=cB`&cwr;JqN-vaBmsacE5E7RV+o&Ru5|{LatVBeF&}7pR zf`r6_iie&iG$cxh7ZMQ?60O9GOC-Fo)`R%|&K@Q^`_*`a1* z@E%Svi`pv+8YqG4n3tZk8ndXc$83z?G;G6>*y-7gRn#A14*o<2w)70MEm(%S{}hhJ zt5}46Si|$}gIh4m&orCBg-xg#wxcG{ftuK5&pw<<{Wa=AY22h2D8^P?jq2y{HyZn~ z5Z~ZD)Z1wM8Z6-Xww{6pI*eL^i{1@)Py@a2+6Pc?@*8!3a9A?Lv+*hQ22=#HiMv7{ z=1=`+dDeQa#EGTtbF4UXcKt1RIDpJq9 z`X|pH7@|Fezqy!;<8V!g_)n#oYi18l{~cmN~V zh1yJU)aeKj&vGonRk#Hk@CItrenoyZB230gtiokc3hHj-zIH3;$q0 zvfu0hz18tKY6;#W%W9uc*AvJyEWmow#fnk)H=*`S1QqG?I31s3oz8zeS+E-7q|hHl zO{fF?*n>pb`cMyigNnpw)cpzc;e2wKtfV(n(gRs)Tc`?Y2wBUpOcggp%{VpgfLagj zcCEXT)>WHGJ9Y(GiI@DJ8oZJwr3t5zwPZRGS^_0aP|L(O$&D(wUs_o~&g1!3Np{aE zDs)PGTihd}vOXewquhQaqx>1M>TsQNH~h};d@H=" * + 50}", + ERROR, + ) async def action_settings(self): async def save_settings(data: dict) -> None: diff --git a/source/TUI/index.py b/source/TUI/index.py index 197505e..944e977 100644 --- a/source/TUI/index.py +++ b/source/TUI/index.py @@ -107,7 +107,7 @@ class Index(Screen): @work() async def deal(self): await self.app.push_screen("loading") - if any(await self.xhs.extract(self.url.value, True, log=self.tip)): + if any(await self.xhs.extract(self.url.value, True, log=self.tip, data=False, )): self.url.value = "" else: self.tip.write(Text(self.message("下载小红书作品文件失败"), style=ERROR)) diff --git a/source/TUI/monitor.py b/source/TUI/monitor.py index 9c31d8c..14bd698 100644 --- a/source/TUI/monitor.py +++ b/source/TUI/monitor.py @@ -46,7 +46,7 @@ class Monitor(Screen): @work() async def run_monitor(self): - await self.xhs.monitor(download=True, log=self.query_one(RichLog)) + await self.xhs.monitor(download=True, log=self.query_one(RichLog), data=False, ) self.action_close() def on_mount(self) -> None: diff --git a/source/TUI/setting.py b/source/TUI/setting.py index b693204..ff1dd41 100644 --- a/source/TUI/setting.py +++ b/source/TUI/setting.py @@ -27,6 +27,7 @@ class Setting(Screen): super().__init__() self.data = data self.message = message + def compose(self) -> ComposeResult: yield Header() yield ScrollableContainer( @@ -49,15 +50,17 @@ class Setting(Screen): Label(self.message("请求数据失败时,重试的最大次数"), classes="params", ), Input(str(self.data["max_retry"]), placeholder="5", type="integer", id="max_retry", ), Container( - Label("", classes="params", ), - Label("", classes="params", ), + Checkbox(self.message("记录作品数据"), id="record_data", value=self.data["record_data"], ), + Checkbox(self.message("作品文件夹归档模式"), id="folder_mode", value=self.data["folder_mode"], ), + Checkbox(self.message("视频作品下载开关"), id="video_download", value=self.data["video_download"], ), + Checkbox(self.message("图文作品下载开关"), id="image_download", value=self.data["image_download"], ), + classes="horizontal-layout"), + Container( Label(self.message("图片下载格式"), classes="params", ), Label(self.message("程序语言"), classes="params", ), classes="horizontal-layout", ), Container( - Checkbox(self.message("记录作品数据"), id="record_data", value=self.data["record_data"], ), - Checkbox(self.message("作品文件夹归档模式"), id="folder_mode", value=self.data["folder_mode"], ), Select.from_values( ("PNG", "WEBP"), value=self.data["image_format"], @@ -98,6 +101,8 @@ class Setting(Screen): "image_format": self.query_one("#image_format").value, "folder_mode": self.query_one("#folder_mode").value, "language": self.query_one("#language").value, + "image_download": self.query_one("#image_download").value, + "video_download": self.query_one("#video_download").value, }) @on(Button.Pressed, "#abandon") diff --git a/source/application/app.py b/source/application/app.py index 3a55975..482df55 100644 --- a/source/application/app.py +++ b/source/application/app.py @@ -7,6 +7,7 @@ from contextlib import suppress from datetime import datetime from re import compile from typing import Callable +from urllib.parse import urlparse from pyperclip import paste @@ -77,6 +78,8 @@ class XHS: max_retry, record_data, image_format, + image_download, + video_download, folder_mode, self.message, ) @@ -91,6 +94,7 @@ class XHS: self.clipboard_cache: str = "" self.queue = Queue() self.event = Event() + self.server = server def __extract_image(self, container: dict, data: Namespace): container["下载地址"] = self.image.get_image_link( @@ -126,7 +130,8 @@ class XHS: download=False, index: list | tuple = None, log=None, - bar=None) -> list[dict]: + bar=None, + data=True, ) -> list[dict]: # return # 调试代码 urls = await self.__extract_links(url, log) if not urls: @@ -135,19 +140,20 @@ class XHS: logging( log, self.message("共 {0} 个小红书作品待处理...").format(len(urls))) # return urls # 调试代码 - return [await self.__deal_extract(i, download, index, log, bar, ) for i in urls] + return [await self.__deal_extract(i, download, index, log, bar, data, ) for i in urls] async def extract_cli(self, url: str, download=True, index: list | tuple = None, log=None, - bar=None) -> None: + bar=None, + data=False, ) -> None: url = await self.__extract_links(url, log) if not url: logging(log, self.message("提取小红书作品链接失败"), WARNING) else: - await self.__deal_extract(url[0], download, index, log, bar) + await self.__deal_extract(url[0], download, index, log, bar, data, ) async def __extract_links(self, url: str, log) -> list: urls = [] @@ -161,7 +167,11 @@ class XHS: urls.append(u.group()) return urls - async def __deal_extract(self, url: str, download: bool, index: list | tuple | None, log, bar): + async def __deal_extract(self, url: str, download: bool, index: list | tuple | None, log, bar, data: bool, ): + if not data and await self.skip_download(i := self.__extract_link_id(url)): + msg = self.message("作品 {0} 存在下载记录,跳过处理").format(i) + logging(log, msg) + return {"message": msg} logging(log, self.message("开始处理作品:{0}").format(url)) html = await self.html.request_url(url, log=log) namespace = self.__generate_data_object(html) @@ -184,6 +194,11 @@ class XHS: logging(log, self.message("作品处理完成:{0}").format(url)) return data + @staticmethod + def __extract_link_id(url: str) -> str: + link = urlparse(url) + return link.path.split("/")[-1] + def __generate_data_object(self, html: str) -> Namespace: data = self.convert.run(html) return Namespace(data) @@ -194,7 +209,7 @@ class XHS: title = self.manager.filter_name(data["作品标题"]) or data["作品ID"] return f"{time_}_{author}_{title[:64]}" - async def monitor(self, delay=1, download=False, log=None, bar=None) -> None: + async def monitor(self, delay=1, download=False, log=None, bar=None, data=True, ) -> None: logging( None, self.message( @@ -202,7 +217,7 @@ class XHS: style=MASTER, ) self.event.clear() - await gather(self.__push_link(delay), self.__receive_link(delay, download, None, log, bar)) + await gather(self.__push_link(delay), self.__receive_link(delay, download, None, log, bar, data)) async def __push_link(self, delay: int): while not self.event.is_set(): diff --git a/source/application/download.py b/source/application/download.py index 63f40ce..9d485da 100644 --- a/source/application/download.py +++ b/source/application/download.py @@ -17,6 +17,7 @@ class Download: "image/jpeg": "jpg", "image/webp": "webp", "application/octet-stream": "", + "video/mp4": "mp4", "video/quicktime": "mov", } @@ -32,6 +33,8 @@ class Download: self.folder_mode = manager.folder_mode self.video_format = "mp4" self.image_format = manager.image_format + self.image_download = manager.image_download + self.video_download = manager.video_download async def run(self, urls: list, index: list | tuple | None, name: str, type_: str, log, bar) -> tuple[Path, tuple]: path = self.__generate_path(name) @@ -67,6 +70,9 @@ class Download: path: Path, name: str, log) -> list: + if not self.video_download: + logging(log, self.message("视频作品下载功能已关闭,跳过下载")) + return [] if any(path.glob(f"{name}.*")): logging(log, self.message("{0} 文件已存在,跳过下载").format(name)) return [] @@ -80,6 +86,9 @@ class Download: name: str, log) -> list: tasks = [] + if not self.image_download: + logging(log, self.message("图文作品下载功能已关闭,跳过下载")) + return tasks for i, j in enumerate(urls, start=1): if index and i not in index: continue @@ -94,6 +103,7 @@ class Download: @re_download async def __download(self, url: str, path: Path, name: str, format_: str, log, bar): + temp = self.temp.joinpath(name) try: async with self.session.get(url, proxy=self.proxy) as response: if response.status != 200: @@ -103,7 +113,6 @@ class Download: return False suffix = self.__extract_type( response.headers.get("Content-Type")) or format_ - temp = self.temp.joinpath(name) real = path.joinpath(f"{name}.{suffix}") # self.__create_progress( # bar, int( diff --git a/source/module/manager.py b/source/module/manager.py index 4263992..2cdd395 100644 --- a/source/module/manager.py +++ b/source/module/manager.py @@ -30,6 +30,8 @@ class Manager: retry: int, record_data: bool, image_format: str, + image_download: bool, + video_download: bool, folder_mode: bool, transition: Callable[[str], str], ): @@ -42,9 +44,9 @@ class Manager: self.headers = self.blank_headers | {"Cookie": cookie} self.retry = retry self.chunk = chunk - self.record_data = record_data + self.record_data = self.check_bool(record_data, False) self.image_format = self.__check_image_format(image_format) - self.folder_mode = folder_mode + self.folder_mode = self.check_bool(folder_mode, False) self.proxy = proxy self.request_session = ClientSession( headers=self.headers | { @@ -55,6 +57,8 @@ class Manager: headers=self.blank_headers, timeout=ClientTimeout(connect=timeout)) self.message = transition + self.image_download = self.check_bool(image_download, True) + self.video_download = self.check_bool(video_download, True) def __check_path(self, path: str) -> Path: if not path: @@ -88,7 +92,8 @@ class Manager: @staticmethod def delete(path: Path): - path.unlink() + if path.exists(): + path.unlink() @staticmethod def archive(root: Path, name: str, folder_mode: bool) -> Path: @@ -105,6 +110,10 @@ class Manager: name = self.NAME.sub("_", name) return sub(r"_+", "_", name).strip("_") + @staticmethod + def check_bool(value: bool, default: bool) -> bool: + return value if isinstance(value, bool) else default + async def close(self): await self.request_session.close() await self.download_session.close() diff --git a/source/module/settings.py b/source/module/settings.py index f386207..0a4ecca 100644 --- a/source/module/settings.py +++ b/source/module/settings.py @@ -44,3 +44,18 @@ class Settings: def update(self, data: dict): with self.file.open("w", encoding=self.encode) as f: dump(data, f, indent=4, ensure_ascii=False) + + @classmethod + def check_keys( + cls, + data: dict, + callback: callable, + *args, + **kwargs, + ) -> dict: + needful_keys = set(cls.default.keys()) + given_keys = set(data.keys()) + if not needful_keys.issubset(given_keys): + callback(*args, **kwargs) + return cls.default + return data diff --git a/source/module/static.py b/source/module/static.py index fa304a9..7488835 100644 --- a/source/module/static.py +++ b/source/module/static.py @@ -21,9 +21,9 @@ __all__ = [ "PROJECT", ] -VERSION_MAJOR = 1 -VERSION_MINOR = 9 -VERSION_BETA = False +VERSION_MAJOR = 2 +VERSION_MINOR = 0 +VERSION_BETA = True ROOT = Path(__file__).resolve().parent.parent.parent PROJECT = f"XHS-Downloader V{VERSION_MAJOR}.{ VERSION_MINOR}{" Beta" if VERSION_BETA else ""}"