From 1045d0aaf999728ea881f37e21b5c19230e0da1f Mon Sep 17 00:00:00 2001
From: Quan
Date: Thu, 11 Dec 2025 23:00:08 +0800
Subject: [PATCH] =?UTF-8?q?feat:=20=E6=96=B0=E5=A2=9E=E4=BD=9C=E5=93=81?=
 =?UTF-8?q?=E5=A4=84=E7=90=86=E7=BB=9F=E8=AE=A1=E5=8A=9F=E8=83=BD?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 source/application/app.py      | 83 ++++++++++++++++++++++++++++------
 source/application/download.py |  2 +-
 static/Release_Notes.md        |  3 +-
 uv.lock                        |  8 ++--
 4 files changed, 77 insertions(+), 19 deletions(-)

diff --git a/source/application/app.py b/source/application/app.py
index e0d7b64..d6db937 100644
--- a/source/application/app.py
+++ b/source/application/app.py
@@ -18,6 +18,7 @@ from fastapi.responses import RedirectResponse
 from fastmcp import FastMCP
 from typing import Annotated
 from pydantic import Field
+from types import SimpleNamespace
 
 # from aiohttp import web
 from pyperclip import copy, paste
@@ -188,13 +189,15 @@ class XHS:
         index,
         log,
         bar,
+        count: SimpleNamespace,
     ):
         name = self.__naming_rules(container)
         if (u := container["下载地址"]) and download:
             if await self.skip_download(i := container["作品ID"]):
                 logging(log, _("作品 {0} 存在下载记录,跳过下载").format(i))
+                count.skip += 1
             else:
-                path, result = await self.download.run(
+                __, result = await self.download.run(
                     u,
                     container["动图地址"],
                     index,
@@ -207,9 +210,16 @@ class XHS:
                     log,
                     bar,
                 )
-                await self.__add_record(i, result)
+                if result:
+                    count.success += 1
+                    await self.__add_record(
+                        i,
+                    )
+                else:
+                    count.fail += 1
         elif not u:
             logging(log, _("提取作品文件下载地址失败"), ERROR)
+            count.fail += 1
         await self.save_data(container)
 
     @data_cache
@@ -223,9 +233,11 @@ class XHS:
         data.pop("时间戳", None)
         await self.data_recorder.add(**data)
 
-    async def __add_record(self, id_: str, result: list) -> None:
-        if all(result):
-            await self.id_recorder.add(id_)
+    async def __add_record(
+        self,
+        id_: str,
+    ) -> None:
+        await self.id_recorder.add(id_)
 
     async def extract(
         self,
@@ -236,14 +248,17 @@ class XHS:
         bar=None,
         data=True,
     ) -> list[dict]:
-        # return  # 调试代码
-        urls = await self.extract_links(url, log)
-        if not urls:
+        if not (urls := await self.extract_links(url, log)):
             logging(log, _("提取小红书作品链接失败"), WARNING)
-        else:
-            logging(log, _("共 {0} 个小红书作品待处理...").format(len(urls)))
-        # return urls  # 调试代码
-        return [
+            return []
+        statistics = SimpleNamespace(
+            all=len(urls),
+            success=0,
+            fail=0,
+            skip=0,
+        )
+        logging(log, _("共 {0} 个小红书作品待处理...").format(statistics.all))
+        result = [
             await self.__deal_extract(
                 i,
                 download,
@@ -251,9 +266,27 @@ class XHS:
                 log,
                 bar,
                 data,
+                count=statistics,
             )
             for i in urls
         ]
+        self.show_statistics(statistics, log,)
+        return result
+
+    @staticmethod
+    def show_statistics(
+        statistics: SimpleNamespace,
+        log=None,
+    ) -> None:
+        logging(
+            log,
+            _("共处理 {0} 个作品,成功 {1} 个,失败 {2} 个,跳过 {3} 个").format(
+                statistics.all,
+                statistics.success,
+                statistics.fail,
+                statistics.skip,
+            ),
+        )
 
     async def extract_cli(
         self,
@@ -278,6 +311,12 @@ class XHS:
                 data,
             )
         else:
+            statistics = SimpleNamespace(
+                all=len(url),
+                success=0,
+                fail=0,
+                skip=0,
+            )
             [
                 await self.__deal_extract(
                     u,
@@ -286,9 +325,11 @@ class XHS:
                     log,
                     bar,
                     data,
+                    count=statistics,
                 )
                 for u in url
             ]
+            self.show_statistics(statistics, log,)
 
     async def extract_links(self, url: str, log) -> list:
         urls = []
@@ -326,10 +367,17 @@ class XHS:
         data: bool,
         cookie: str = None,
         proxy: str = None,
+        count=SimpleNamespace(
+            all=0,
+            success=0,
+            fail=0,
+            skip=0,
+        ),
     ):
         if await self.skip_download(i := self.__extract_link_id(url)) and not data:
             msg = _("作品 {0} 存在下载记录,跳过处理").format(i)
             logging(log, msg)
+            count.skip += 1
             return {"message": msg}
         logging(log, _("开始处理作品:{0}").format(i))
         html = await self.html.request_url(
@@ -341,11 +389,13 @@ class XHS:
         namespace = self.__generate_data_object(html)
         if not namespace:
             logging(log, _("{0} 获取数据失败").format(i), ERROR)
+            count.fail += 1
             return {}
         data = self.explore.run(namespace)
         # logging(log, data)  # 调试代码
         if not data:
             logging(log, _("{0} 提取数据失败").format(i), ERROR)
+            count.fail += 1
             return {}
         if data["作品类型"] == _("视频"):
             self.__extract_video(data, namespace)
@@ -359,7 +409,14 @@ class XHS:
             data["下载地址"] = []
             data["动图地址"] = []
         await self.update_author_nickname(data, log)
-        await self.__download_files(data, download, index, log, bar)
+        await self.__download_files(
+            data,
+            download,
+            index,
+            log,
+            bar,
+            count,
+        )
         logging(log, _("作品处理完成:{0}").format(i))
         # await sleep_time()
         return data
diff --git a/source/application/download.py b/source/application/download.py
index 96b1118..5ef563b 100644
--- a/source/application/download.py
+++ b/source/application/download.py
@@ -114,7 +114,7 @@ class Download:
             for url, name, format_ in tasks
         ]
         tasks = await gather(*tasks)
-        return path, tasks
+        return path, tasks  # 未解之谜
 
     def __generate_path(self, nickname: str, filename: str):
         if self.author_archive:
diff --git a/static/Release_Notes.md b/static/Release_Notes.md
index c454b2c..2603852 100644
--- a/static/Release_Notes.md
+++ b/static/Release_Notes.md
@@ -1,7 +1,8 @@
 **项目更新内容:**
 
 1. 修复 MCP 模式无法下载文件的问题
-2. 调整内置延时机制
+2. 新增作品处理统计功能
+3. 调整内置延时机制
 
 *****
 
diff --git a/uv.lock b/uv.lock
index 51608e1..c2f7734 100644
--- a/uv.lock
+++ b/uv.lock
@@ -1239,16 +1239,16 @@ dev = [
 requires-dist = [
     { name = "aiofiles", specifier = ">=25.1.0" },
     { name = "aiosqlite", specifier = ">=0.21.0" },
-    { name = "click", specifier = ">=8.3.0" },
+    { name = "click", specifier = ">=8.3.1" },
     { name = "emoji", specifier = ">=2.15.0" },
-    { name = "fastapi", specifier = ">=0.121.0" },
-    { name = "fastmcp", specifier = ">=2.13.0" },
+    { name = "fastapi", specifier = ">=0.123.10" },
+    { name = "fastmcp", specifier = ">=2.13.3" },
     { name = "httpx", extras = ["socks"], specifier = ">=0.28.1" },
     { name = "lxml", specifier = ">=6.0.2" },
     { name = "pyperclip", specifier = ">=1.11.0" },
     { name = "pyyaml", specifier = ">=6.0.3" },
     { name = "rookiepy", specifier = ">=0.5.6" },
-    { name = "textual", specifier = ">=6.5.0" },
+    { name = "textual", specifier = ">=6.7.1" },
     { name = "uvicorn", specifier = ">=0.38.0" },
     { name = "websockets", specifier = ">=15.0.1" },
 ]