feat: 新增作品处理统计功能

This commit is contained in:
Quan 2025-12-11 23:00:08 +08:00
parent e747d94396
commit 1045d0aaf9
4 changed files with 77 additions and 19 deletions

View File

@ -18,6 +18,7 @@ from fastapi.responses import RedirectResponse
from fastmcp import FastMCP from fastmcp import FastMCP
from typing import Annotated from typing import Annotated
from pydantic import Field from pydantic import Field
from types import SimpleNamespace
# from aiohttp import web # from aiohttp import web
from pyperclip import copy, paste from pyperclip import copy, paste
@ -188,13 +189,15 @@ class XHS:
index, index,
log, log,
bar, bar,
count: SimpleNamespace,
): ):
name = self.__naming_rules(container) name = self.__naming_rules(container)
if (u := container["下载地址"]) and download: if (u := container["下载地址"]) and download:
if await self.skip_download(i := container["作品ID"]): if await self.skip_download(i := container["作品ID"]):
logging(log, _("作品 {0} 存在下载记录,跳过下载").format(i)) logging(log, _("作品 {0} 存在下载记录,跳过下载").format(i))
count.skip += 1
else: else:
path, result = await self.download.run( __, result = await self.download.run(
u, u,
container["动图地址"], container["动图地址"],
index, index,
@ -207,9 +210,16 @@ class XHS:
log, log,
bar, bar,
) )
await self.__add_record(i, result) if result:
count.success += 1
await self.__add_record(
i,
)
else:
count.fail += 1
elif not u: elif not u:
logging(log, _("提取作品文件下载地址失败"), ERROR) logging(log, _("提取作品文件下载地址失败"), ERROR)
count.fail += 1
await self.save_data(container) await self.save_data(container)
@data_cache @data_cache
@ -223,8 +233,10 @@ class XHS:
data.pop("时间戳", None) data.pop("时间戳", None)
await self.data_recorder.add(**data) await self.data_recorder.add(**data)
async def __add_record(self, id_: str, result: list) -> None: async def __add_record(
if all(result): self,
id_: str,
) -> None:
await self.id_recorder.add(id_) await self.id_recorder.add(id_)
async def extract( async def extract(
@ -236,14 +248,17 @@ class XHS:
bar=None, bar=None,
data=True, data=True,
) -> list[dict]: ) -> list[dict]:
# return # 调试代码 if not (urls := await self.extract_links(url, log)):
urls = await self.extract_links(url, log)
if not urls:
logging(log, _("提取小红书作品链接失败"), WARNING) logging(log, _("提取小红书作品链接失败"), WARNING)
else: return []
logging(log, _("{0} 个小红书作品待处理...").format(len(urls))) statistics = SimpleNamespace(
# return urls # 调试代码 all=len(urls),
return [ success=0,
fail=0,
skip=0,
)
logging(log, _("{0} 个小红书作品待处理...").format(statistics.all))
result = [
await self.__deal_extract( await self.__deal_extract(
i, i,
download, download,
@ -251,9 +266,27 @@ class XHS:
log, log,
bar, bar,
data, data,
count=statistics,
) )
for i in urls for i in urls
] ]
self.show_statistics(statistics, log,)
return result
@staticmethod
def show_statistics(
statistics: SimpleNamespace,
log=None,
) -> None:
logging(
log,
_("共处理 {0} 个作品,成功 {1} 个,失败 {2} 个,跳过 {3}").format(
statistics.all,
statistics.success,
statistics.fail,
statistics.skip,
),
)
async def extract_cli( async def extract_cli(
self, self,
@ -278,6 +311,12 @@ class XHS:
data, data,
) )
else: else:
statistics = SimpleNamespace(
all=len(url),
success=0,
fail=0,
skip=0,
)
[ [
await self.__deal_extract( await self.__deal_extract(
u, u,
@ -286,9 +325,11 @@ class XHS:
log, log,
bar, bar,
data, data,
count=statistics,
) )
for u in url for u in url
] ]
self.show_statistics(statistics, log,)
async def extract_links(self, url: str, log) -> list: async def extract_links(self, url: str, log) -> list:
urls = [] urls = []
@ -326,10 +367,17 @@ class XHS:
data: bool, data: bool,
cookie: str = None, cookie: str = None,
proxy: str = None, proxy: str = None,
count=SimpleNamespace(
all=0,
success=0,
fail=0,
skip=0,
),
): ):
if await self.skip_download(i := self.__extract_link_id(url)) and not data: if await self.skip_download(i := self.__extract_link_id(url)) and not data:
msg = _("作品 {0} 存在下载记录,跳过处理").format(i) msg = _("作品 {0} 存在下载记录,跳过处理").format(i)
logging(log, msg) logging(log, msg)
count.skip += 1
return {"message": msg} return {"message": msg}
logging(log, _("开始处理作品:{0}").format(i)) logging(log, _("开始处理作品:{0}").format(i))
html = await self.html.request_url( html = await self.html.request_url(
@ -341,11 +389,13 @@ class XHS:
namespace = self.__generate_data_object(html) namespace = self.__generate_data_object(html)
if not namespace: if not namespace:
logging(log, _("{0} 获取数据失败").format(i), ERROR) logging(log, _("{0} 获取数据失败").format(i), ERROR)
count.fail += 1
return {} return {}
data = self.explore.run(namespace) data = self.explore.run(namespace)
# logging(log, data) # 调试代码 # logging(log, data) # 调试代码
if not data: if not data:
logging(log, _("{0} 提取数据失败").format(i), ERROR) logging(log, _("{0} 提取数据失败").format(i), ERROR)
count.fail += 1
return {} return {}
if data["作品类型"] == _("视频"): if data["作品类型"] == _("视频"):
self.__extract_video(data, namespace) self.__extract_video(data, namespace)
@ -359,7 +409,14 @@ class XHS:
data["下载地址"] = [] data["下载地址"] = []
data["动图地址"] = [] data["动图地址"] = []
await self.update_author_nickname(data, log) await self.update_author_nickname(data, log)
await self.__download_files(data, download, index, log, bar) await self.__download_files(
data,
download,
index,
log,
bar,
count,
)
logging(log, _("作品处理完成:{0}").format(i)) logging(log, _("作品处理完成:{0}").format(i))
# await sleep_time() # await sleep_time()
return data return data

View File

@ -114,7 +114,7 @@ class Download:
for url, name, format_ in tasks for url, name, format_ in tasks
] ]
tasks = await gather(*tasks) tasks = await gather(*tasks)
return path, tasks return path, tasks # 未解之谜
def __generate_path(self, nickname: str, filename: str): def __generate_path(self, nickname: str, filename: str):
if self.author_archive: if self.author_archive:

View File

@ -1,7 +1,8 @@
**项目更新内容:** **项目更新内容:**
1. 修复 MCP 模式无法下载文件的问题 1. 修复 MCP 模式无法下载文件的问题
2. 调整内置延时机制 2. 新增作品处理统计功能
3. 调整内置延时机制
***** *****

8
uv.lock generated
View File

@ -1239,16 +1239,16 @@ dev = [
requires-dist = [ requires-dist = [
{ name = "aiofiles", specifier = ">=25.1.0" }, { name = "aiofiles", specifier = ">=25.1.0" },
{ name = "aiosqlite", specifier = ">=0.21.0" }, { name = "aiosqlite", specifier = ">=0.21.0" },
{ name = "click", specifier = ">=8.3.0" }, { name = "click", specifier = ">=8.3.1" },
{ name = "emoji", specifier = ">=2.15.0" }, { name = "emoji", specifier = ">=2.15.0" },
{ name = "fastapi", specifier = ">=0.121.0" }, { name = "fastapi", specifier = ">=0.123.10" },
{ name = "fastmcp", specifier = ">=2.13.0" }, { name = "fastmcp", specifier = ">=2.13.3" },
{ name = "httpx", extras = ["socks"], specifier = ">=0.28.1" }, { name = "httpx", extras = ["socks"], specifier = ">=0.28.1" },
{ name = "lxml", specifier = ">=6.0.2" }, { name = "lxml", specifier = ">=6.0.2" },
{ name = "pyperclip", specifier = ">=1.11.0" }, { name = "pyperclip", specifier = ">=1.11.0" },
{ name = "pyyaml", specifier = ">=6.0.3" }, { name = "pyyaml", specifier = ">=6.0.3" },
{ name = "rookiepy", specifier = ">=0.5.6" }, { name = "rookiepy", specifier = ">=0.5.6" },
{ name = "textual", specifier = ">=6.5.0" }, { name = "textual", specifier = ">=6.7.1" },
{ name = "uvicorn", specifier = ">=0.38.0" }, { name = "uvicorn", specifier = ">=0.38.0" },
{ name = "websockets", specifier = ">=15.0.1" }, { name = "websockets", specifier = ">=15.0.1" },
] ]