mirror of
https://github.com/JoeanAmier/XHS-Downloader.git
synced 2025-12-25 20:36:47 +08:00
feat: 新增作品处理统计功能
This commit is contained in:
parent
e747d94396
commit
1045d0aaf9
@ -18,6 +18,7 @@ from fastapi.responses import RedirectResponse
|
||||
from fastmcp import FastMCP
|
||||
from typing import Annotated
|
||||
from pydantic import Field
|
||||
from types import SimpleNamespace
|
||||
|
||||
# from aiohttp import web
|
||||
from pyperclip import copy, paste
|
||||
@ -188,13 +189,15 @@ class XHS:
|
||||
index,
|
||||
log,
|
||||
bar,
|
||||
count: SimpleNamespace,
|
||||
):
|
||||
name = self.__naming_rules(container)
|
||||
if (u := container["下载地址"]) and download:
|
||||
if await self.skip_download(i := container["作品ID"]):
|
||||
logging(log, _("作品 {0} 存在下载记录,跳过下载").format(i))
|
||||
count.skip += 1
|
||||
else:
|
||||
path, result = await self.download.run(
|
||||
__, result = await self.download.run(
|
||||
u,
|
||||
container["动图地址"],
|
||||
index,
|
||||
@ -207,9 +210,16 @@ class XHS:
|
||||
log,
|
||||
bar,
|
||||
)
|
||||
await self.__add_record(i, result)
|
||||
if result:
|
||||
count.success += 1
|
||||
await self.__add_record(
|
||||
i,
|
||||
)
|
||||
else:
|
||||
count.fail += 1
|
||||
elif not u:
|
||||
logging(log, _("提取作品文件下载地址失败"), ERROR)
|
||||
count.fail += 1
|
||||
await self.save_data(container)
|
||||
|
||||
@data_cache
|
||||
@ -223,9 +233,11 @@ class XHS:
|
||||
data.pop("时间戳", None)
|
||||
await self.data_recorder.add(**data)
|
||||
|
||||
async def __add_record(self, id_: str, result: list) -> None:
|
||||
if all(result):
|
||||
await self.id_recorder.add(id_)
|
||||
async def __add_record(
|
||||
self,
|
||||
id_: str,
|
||||
) -> None:
|
||||
await self.id_recorder.add(id_)
|
||||
|
||||
async def extract(
|
||||
self,
|
||||
@ -236,14 +248,17 @@ class XHS:
|
||||
bar=None,
|
||||
data=True,
|
||||
) -> list[dict]:
|
||||
# return # 调试代码
|
||||
urls = await self.extract_links(url, log)
|
||||
if not urls:
|
||||
if not (urls := await self.extract_links(url, log)):
|
||||
logging(log, _("提取小红书作品链接失败"), WARNING)
|
||||
else:
|
||||
logging(log, _("共 {0} 个小红书作品待处理...").format(len(urls)))
|
||||
# return urls # 调试代码
|
||||
return [
|
||||
return []
|
||||
statistics = SimpleNamespace(
|
||||
all=len(urls),
|
||||
success=0,
|
||||
fail=0,
|
||||
skip=0,
|
||||
)
|
||||
logging(log, _("共 {0} 个小红书作品待处理...").format(statistics.all))
|
||||
result = [
|
||||
await self.__deal_extract(
|
||||
i,
|
||||
download,
|
||||
@ -251,9 +266,27 @@ class XHS:
|
||||
log,
|
||||
bar,
|
||||
data,
|
||||
count=statistics,
|
||||
)
|
||||
for i in urls
|
||||
]
|
||||
self.show_statistics(statistics, log,)
|
||||
return result
|
||||
|
||||
@staticmethod
def show_statistics(
    statistics: SimpleNamespace,
    log=None,
) -> None:
    """Log a one-line summary of the processing counters.

    ``statistics`` must expose the ``all``, ``success``, ``fail`` and
    ``skip`` attributes; ``log`` is forwarded unchanged to ``logging``.
    """
    template = _("共处理 {0} 个作品,成功 {1} 个,失败 {2} 个,跳过 {3} 个")
    counters = (
        statistics.all,
        statistics.success,
        statistics.fail,
        statistics.skip,
    )
    logging(log, template.format(*counters))
|
||||
|
||||
async def extract_cli(
|
||||
self,
|
||||
@ -278,6 +311,12 @@ class XHS:
|
||||
data,
|
||||
)
|
||||
else:
|
||||
statistics = SimpleNamespace(
|
||||
all=len(url),
|
||||
success=0,
|
||||
fail=0,
|
||||
skip=0,
|
||||
)
|
||||
[
|
||||
await self.__deal_extract(
|
||||
u,
|
||||
@ -286,9 +325,11 @@ class XHS:
|
||||
log,
|
||||
bar,
|
||||
data,
|
||||
count=statistics,
|
||||
)
|
||||
for u in url
|
||||
]
|
||||
self.show_statistics(statistics, log,)
|
||||
|
||||
async def extract_links(self, url: str, log) -> list:
|
||||
urls = []
|
||||
@ -326,10 +367,17 @@ class XHS:
|
||||
data: bool,
|
||||
cookie: str = None,
|
||||
proxy: str = None,
|
||||
count=SimpleNamespace(
|
||||
all=0,
|
||||
success=0,
|
||||
fail=0,
|
||||
skip=0,
|
||||
),
|
||||
):
|
||||
if await self.skip_download(i := self.__extract_link_id(url)) and not data:
|
||||
msg = _("作品 {0} 存在下载记录,跳过处理").format(i)
|
||||
logging(log, msg)
|
||||
count.skip += 1
|
||||
return {"message": msg}
|
||||
logging(log, _("开始处理作品:{0}").format(i))
|
||||
html = await self.html.request_url(
|
||||
@ -341,11 +389,13 @@ class XHS:
|
||||
namespace = self.__generate_data_object(html)
|
||||
if not namespace:
|
||||
logging(log, _("{0} 获取数据失败").format(i), ERROR)
|
||||
count.fail += 1
|
||||
return {}
|
||||
data = self.explore.run(namespace)
|
||||
# logging(log, data) # 调试代码
|
||||
if not data:
|
||||
logging(log, _("{0} 提取数据失败").format(i), ERROR)
|
||||
count.fail += 1
|
||||
return {}
|
||||
if data["作品类型"] == _("视频"):
|
||||
self.__extract_video(data, namespace)
|
||||
@ -359,7 +409,14 @@ class XHS:
|
||||
data["下载地址"] = []
|
||||
data["动图地址"] = []
|
||||
await self.update_author_nickname(data, log)
|
||||
await self.__download_files(data, download, index, log, bar)
|
||||
await self.__download_files(
|
||||
data,
|
||||
download,
|
||||
index,
|
||||
log,
|
||||
bar,
|
||||
count,
|
||||
)
|
||||
logging(log, _("作品处理完成:{0}").format(i))
|
||||
# await sleep_time()
|
||||
return data
|
||||
|
||||
@ -114,7 +114,7 @@ class Download:
|
||||
for url, name, format_ in tasks
|
||||
]
|
||||
tasks = await gather(*tasks)
|
||||
return path, tasks
|
||||
return path, tasks # 未解之谜
|
||||
|
||||
def __generate_path(self, nickname: str, filename: str):
|
||||
if self.author_archive:
|
||||
|
||||
@ -1,7 +1,8 @@
|
||||
**项目更新内容:**
|
||||
|
||||
1. 修复 MCP 模式无法下载文件的问题
|
||||
2. 调整内置延时机制
|
||||
2. 新增作品处理统计功能
|
||||
3. 调整内置延时机制
|
||||
|
||||
*****
|
||||
|
||||
|
||||
8
uv.lock
generated
8
uv.lock
generated
@ -1239,16 +1239,16 @@ dev = [
|
||||
requires-dist = [
|
||||
{ name = "aiofiles", specifier = ">=25.1.0" },
|
||||
{ name = "aiosqlite", specifier = ">=0.21.0" },
|
||||
{ name = "click", specifier = ">=8.3.0" },
|
||||
{ name = "click", specifier = ">=8.3.1" },
|
||||
{ name = "emoji", specifier = ">=2.15.0" },
|
||||
{ name = "fastapi", specifier = ">=0.121.0" },
|
||||
{ name = "fastmcp", specifier = ">=2.13.0" },
|
||||
{ name = "fastapi", specifier = ">=0.123.10" },
|
||||
{ name = "fastmcp", specifier = ">=2.13.3" },
|
||||
{ name = "httpx", extras = ["socks"], specifier = ">=0.28.1" },
|
||||
{ name = "lxml", specifier = ">=6.0.2" },
|
||||
{ name = "pyperclip", specifier = ">=1.11.0" },
|
||||
{ name = "pyyaml", specifier = ">=6.0.3" },
|
||||
{ name = "rookiepy", specifier = ">=0.5.6" },
|
||||
{ name = "textual", specifier = ">=6.5.0" },
|
||||
{ name = "textual", specifier = ">=6.7.1" },
|
||||
{ name = "uvicorn", specifier = ">=0.38.0" },
|
||||
{ name = "websockets", specifier = ">=15.0.1" },
|
||||
]
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user