mirror of
https://github.com/JoeanAmier/XHS-Downloader.git
synced 2025-12-26 04:48:05 +08:00
feat: 新增作品处理统计功能
This commit is contained in:
parent
e747d94396
commit
1045d0aaf9
@ -18,6 +18,7 @@ from fastapi.responses import RedirectResponse
|
|||||||
from fastmcp import FastMCP
|
from fastmcp import FastMCP
|
||||||
from typing import Annotated
|
from typing import Annotated
|
||||||
from pydantic import Field
|
from pydantic import Field
|
||||||
|
from types import SimpleNamespace
|
||||||
|
|
||||||
# from aiohttp import web
|
# from aiohttp import web
|
||||||
from pyperclip import copy, paste
|
from pyperclip import copy, paste
|
||||||
@ -188,13 +189,15 @@ class XHS:
|
|||||||
index,
|
index,
|
||||||
log,
|
log,
|
||||||
bar,
|
bar,
|
||||||
|
count: SimpleNamespace,
|
||||||
):
|
):
|
||||||
name = self.__naming_rules(container)
|
name = self.__naming_rules(container)
|
||||||
if (u := container["下载地址"]) and download:
|
if (u := container["下载地址"]) and download:
|
||||||
if await self.skip_download(i := container["作品ID"]):
|
if await self.skip_download(i := container["作品ID"]):
|
||||||
logging(log, _("作品 {0} 存在下载记录,跳过下载").format(i))
|
logging(log, _("作品 {0} 存在下载记录,跳过下载").format(i))
|
||||||
|
count.skip += 1
|
||||||
else:
|
else:
|
||||||
path, result = await self.download.run(
|
__, result = await self.download.run(
|
||||||
u,
|
u,
|
||||||
container["动图地址"],
|
container["动图地址"],
|
||||||
index,
|
index,
|
||||||
@ -207,9 +210,16 @@ class XHS:
|
|||||||
log,
|
log,
|
||||||
bar,
|
bar,
|
||||||
)
|
)
|
||||||
await self.__add_record(i, result)
|
if result:
|
||||||
|
count.success += 1
|
||||||
|
await self.__add_record(
|
||||||
|
i,
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
count.fail += 1
|
||||||
elif not u:
|
elif not u:
|
||||||
logging(log, _("提取作品文件下载地址失败"), ERROR)
|
logging(log, _("提取作品文件下载地址失败"), ERROR)
|
||||||
|
count.fail += 1
|
||||||
await self.save_data(container)
|
await self.save_data(container)
|
||||||
|
|
||||||
@data_cache
|
@data_cache
|
||||||
@ -223,8 +233,10 @@ class XHS:
|
|||||||
data.pop("时间戳", None)
|
data.pop("时间戳", None)
|
||||||
await self.data_recorder.add(**data)
|
await self.data_recorder.add(**data)
|
||||||
|
|
||||||
async def __add_record(self, id_: str, result: list) -> None:
|
async def __add_record(
|
||||||
if all(result):
|
self,
|
||||||
|
id_: str,
|
||||||
|
) -> None:
|
||||||
await self.id_recorder.add(id_)
|
await self.id_recorder.add(id_)
|
||||||
|
|
||||||
async def extract(
|
async def extract(
|
||||||
@ -236,14 +248,17 @@ class XHS:
|
|||||||
bar=None,
|
bar=None,
|
||||||
data=True,
|
data=True,
|
||||||
) -> list[dict]:
|
) -> list[dict]:
|
||||||
# return # 调试代码
|
if not (urls := await self.extract_links(url, log)):
|
||||||
urls = await self.extract_links(url, log)
|
|
||||||
if not urls:
|
|
||||||
logging(log, _("提取小红书作品链接失败"), WARNING)
|
logging(log, _("提取小红书作品链接失败"), WARNING)
|
||||||
else:
|
return []
|
||||||
logging(log, _("共 {0} 个小红书作品待处理...").format(len(urls)))
|
statistics = SimpleNamespace(
|
||||||
# return urls # 调试代码
|
all=len(urls),
|
||||||
return [
|
success=0,
|
||||||
|
fail=0,
|
||||||
|
skip=0,
|
||||||
|
)
|
||||||
|
logging(log, _("共 {0} 个小红书作品待处理...").format(statistics.all))
|
||||||
|
result = [
|
||||||
await self.__deal_extract(
|
await self.__deal_extract(
|
||||||
i,
|
i,
|
||||||
download,
|
download,
|
||||||
@ -251,9 +266,27 @@ class XHS:
|
|||||||
log,
|
log,
|
||||||
bar,
|
bar,
|
||||||
data,
|
data,
|
||||||
|
count=statistics,
|
||||||
)
|
)
|
||||||
for i in urls
|
for i in urls
|
||||||
]
|
]
|
||||||
|
self.show_statistics(statistics, log,)
|
||||||
|
return result
|
||||||
|
|
||||||
|
@staticmethod
def show_statistics(
    statistics: SimpleNamespace,
    log=None,
) -> None:
    """Emit a one-line summary of the processing counters.

    Reads the ``all``, ``success``, ``fail`` and ``skip`` attributes of
    *statistics*, substitutes them (in that order) into the message
    template passed through ``_``, and hands the resulting text to the
    ``logging`` helper together with *log*.
    """
    template = _("共处理 {0} 个作品,成功 {1} 个,失败 {2} 个,跳过 {3} 个")
    # Order matters: the placeholders are total, success, fail, skip.
    counters = (
        statistics.all,
        statistics.success,
        statistics.fail,
        statistics.skip,
    )
    logging(log, template.format(*counters))
|
||||||
|
|
||||||
async def extract_cli(
|
async def extract_cli(
|
||||||
self,
|
self,
|
||||||
@ -278,6 +311,12 @@ class XHS:
|
|||||||
data,
|
data,
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
|
statistics = SimpleNamespace(
|
||||||
|
all=len(url),
|
||||||
|
success=0,
|
||||||
|
fail=0,
|
||||||
|
skip=0,
|
||||||
|
)
|
||||||
[
|
[
|
||||||
await self.__deal_extract(
|
await self.__deal_extract(
|
||||||
u,
|
u,
|
||||||
@ -286,9 +325,11 @@ class XHS:
|
|||||||
log,
|
log,
|
||||||
bar,
|
bar,
|
||||||
data,
|
data,
|
||||||
|
count=statistics,
|
||||||
)
|
)
|
||||||
for u in url
|
for u in url
|
||||||
]
|
]
|
||||||
|
self.show_statistics(statistics, log,)
|
||||||
|
|
||||||
async def extract_links(self, url: str, log) -> list:
|
async def extract_links(self, url: str, log) -> list:
|
||||||
urls = []
|
urls = []
|
||||||
@ -326,10 +367,17 @@ class XHS:
|
|||||||
data: bool,
|
data: bool,
|
||||||
cookie: str = None,
|
cookie: str = None,
|
||||||
proxy: str = None,
|
proxy: str = None,
|
||||||
|
count=SimpleNamespace(
|
||||||
|
all=0,
|
||||||
|
success=0,
|
||||||
|
fail=0,
|
||||||
|
skip=0,
|
||||||
|
),
|
||||||
):
|
):
|
||||||
if await self.skip_download(i := self.__extract_link_id(url)) and not data:
|
if await self.skip_download(i := self.__extract_link_id(url)) and not data:
|
||||||
msg = _("作品 {0} 存在下载记录,跳过处理").format(i)
|
msg = _("作品 {0} 存在下载记录,跳过处理").format(i)
|
||||||
logging(log, msg)
|
logging(log, msg)
|
||||||
|
count.skip += 1
|
||||||
return {"message": msg}
|
return {"message": msg}
|
||||||
logging(log, _("开始处理作品:{0}").format(i))
|
logging(log, _("开始处理作品:{0}").format(i))
|
||||||
html = await self.html.request_url(
|
html = await self.html.request_url(
|
||||||
@ -341,11 +389,13 @@ class XHS:
|
|||||||
namespace = self.__generate_data_object(html)
|
namespace = self.__generate_data_object(html)
|
||||||
if not namespace:
|
if not namespace:
|
||||||
logging(log, _("{0} 获取数据失败").format(i), ERROR)
|
logging(log, _("{0} 获取数据失败").format(i), ERROR)
|
||||||
|
count.fail += 1
|
||||||
return {}
|
return {}
|
||||||
data = self.explore.run(namespace)
|
data = self.explore.run(namespace)
|
||||||
# logging(log, data) # 调试代码
|
# logging(log, data) # 调试代码
|
||||||
if not data:
|
if not data:
|
||||||
logging(log, _("{0} 提取数据失败").format(i), ERROR)
|
logging(log, _("{0} 提取数据失败").format(i), ERROR)
|
||||||
|
count.fail += 1
|
||||||
return {}
|
return {}
|
||||||
if data["作品类型"] == _("视频"):
|
if data["作品类型"] == _("视频"):
|
||||||
self.__extract_video(data, namespace)
|
self.__extract_video(data, namespace)
|
||||||
@ -359,7 +409,14 @@ class XHS:
|
|||||||
data["下载地址"] = []
|
data["下载地址"] = []
|
||||||
data["动图地址"] = []
|
data["动图地址"] = []
|
||||||
await self.update_author_nickname(data, log)
|
await self.update_author_nickname(data, log)
|
||||||
await self.__download_files(data, download, index, log, bar)
|
await self.__download_files(
|
||||||
|
data,
|
||||||
|
download,
|
||||||
|
index,
|
||||||
|
log,
|
||||||
|
bar,
|
||||||
|
count,
|
||||||
|
)
|
||||||
logging(log, _("作品处理完成:{0}").format(i))
|
logging(log, _("作品处理完成:{0}").format(i))
|
||||||
# await sleep_time()
|
# await sleep_time()
|
||||||
return data
|
return data
|
||||||
|
|||||||
@ -114,7 +114,7 @@ class Download:
|
|||||||
for url, name, format_ in tasks
|
for url, name, format_ in tasks
|
||||||
]
|
]
|
||||||
tasks = await gather(*tasks)
|
tasks = await gather(*tasks)
|
||||||
return path, tasks
|
return path, tasks # 未解之谜
|
||||||
|
|
||||||
def __generate_path(self, nickname: str, filename: str):
|
def __generate_path(self, nickname: str, filename: str):
|
||||||
if self.author_archive:
|
if self.author_archive:
|
||||||
|
|||||||
@ -1,7 +1,8 @@
|
|||||||
**项目更新内容:**
|
**项目更新内容:**
|
||||||
|
|
||||||
1. 修复 MCP 模式无法下载文件的问题
|
1. 修复 MCP 模式无法下载文件的问题
|
||||||
2. 调整内置延时机制
|
2. 新增作品处理统计功能
|
||||||
|
3. 调整内置延时机制
|
||||||
|
|
||||||
*****
|
*****
|
||||||
|
|
||||||
|
|||||||
8
uv.lock
generated
8
uv.lock
generated
@ -1239,16 +1239,16 @@ dev = [
|
|||||||
requires-dist = [
|
requires-dist = [
|
||||||
{ name = "aiofiles", specifier = ">=25.1.0" },
|
{ name = "aiofiles", specifier = ">=25.1.0" },
|
||||||
{ name = "aiosqlite", specifier = ">=0.21.0" },
|
{ name = "aiosqlite", specifier = ">=0.21.0" },
|
||||||
{ name = "click", specifier = ">=8.3.0" },
|
{ name = "click", specifier = ">=8.3.1" },
|
||||||
{ name = "emoji", specifier = ">=2.15.0" },
|
{ name = "emoji", specifier = ">=2.15.0" },
|
||||||
{ name = "fastapi", specifier = ">=0.121.0" },
|
{ name = "fastapi", specifier = ">=0.123.10" },
|
||||||
{ name = "fastmcp", specifier = ">=2.13.0" },
|
{ name = "fastmcp", specifier = ">=2.13.3" },
|
||||||
{ name = "httpx", extras = ["socks"], specifier = ">=0.28.1" },
|
{ name = "httpx", extras = ["socks"], specifier = ">=0.28.1" },
|
||||||
{ name = "lxml", specifier = ">=6.0.2" },
|
{ name = "lxml", specifier = ">=6.0.2" },
|
||||||
{ name = "pyperclip", specifier = ">=1.11.0" },
|
{ name = "pyperclip", specifier = ">=1.11.0" },
|
||||||
{ name = "pyyaml", specifier = ">=6.0.3" },
|
{ name = "pyyaml", specifier = ">=6.0.3" },
|
||||||
{ name = "rookiepy", specifier = ">=0.5.6" },
|
{ name = "rookiepy", specifier = ">=0.5.6" },
|
||||||
{ name = "textual", specifier = ">=6.5.0" },
|
{ name = "textual", specifier = ">=6.7.1" },
|
||||||
{ name = "uvicorn", specifier = ">=0.38.0" },
|
{ name = "uvicorn", specifier = ">=0.38.0" },
|
||||||
{ name = "websockets", specifier = ">=15.0.1" },
|
{ name = "websockets", specifier = ">=15.0.1" },
|
||||||
]
|
]
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user