style: 代码格式化和字符串处理优化

- 优化代码缩进和换行,提高可读性
- 统一字符串引号风格:默认使用双引号,仅在 f-string 内部嵌套的字符串中使用单引号,保持一致性
- 移除冗余的空格和括号,精简代码
This commit is contained in:
2025-02-15 21:30:24 +08:00
parent 94198f5a51
commit 1570ba320c
34 changed files with 948 additions and 358 deletions

View File

@@ -48,11 +48,17 @@ __all__ = ["XHS"]
def _data_cache(function):
async def inner(self, data: dict, ):
async def inner(
self,
data: dict,
):
if self.manager.record_data:
download = data["下载地址"]
lives = data["动图地址"]
await function(self, data, )
await function(
self,
data,
)
data["下载地址"] = download
data["动图地址"] = lives
@@ -137,11 +143,14 @@ class XHS:
def __extract_image(self, container: dict, data: Namespace):
container["下载地址"], container["动图地址"] = self.image.get_image_link(
data, self.manager.image_format)
data, self.manager.image_format
)
def __extract_video(self, container: dict, data: Namespace):
container["下载地址"] = self.video.get_video_link(data)
container["动图地址"] = [None, ]
container["动图地址"] = [
None,
]
async def __download_files(
self,
@@ -154,8 +163,7 @@ class XHS:
name = self.__naming_rules(container)
if (u := container["下载地址"]) and download:
if await self.skip_download(i := container["作品ID"]):
logging(
log, _("作品 {0} 存在下载记录,跳过下载").format(i))
logging(log, _("作品 {0} 存在下载记录,跳过下载").format(i))
else:
path, result = await self.download.run(
u,
@@ -172,7 +180,10 @@ class XHS:
await self.save_data(container)
@_data_cache
async def save_data(self, data: dict, ):
async def save_data(
self,
data: dict,
):
data["采集时间"] = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
data["下载地址"] = " ".join(data["下载地址"])
data["动图地址"] = " ".join(i or "NaN" for i in data["动图地址"])
@@ -196,10 +207,19 @@ class XHS:
if not urls:
logging(log, _("提取小红书作品链接失败"), WARNING)
else:
logging(
log, _("{0} 个小红书作品待处理...").format(len(urls)))
logging(log, _("{0} 个小红书作品待处理...").format(len(urls)))
# return urls # 调试代码
return [await self.__deal_extract(i, download, index, log, bar, data, ) for i in urls]
return [
await self.__deal_extract(
i,
download,
index,
log,
bar,
data,
)
for i in urls
]
async def extract_cli(
self,
@@ -214,7 +234,14 @@ class XHS:
if not url:
logging(log, _("提取小红书作品链接失败"), WARNING)
else:
await self.__deal_extract(url[0], download, index, log, bar, data, )
await self.__deal_extract(
url[0],
download,
index,
log,
bar,
data,
)
async def extract_links(self, url: str, log) -> list:
urls = []
@@ -253,7 +280,11 @@ class XHS:
logging(log, msg)
return {"message": msg}
logging(log, _("开始处理作品:{0}").format(i))
html = await self.html.request_url(url, log=log, cookie=cookie, )
html = await self.html.request_url(
url,
log=log,
cookie=cookie,
)
namespace = self.__generate_data_object(html)
if not namespace:
logging(log, _("{0} 获取数据失败").format(i), ERROR)
@@ -299,10 +330,12 @@ class XHS:
return beautify_string(
self.CLEANER.filter_name(
self.manager.SEPARATE.join(values),
default=self.manager.SEPARATE.join((
data["作者ID"],
data["ID"],
)),
default=self.manager.SEPARATE.join(
(
data["ID"],
data["作品ID"],
)
),
),
length=128,
)
@@ -315,10 +348,13 @@ class XHS:
return self.manager.filter_name(data["作者昵称"]) or data["作者ID"]
def __get_name_title(self, data: dict) -> str:
return beautify_string(
self.manager.filter_name(data["作品标题"]),
64,
) or data["作品ID"]
return (
beautify_string(
self.manager.filter_name(data["作品标题"]),
64,
)
or data["作品ID"]
)
async def monitor(
self,
@@ -331,11 +367,15 @@ class XHS:
logging(
None,
_(
"程序会自动读取并提取剪贴板中的小红书作品链接,并自动下载链接对应的作品文件,如需关闭,请点击关闭按钮,或者向剪贴板写入 “close” 文本!"),
"程序会自动读取并提取剪贴板中的小红书作品链接,并自动下载链接对应的作品文件,如需关闭,请点击关闭按钮,或者向剪贴板写入 “close” 文本!"
),
style=MASTER,
)
self.event.clear()
await gather(self.__push_link(delay), self.__receive_link(delay, download, None, log, bar, data))
await gather(
self.__push_link(delay),
self.__receive_link(delay, download, None, log, bar, data),
)
async def __push_link(self, delay: int):
while not self.event.is_set():
@@ -373,10 +413,16 @@ class XHS:
@staticmethod
def read_browser_cookie(value: str | int) -> str:
return BrowserCookie.get(
value,
domains=["xiaohongshu.com", ],
) if value else ""
return (
BrowserCookie.get(
value,
domains=[
"xiaohongshu.com",
],
)
if value
else ""
)
# @staticmethod
# async def index(request):
@@ -425,11 +471,17 @@ class XHS:
# await self.runner.cleanup()
# logging(log, _("Web API 服务器已关闭!"))
async def run_server(self, host="0.0.0.0", port=8000, log_level="info", ):
async def run_server(
self,
host="0.0.0.0",
port=8000,
log_level="info",
):
self.server = FastAPI(
debug=self.VERSION_BETA,
title="XHS-Downloader",
version=f"{self.VERSION_MAJOR}.{self.VERSION_MINOR}")
version=f"{self.VERSION_MAJOR}.{self.VERSION_MINOR}",
)
self.setup_routes()
config = Config(
self.server,
@@ -445,7 +497,10 @@ class XHS:
async def index():
return RedirectResponse(url=REPOSITORY)
@self.server.post("/xhs/", response_model=ExtractData, )
@self.server.post(
"/xhs/",
response_model=ExtractData,
)
async def handle(extract: ExtractParams):
url = await self.extract_links(extract.url, None)
if not url:
@@ -466,6 +521,5 @@ class XHS:
msg = _("获取小红书作品数据失败")
data = None
return ExtractData(
message=msg,
url=url[0] if url else extract.url,
data=data)
message=msg, url=url[0] if url else extract.url, data=data
)

View File

@@ -23,7 +23,7 @@ from ..translation import _
if TYPE_CHECKING:
from httpx import AsyncClient
__all__ = ['Download']
__all__ = ["Download"]
class Download:
@@ -38,7 +38,10 @@ class Download:
"audio/mpeg": "mp3",
}
def __init__(self, manager: Manager, ):
def __init__(
self,
manager: Manager,
):
self.manager = manager
self.folder = manager.folder
self.temp = manager.temp
@@ -98,7 +101,8 @@ class Download:
format_,
log,
bar,
) for url, name, format_ in tasks
)
for url, name, format_ in tasks
]
tasks = await gather(*tasks)
return path, tasks
@@ -109,11 +113,8 @@ class Download:
return path
def __ready_download_video(
self,
urls: list[str],
path: Path,
name: str,
log) -> list:
self, urls: list[str], path: Path, name: str, log
) -> list:
if not self.video_download:
logging(log, _("视频作品下载功能已关闭,跳过下载"))
return []
@@ -128,7 +129,8 @@ class Download:
index: list | tuple | None,
path: Path,
name: str,
log) -> list:
log,
) -> list:
tasks = []
if not self.image_download:
logging(log, _("图文作品下载功能已关闭,跳过下载"))
@@ -146,28 +148,38 @@ class Download:
for s in self.image_format_list
):
tasks.append([j[0], file, self.image_format])
if not self.live_download or not j[1] or self.__check_exists_path(
if (
not self.live_download
or not j[1]
or self.__check_exists_path(
path,
f"{file}.{self.live_format}",
log,
)
):
continue
tasks.append([j[1], file, self.live_format])
return tasks
def __check_exists_glob(self, path: Path, name: str, log, ) -> bool:
def __check_exists_glob(
self,
path: Path,
name: str,
log,
) -> bool:
if any(path.glob(name)):
logging(
log, _(
"{0} 文件已存在,跳过下载").format(name))
logging(log, _("{0} 文件已存在,跳过下载").format(name))
return True
return False
def __check_exists_path(self, path: Path, name: str, log, ) -> bool:
def __check_exists_path(
self,
path: Path,
name: str,
log,
) -> bool:
if path.joinpath(name).exists():
logging(
log, _(
"{0} 文件已存在,跳过下载").format(name))
logging(log, _("{0} 文件已存在,跳过下载").format(name))
return True
return False
@@ -199,9 +211,16 @@ class Download:
# return False
# temp = self.temp.joinpath(f"{name}.{suffix}")
temp = self.temp.joinpath(f"{name}.{format_}")
self.__update_headers_range(headers, temp, )
self.__update_headers_range(
headers,
temp,
)
try:
async with self.client.stream("GET", url, headers=headers, ) as response:
async with self.client.stream(
"GET",
url,
headers=headers,
) as response:
await sleep_time()
if response.status_code == 416:
raise CacheError(
@@ -234,8 +253,9 @@ class Download:
# self.__create_progress(bar, None)
logging(
log,
_(
"网络异常,{0} 下载失败,错误信息: {1}").format(name, repr(error)),
_("网络异常,{0} 下载失败,错误信息: {1}").format(
name, repr(error)
),
ERROR,
)
return False
@@ -248,7 +268,11 @@ class Download:
)
@staticmethod
def __create_progress(bar, total: int | None, completed=0, ):
def __create_progress(
bar,
total: int | None,
completed=0,
):
if bar:
bar.update(total=total, completed=completed)
@@ -273,10 +297,8 @@ class Download:
)
await sleep_time()
response.raise_for_status()
suffix = self.__extract_type(
response.headers.get("Content-Type")) or suffix
length = response.headers.get(
"Content-Length", 0)
suffix = self.__extract_type(response.headers.get("Content-Type")) or suffix
length = response.headers.get("Content-Length", 0)
return int(length), suffix
@staticmethod
@@ -303,12 +325,14 @@ class Download:
async with open(temp, "rb") as f:
file_start = await f.read(FILE_SIGNATURES_LENGTH)
for offset, signature, suffix in FILE_SIGNATURES:
if file_start[offset:offset + len(signature)] == signature:
if file_start[offset: offset + len(signature)] == signature:
return path.joinpath(f"{name}.{suffix}")
except Exception as error:
logging(
log,
_("文件 {0} 格式判断失败,错误信息:{1}").format(temp.name, repr(error)),
_("文件 {0} 格式判断失败,错误信息:{1}").format(
temp.name, repr(error)
),
ERROR,
)
return path.joinpath(f"{name}.{default_suffix}")

View File

@@ -3,7 +3,7 @@ from datetime import datetime
from ..expansion import Namespace
from ..translation import _
__all__ = ['Explore']
__all__ = ["Explore"]
class Explore:
@@ -27,10 +27,8 @@ class Explore:
@staticmethod
def __extract_interact_info(container: dict, data: Namespace) -> None:
container["收藏数量"] = data.safe_extract(
"interactInfo.collectedCount", "-1")
container["评论数量"] = data.safe_extract(
"interactInfo.commentCount", "-1")
container["收藏数量"] = data.safe_extract("interactInfo.collectedCount", "-1")
container["评论数量"] = data.safe_extract("interactInfo.commentCount", "-1")
container["分享数量"] = data.safe_extract("interactInfo.shareCount", "-1")
container["点赞数量"] = data.safe_extract("interactInfo.likedCount", "-1")
@@ -38,33 +36,37 @@ class Explore:
def __extract_tags(container: dict, data: Namespace):
tags = data.safe_extract("tagList", [])
container["作品标签"] = " ".join(
Namespace.object_extract(
i, "name") for i in tags)
Namespace.object_extract(i, "name") for i in tags
)
def __extract_info(self, container: dict, data: Namespace):
container["作品ID"] = data.safe_extract("noteId")
container["作品链接"] = f"https://www.xiaohongshu.com/explore/{container["作品ID"]}"
container["作品链接"] = (
f"https://www.xiaohongshu.com/explore/{container['作品ID']}"
)
container["作品标题"] = data.safe_extract("title")
container["作品描述"] = data.safe_extract("desc")
container["作品类型"] = self.explore_type.get(
data.safe_extract("type"), _("未知"))
data.safe_extract("type"), _("未知")
)
# container["IP归属地"] = data.safe_extract("ipLocation")
def __extract_time(self, container: dict, data: Namespace):
container["发布时间"] = datetime.fromtimestamp(
time /
1000).strftime(
self.time_format) if (
time := data.safe_extract("time")) else _("未知")
container["最后更新时间"] = datetime.fromtimestamp(
last /
1000).strftime(
self.time_format) if (
last := data.safe_extract("lastUpdateTime")) else _("未知")
container["发布时间"] = (
datetime.fromtimestamp(time / 1000).strftime(self.time_format)
if (time := data.safe_extract("time"))
else _("未知")
)
container["最后更新时间"] = (
datetime.fromtimestamp(last / 1000).strftime(self.time_format)
if (last := data.safe_extract("lastUpdateTime"))
else _("未知")
)
@staticmethod
def __extract_user(container: dict, data: Namespace):
container["作者昵称"] = data.safe_extract("user.nickname")
container["作者ID"] = data.safe_extract("user.userId")
container["作者链接"] = f"https://www.xiaohongshu.com/user/profile/{
container["作者ID"]}"
container["作者链接"] = (
f"https://www.xiaohongshu.com/user/profile/{container['作者ID']}"
)

View File

@@ -1,7 +1,7 @@
from source.expansion import Namespace
from .request import Html
__all__ = ['Image']
__all__ = ["Image"]
class Image:
@@ -10,16 +10,18 @@ class Image:
images = data.safe_extract("imageList", [])
live_link = cls.__get_live_link(images)
token_list = [
cls.__extract_image_token(
Namespace.object_extract(
i, "urlDefault")) for i in images]
cls.__extract_image_token(Namespace.object_extract(i, "urlDefault"))
for i in images
]
match format_:
case "png":
return [Html.format_url(cls.__generate_png_link(i))
for i in token_list], live_link
return [
Html.format_url(cls.__generate_png_link(i)) for i in token_list
], live_link
case "webp":
return [Html.format_url(cls.__generate_webp_link(i))
for i in token_list], live_link
return [
Html.format_url(cls.__generate_webp_link(i)) for i in token_list
], live_link
case _:
raise ValueError

View File

@@ -11,7 +11,10 @@ __all__ = ["Html"]
class Html:
def __init__(self, manager: Manager, ):
def __init__(
self,
manager: Manager,
):
self.retry = manager.retry
self.client = manager.request_client
self.headers = manager.headers
@@ -26,23 +29,32 @@ class Html:
cookie: str = None,
**kwargs,
) -> str:
headers = self.select_headers(url, cookie, )
headers = self.select_headers(
url,
cookie,
)
try:
match content:
case True:
response = await self.__request_url_get(url, headers, **kwargs, )
response = await self.__request_url_get(
url,
headers,
**kwargs,
)
await sleep_time()
response.raise_for_status()
return response.text
case False:
response = await self.__request_url_head(url, headers, **kwargs, )
response = await self.__request_url_head(
url,
headers,
**kwargs,
)
await sleep_time()
return str(response.url)
except HTTPError as error:
logging(
log,
_("网络异常,{0} 请求失败: {1}").format(url, repr(error)),
ERROR
log, _("网络异常,{0} 请求失败: {1}").format(url, repr(error)), ERROR
)
return ""
@@ -50,19 +62,33 @@ class Html:
def format_url(url: str) -> str:
return bytes(url, "utf-8").decode("unicode_escape")
def select_headers(self, url: str, cookie: str = None, ) -> dict:
def select_headers(
self,
url: str,
cookie: str = None,
) -> dict:
if "explore" not in url:
return self.blank_headers
return self.headers | {"Cookie": cookie} if cookie else self.headers
async def __request_url_head(self, url: str, headers: dict, **kwargs, ):
async def __request_url_head(
self,
url: str,
headers: dict,
**kwargs,
):
return await self.client.head(
url,
headers=headers,
**kwargs,
)
async def __request_url_get(self, url: str, headers: dict, **kwargs, ):
async def __request_url_get(
self,
url: str,
headers: dict,
**kwargs,
):
return await self.client.get(
url,
headers=headers,

View File

@@ -1,7 +1,7 @@
from source.expansion import Namespace
from .request import Html
__all__ = ['Video']
__all__ = ["Video"]
class Video:
@@ -13,5 +13,8 @@ class Video:
@classmethod
def get_video_link(cls, data: Namespace) -> list:
return [Html.format_url(f"https://sns-video-bd.xhscdn.com/{t}")] if (
t := data.safe_extract(".".join(cls.VIDEO_LINK))) else []
return (
[Html.format_url(f"https://sns-video-bd.xhscdn.com/{t}")]
if (t := data.safe_extract(".".join(cls.VIDEO_LINK)))
else []
)