Mirror of https://github.com/JoeanAmier/XHS-Downloader.git (synced 2026-03-22 06:57:16 +08:00)
style: code formatting and string handling optimization

- Optimize code indentation and line wrapping to improve readability
- Standardize on single or double quotes for consistency
- Remove redundant spaces and parentheses to streamline the code
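As a concrete illustration of these conventions, here is a minimal, self-contained Python sketch (hypothetical helper names, not code from the repository) showing the wrapping and quoting style the diff below converts to:

# Minimal illustration of the formatting rules applied in this commit.
# The helpers below are hypothetical stand-ins, not repository code.
def filter_name(text: str) -> str:
    return text.strip()


def beautify_string(text: str, length: int) -> str:
    return text[:length]


title = "demo title"
note_id = "0000000000000000"

# Old style: single line, trailing comma kept before the closing bracket:
# name = beautify_string(filter_name(title), 64, ) or note_id

# New style: double quotes, one argument per line, explicit parentheses:
name = (
    beautify_string(
        filter_name(title),
        64,
    )
    or note_id
)
print(name)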
@@ -48,11 +48,17 @@ __all__ = ["XHS"]


 def _data_cache(function):
-    async def inner(self, data: dict, ):
+    async def inner(
+        self,
+        data: dict,
+    ):
         if self.manager.record_data:
             download = data["下载地址"]
             lives = data["动图地址"]
-            await function(self, data, )
+            await function(
+                self,
+                data,
+            )
             data["下载地址"] = download
             data["动图地址"] = lives

@@ -137,11 +143,14 @@ class XHS:

     def __extract_image(self, container: dict, data: Namespace):
         container["下载地址"], container["动图地址"] = self.image.get_image_link(
-            data, self.manager.image_format)
+            data, self.manager.image_format
+        )

     def __extract_video(self, container: dict, data: Namespace):
         container["下载地址"] = self.video.get_video_link(data)
-        container["动图地址"] = [None, ]
+        container["动图地址"] = [
+            None,
+        ]

     async def __download_files(
         self,
@@ -154,8 +163,7 @@ class XHS:
         name = self.__naming_rules(container)
         if (u := container["下载地址"]) and download:
             if await self.skip_download(i := container["作品ID"]):
-                logging(
-                    log, _("作品 {0} 存在下载记录,跳过下载").format(i))
+                logging(log, _("作品 {0} 存在下载记录,跳过下载").format(i))
             else:
                 path, result = await self.download.run(
                     u,
@@ -172,7 +180,10 @@ class XHS:
         await self.save_data(container)

     @_data_cache
-    async def save_data(self, data: dict, ):
+    async def save_data(
+        self,
+        data: dict,
+    ):
         data["采集时间"] = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
         data["下载地址"] = " ".join(data["下载地址"])
         data["动图地址"] = " ".join(i or "NaN" for i in data["动图地址"])
@@ -196,10 +207,19 @@ class XHS:
         if not urls:
             logging(log, _("提取小红书作品链接失败"), WARNING)
         else:
-            logging(
-                log, _("共 {0} 个小红书作品待处理...").format(len(urls)))
+            logging(log, _("共 {0} 个小红书作品待处理...").format(len(urls)))
         # return urls  # 调试代码
-        return [await self.__deal_extract(i, download, index, log, bar, data, ) for i in urls]
+        return [
+            await self.__deal_extract(
+                i,
+                download,
+                index,
+                log,
+                bar,
+                data,
+            )
+            for i in urls
+        ]

     async def extract_cli(
         self,
@@ -214,7 +234,14 @@ class XHS:
         if not url:
             logging(log, _("提取小红书作品链接失败"), WARNING)
         else:
-            await self.__deal_extract(url[0], download, index, log, bar, data, )
+            await self.__deal_extract(
+                url[0],
+                download,
+                index,
+                log,
+                bar,
+                data,
+            )

     async def extract_links(self, url: str, log) -> list:
         urls = []
@@ -253,7 +280,11 @@ class XHS:
             logging(log, msg)
             return {"message": msg}
         logging(log, _("开始处理作品:{0}").format(i))
-        html = await self.html.request_url(url, log=log, cookie=cookie, )
+        html = await self.html.request_url(
+            url,
+            log=log,
+            cookie=cookie,
+        )
         namespace = self.__generate_data_object(html)
         if not namespace:
             logging(log, _("{0} 获取数据失败").format(i), ERROR)
@@ -299,10 +330,12 @@ class XHS:
         return beautify_string(
             self.CLEANER.filter_name(
                 self.manager.SEPARATE.join(values),
-                default=self.manager.SEPARATE.join((
-                    data["作者ID"],
-                    data["作品ID"],
-                )),
+                default=self.manager.SEPARATE.join(
+                    (
+                        data["作者ID"],
+                        data["作品ID"],
+                    )
+                ),
             ),
             length=128,
         )
@@ -315,10 +348,13 @@ class XHS:
         return self.manager.filter_name(data["作者昵称"]) or data["作者ID"]

     def __get_name_title(self, data: dict) -> str:
-        return beautify_string(
-            self.manager.filter_name(data["作品标题"]),
-            64,
-        ) or data["作品ID"]
+        return (
+            beautify_string(
+                self.manager.filter_name(data["作品标题"]),
+                64,
+            )
+            or data["作品ID"]
+        )

     async def monitor(
         self,
@@ -331,11 +367,15 @@ class XHS:
         logging(
             None,
             _(
-                "程序会自动读取并提取剪贴板中的小红书作品链接,并自动下载链接对应的作品文件,如需关闭,请点击关闭按钮,或者向剪贴板写入 “close” 文本!"),
+                "程序会自动读取并提取剪贴板中的小红书作品链接,并自动下载链接对应的作品文件,如需关闭,请点击关闭按钮,或者向剪贴板写入 “close” 文本!"
+            ),
             style=MASTER,
         )
         self.event.clear()
-        await gather(self.__push_link(delay), self.__receive_link(delay, download, None, log, bar, data))
+        await gather(
+            self.__push_link(delay),
+            self.__receive_link(delay, download, None, log, bar, data),
+        )

     async def __push_link(self, delay: int):
         while not self.event.is_set():
@@ -373,10 +413,16 @@ class XHS:

     @staticmethod
     def read_browser_cookie(value: str | int) -> str:
-        return BrowserCookie.get(
-            value,
-            domains=["xiaohongshu.com", ],
-        ) if value else ""
+        return (
+            BrowserCookie.get(
+                value,
+                domains=[
+                    "xiaohongshu.com",
+                ],
+            )
+            if value
+            else ""
+        )

     # @staticmethod
     # async def index(request):
@@ -425,11 +471,17 @@ class XHS:
     # await self.runner.cleanup()
     # logging(log, _("Web API 服务器已关闭!"))

-    async def run_server(self, host="0.0.0.0", port=8000, log_level="info", ):
+    async def run_server(
+        self,
+        host="0.0.0.0",
+        port=8000,
+        log_level="info",
+    ):
         self.server = FastAPI(
             debug=self.VERSION_BETA,
             title="XHS-Downloader",
-            version=f"{self.VERSION_MAJOR}.{self.VERSION_MINOR}")
+            version=f"{self.VERSION_MAJOR}.{self.VERSION_MINOR}",
+        )
         self.setup_routes()
         config = Config(
             self.server,
@@ -445,7 +497,10 @@ class XHS:
         async def index():
             return RedirectResponse(url=REPOSITORY)

-        @self.server.post("/xhs/", response_model=ExtractData, )
+        @self.server.post(
+            "/xhs/",
+            response_model=ExtractData,
+        )
         async def handle(extract: ExtractParams):
             url = await self.extract_links(extract.url, None)
             if not url:
@@ -466,6 +521,5 @@ class XHS:
                 msg = _("获取小红书作品数据失败")
                 data = None
             return ExtractData(
-                message=msg,
-                url=url[0] if url else extract.url,
-                data=data)
+                message=msg, url=url[0] if url else extract.url, data=data
+            )
@@ -23,7 +23,7 @@ from ..translation import _
 if TYPE_CHECKING:
     from httpx import AsyncClient

-__all__ = ['Download']
+__all__ = ["Download"]


 class Download:
@@ -38,7 +38,10 @@ class Download:
         "audio/mpeg": "mp3",
     }

-    def __init__(self, manager: Manager, ):
+    def __init__(
+        self,
+        manager: Manager,
+    ):
         self.manager = manager
         self.folder = manager.folder
         self.temp = manager.temp
@@ -98,7 +101,8 @@ class Download:
                 format_,
                 log,
                 bar,
-            ) for url, name, format_ in tasks
+            )
+            for url, name, format_ in tasks
         ]
         tasks = await gather(*tasks)
         return path, tasks
@@ -109,11 +113,8 @@ class Download:
         return path

     def __ready_download_video(
-            self,
-            urls: list[str],
-            path: Path,
-            name: str,
-            log) -> list:
+        self, urls: list[str], path: Path, name: str, log
+    ) -> list:
         if not self.video_download:
             logging(log, _("视频作品下载功能已关闭,跳过下载"))
             return []
@@ -128,7 +129,8 @@ class Download:
         index: list | tuple | None,
         path: Path,
         name: str,
-        log) -> list:
+        log,
+    ) -> list:
         tasks = []
         if not self.image_download:
             logging(log, _("图文作品下载功能已关闭,跳过下载"))
@@ -146,28 +148,38 @@ class Download:
                 for s in self.image_format_list
             ):
                 tasks.append([j[0], file, self.image_format])
-            if not self.live_download or not j[1] or self.__check_exists_path(
+            if (
+                not self.live_download
+                or not j[1]
+                or self.__check_exists_path(
                     path,
                     f"{file}.{self.live_format}",
                     log,
+                )
             ):
                 continue
             tasks.append([j[1], file, self.live_format])
         return tasks

-    def __check_exists_glob(self, path: Path, name: str, log, ) -> bool:
+    def __check_exists_glob(
+        self,
+        path: Path,
+        name: str,
+        log,
+    ) -> bool:
         if any(path.glob(name)):
-            logging(
-                log, _(
-                    "{0} 文件已存在,跳过下载").format(name))
+            logging(log, _("{0} 文件已存在,跳过下载").format(name))
             return True
         return False

-    def __check_exists_path(self, path: Path, name: str, log, ) -> bool:
+    def __check_exists_path(
+        self,
+        path: Path,
+        name: str,
+        log,
+    ) -> bool:
         if path.joinpath(name).exists():
-            logging(
-                log, _(
-                    "{0} 文件已存在,跳过下载").format(name))
+            logging(log, _("{0} 文件已存在,跳过下载").format(name))
             return True
         return False

@@ -199,9 +211,16 @@ class Download:
         # return False
         # temp = self.temp.joinpath(f"{name}.{suffix}")
         temp = self.temp.joinpath(f"{name}.{format_}")
-        self.__update_headers_range(headers, temp, )
+        self.__update_headers_range(
+            headers,
+            temp,
+        )
         try:
-            async with self.client.stream("GET", url, headers=headers, ) as response:
+            async with self.client.stream(
+                "GET",
+                url,
+                headers=headers,
+            ) as response:
                 await sleep_time()
                 if response.status_code == 416:
                     raise CacheError(
@@ -234,8 +253,9 @@ class Download:
             # self.__create_progress(bar, None)
             logging(
                 log,
-                _(
-                    "网络异常,{0} 下载失败,错误信息: {1}").format(name, repr(error)),
+                _("网络异常,{0} 下载失败,错误信息: {1}").format(
+                    name, repr(error)
+                ),
                 ERROR,
             )
             return False
@@ -248,7 +268,11 @@ class Download:
         )

     @staticmethod
-    def __create_progress(bar, total: int | None, completed=0, ):
+    def __create_progress(
+        bar,
+        total: int | None,
+        completed=0,
+    ):
         if bar:
             bar.update(total=total, completed=completed)

@@ -273,10 +297,8 @@ class Download:
             )
             await sleep_time()
             response.raise_for_status()
-            suffix = self.__extract_type(
-                response.headers.get("Content-Type")) or suffix
-            length = response.headers.get(
-                "Content-Length", 0)
+            suffix = self.__extract_type(response.headers.get("Content-Type")) or suffix
+            length = response.headers.get("Content-Length", 0)
             return int(length), suffix

     @staticmethod
@@ -303,12 +325,14 @@ class Download:
             async with open(temp, "rb") as f:
                 file_start = await f.read(FILE_SIGNATURES_LENGTH)
             for offset, signature, suffix in FILE_SIGNATURES:
-                if file_start[offset:offset + len(signature)] == signature:
+                if file_start[offset: offset + len(signature)] == signature:
                     return path.joinpath(f"{name}.{suffix}")
         except Exception as error:
             logging(
                 log,
-                _("文件 {0} 格式判断失败,错误信息:{1}").format(temp.name, repr(error)),
+                _("文件 {0} 格式判断失败,错误信息:{1}").format(
+                    temp.name, repr(error)
+                ),
                 ERROR,
             )
         return path.joinpath(f"{name}.{default_suffix}")
@@ -3,7 +3,7 @@ from datetime import datetime
 from ..expansion import Namespace
 from ..translation import _

-__all__ = ['Explore']
+__all__ = ["Explore"]


 class Explore:
@@ -27,10 +27,8 @@ class Explore:

     @staticmethod
     def __extract_interact_info(container: dict, data: Namespace) -> None:
-        container["收藏数量"] = data.safe_extract(
-            "interactInfo.collectedCount", "-1")
-        container["评论数量"] = data.safe_extract(
-            "interactInfo.commentCount", "-1")
+        container["收藏数量"] = data.safe_extract("interactInfo.collectedCount", "-1")
+        container["评论数量"] = data.safe_extract("interactInfo.commentCount", "-1")
         container["分享数量"] = data.safe_extract("interactInfo.shareCount", "-1")
         container["点赞数量"] = data.safe_extract("interactInfo.likedCount", "-1")

@@ -38,33 +36,37 @@ class Explore:
     def __extract_tags(container: dict, data: Namespace):
         tags = data.safe_extract("tagList", [])
         container["作品标签"] = " ".join(
-            Namespace.object_extract(
-                i, "name") for i in tags)
+            Namespace.object_extract(i, "name") for i in tags
+        )

     def __extract_info(self, container: dict, data: Namespace):
         container["作品ID"] = data.safe_extract("noteId")
-        container["作品链接"] = f"https://www.xiaohongshu.com/explore/{container["作品ID"]}"
+        container["作品链接"] = (
+            f"https://www.xiaohongshu.com/explore/{container['作品ID']}"
+        )
         container["作品标题"] = data.safe_extract("title")
         container["作品描述"] = data.safe_extract("desc")
         container["作品类型"] = self.explore_type.get(
-            data.safe_extract("type"), _("未知"))
+            data.safe_extract("type"), _("未知")
+        )
         # container["IP归属地"] = data.safe_extract("ipLocation")

     def __extract_time(self, container: dict, data: Namespace):
-        container["发布时间"] = datetime.fromtimestamp(
-            time /
-            1000).strftime(
-            self.time_format) if (
-            time := data.safe_extract("time")) else _("未知")
-        container["最后更新时间"] = datetime.fromtimestamp(
-            last /
-            1000).strftime(
-            self.time_format) if (
-            last := data.safe_extract("lastUpdateTime")) else _("未知")
+        container["发布时间"] = (
+            datetime.fromtimestamp(time / 1000).strftime(self.time_format)
+            if (time := data.safe_extract("time"))
+            else _("未知")
+        )
+        container["最后更新时间"] = (
+            datetime.fromtimestamp(last / 1000).strftime(self.time_format)
+            if (last := data.safe_extract("lastUpdateTime"))
+            else _("未知")
+        )

     @staticmethod
     def __extract_user(container: dict, data: Namespace):
         container["作者昵称"] = data.safe_extract("user.nickname")
         container["作者ID"] = data.safe_extract("user.userId")
-        container["作者链接"] = f"https://www.xiaohongshu.com/user/profile/{
-            container["作者ID"]}"
+        container["作者链接"] = (
+            f"https://www.xiaohongshu.com/user/profile/{container['作者ID']}"
+        )
@@ -1,7 +1,7 @@
 from source.expansion import Namespace
 from .request import Html

-__all__ = ['Image']
+__all__ = ["Image"]


 class Image:
@@ -10,16 +10,18 @@ class Image:
         images = data.safe_extract("imageList", [])
         live_link = cls.__get_live_link(images)
         token_list = [
-            cls.__extract_image_token(
-                Namespace.object_extract(
-                    i, "urlDefault")) for i in images]
+            cls.__extract_image_token(Namespace.object_extract(i, "urlDefault"))
+            for i in images
+        ]
         match format_:
             case "png":
-                return [Html.format_url(cls.__generate_png_link(i))
-                        for i in token_list], live_link
+                return [
+                    Html.format_url(cls.__generate_png_link(i)) for i in token_list
+                ], live_link
             case "webp":
-                return [Html.format_url(cls.__generate_webp_link(i))
-                        for i in token_list], live_link
+                return [
+                    Html.format_url(cls.__generate_webp_link(i)) for i in token_list
+                ], live_link
             case _:
                 raise ValueError

@@ -11,7 +11,10 @@ __all__ = ["Html"]


 class Html:
-    def __init__(self, manager: Manager, ):
+    def __init__(
+        self,
+        manager: Manager,
+    ):
         self.retry = manager.retry
         self.client = manager.request_client
         self.headers = manager.headers
@@ -26,23 +29,32 @@ class Html:
         cookie: str = None,
         **kwargs,
     ) -> str:
-        headers = self.select_headers(url, cookie, )
+        headers = self.select_headers(
+            url,
+            cookie,
+        )
         try:
             match content:
                 case True:
-                    response = await self.__request_url_get(url, headers, **kwargs, )
+                    response = await self.__request_url_get(
+                        url,
+                        headers,
+                        **kwargs,
+                    )
                     await sleep_time()
                     response.raise_for_status()
                     return response.text
                 case False:
-                    response = await self.__request_url_head(url, headers, **kwargs, )
+                    response = await self.__request_url_head(
+                        url,
+                        headers,
+                        **kwargs,
+                    )
                     await sleep_time()
                     return str(response.url)
         except HTTPError as error:
             logging(
-                log,
-                _("网络异常,{0} 请求失败: {1}").format(url, repr(error)),
-                ERROR
+                log, _("网络异常,{0} 请求失败: {1}").format(url, repr(error)), ERROR
             )
             return ""

@@ -50,19 +62,33 @@ class Html:
     def format_url(url: str) -> str:
         return bytes(url, "utf-8").decode("unicode_escape")

-    def select_headers(self, url: str, cookie: str = None, ) -> dict:
+    def select_headers(
+        self,
+        url: str,
+        cookie: str = None,
+    ) -> dict:
         if "explore" not in url:
             return self.blank_headers
         return self.headers | {"Cookie": cookie} if cookie else self.headers

-    async def __request_url_head(self, url: str, headers: dict, **kwargs, ):
+    async def __request_url_head(
+        self,
+        url: str,
+        headers: dict,
+        **kwargs,
+    ):
         return await self.client.head(
             url,
             headers=headers,
             **kwargs,
         )

-    async def __request_url_get(self, url: str, headers: dict, **kwargs, ):
+    async def __request_url_get(
+        self,
+        url: str,
+        headers: dict,
+        **kwargs,
+    ):
         return await self.client.get(
             url,
             headers=headers,
@@ -1,7 +1,7 @@
 from source.expansion import Namespace
 from .request import Html

-__all__ = ['Video']
+__all__ = ["Video"]


 class Video:
@@ -13,5 +13,8 @@ class Video:

     @classmethod
     def get_video_link(cls, data: Namespace) -> list:
-        return [Html.format_url(f"https://sns-video-bd.xhscdn.com/{t}")] if (
-            t := data.safe_extract(".".join(cls.VIDEO_LINK))) else []
+        return (
+            [Html.format_url(f"https://sns-video-bd.xhscdn.com/{t}")]
+            if (t := data.safe_extract(".".join(cls.VIDEO_LINK)))
+            else []
+        )