fix: 修复项目功能异常

1. 更新 Cookie 参数处理
2. 优化作品数据返回格式
3. 更新用户脚本链接提取功能
4. 优化项目运行信息提示
5. 修复 record_data 参数无效的问题
6. 新增作品下载记录开关
7. 适配小红书平台规则
8. 默认开启局域网访问
9. 格式化项目代码
10. 更新英语翻译

Closes #127
Closes #128
Closes #130
Closes #132
2026-03-22 15:07:17 +08:00 · 2024-08-02 21:22:00 +08:00
parent dd5c09389a
commit 3e8b69f8e1
16 changed files with 171 additions and 69 deletions
--- a/source/application/app.py
+++ b/source/application/app.py
@@ -32,6 +32,7 @@ from source.module import (
    REPOSITORY,
    VERSION_MAJOR,
    VERSION_MINOR,
+    VERSION_BETA,
 )
 from source.module import Translate
 from source.module import logging
@@ -44,10 +45,25 @@ from .video import Video
 __all__ = ["XHS"]


+def _data_cache(function):
+    async def inner(self, data: dict, ):
+        if self.manager.record_data:
+            download = data["下载地址"]
+            lives = data["动图地址"]
+            await function(self, data, )
+            data["下载地址"] = download
+            data["动图地址"] = lives
+
+    return inner
+
+
 class XHS:
-    LINK = compile(r"https?://www\.xiaohongshu\.com/explore/[a-z0-9]+")
-    SHARE = compile(r"https?://www\.xiaohongshu\.com/discovery/item/[a-z0-9]+")
-    SHORT = compile(r"https?://xhslink\.com/[A-Za-z0-9]+")
+    VERSION_MAJOR = VERSION_MAJOR
+    VERSION_MINOR = VERSION_MINOR
+    VERSION_BETA = VERSION_BETA
+    LINK = compile(r"https?://www\.xiaohongshu\.com/explore/\S+")
+    SHARE = compile(r"https?://www\.xiaohongshu\.com/discovery/item/\S+")
+    SHORT = compile(r"https?://xhslink\.com/\S+")
    __INSTANCE = None

    def __new__(cls, *args, **kwargs):
@@ -74,6 +90,7 @@ class XHS:
            video_download=True,
            live_download=False,
            folder_mode=False,
+            download_record=True,
            language="zh_CN",
            # server=False,
            transition: Callable[[str], str] = None,
@@ -101,6 +118,7 @@ class XHS:
            image_download,
            video_download,
            live_download,
+            download_record,
            folder_mode,
            # server,
            self.message,
@@ -127,7 +145,7 @@ class XHS:

    def __extract_video(self, container: dict, data: Namespace):
        container["下载地址"] = self.video.get_video_link(data)
-        container["动图地址"] = ""
+        container["动图地址"] = [None, ]

    async def __download_files(self, container: dict, download: bool, index, log, bar):
        name = self.__naming_rules(container)
@@ -136,13 +154,21 @@ class XHS:
                logging(
                    log, self.message("作品 {0} 存在下载记录，跳过下载").format(i))
            else:
-                path, result = await self.download.run(u, container["动图地址"], index, name, container["作品类型"],
-                                                       log, bar)
+                path, result = await self.download.run(
+                    u,
+                    container["动图地址"],
+                    index,
+                    name,
+                    container["作品类型"],
+                    log,
+                    bar,
+                )
                await self.__add_record(i, result)
        elif not u:
            logging(log, self.message("提取作品文件下载地址失败"), ERROR)
        await self.save_data(container)

+    @_data_cache
    async def save_data(self, data: dict, ):
        data["采集时间"] = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        data["下载地址"] = " ".join(data["下载地址"])
@@ -196,20 +222,20 @@ class XHS:
        return urls

    async def __deal_extract(self, url: str, download: bool, index: list | tuple | None, log, bar, data: bool, ):
-        if not data and await self.skip_download(i := self.__extract_link_id(url)):
+        if await self.skip_download(i := self.__extract_link_id(url)) and not data:
            msg = self.message("作品 {0} 存在下载记录，跳过处理").format(i)
            logging(log, msg)
            return {"message": msg}
-        logging(log, self.message("开始处理作品：{0}").format(url))
+        logging(log, self.message("开始处理作品：{0}").format(i))
        html = await self.html.request_url(url, log=log)
        namespace = self.__generate_data_object(html)
        if not namespace:
-            logging(log, self.message("{0} 获取数据失败").format(url), ERROR)
+            logging(log, self.message("{0} 获取数据失败").format(i), ERROR)
            return {}
        data = self.explore.run(namespace)
        # logging(log, data)  # 调试代码
        if not data:
-            logging(log, self.message("{0} 提取数据失败").format(url), ERROR)
+            logging(log, self.message("{0} 提取数据失败").format(i), ERROR)
            return {}
        match data["作品类型"]:
            case "视频":
@@ -219,7 +245,7 @@ class XHS:
            case _:
                data["下载地址"] = []
        await self.__download_files(data, download, index, log, bar)
-        logging(log, self.message("作品处理完成：{0}").format(url))
+        logging(log, self.message("作品处理完成：{0}").format(i))
        return data

    @staticmethod
@@ -352,7 +378,7 @@ class XHS:
    #     await self.runner.cleanup()
    #     logging(log, self.message("Web API 服务器已关闭！"))

-    async def run_server(self, host="127.0.0.1", port=8000, log_level="info", ):
+    async def run_server(self, host="0.0.0.0", port=8000, log_level="info", ):
        self.server = FastAPI(
            title="XHS-Downloader",
            version=f"{VERSION_MAJOR}.{VERSION_MINOR}")
--- a/source/application/download.py
+++ b/source/application/download.py
@@ -37,8 +37,16 @@ class Download:
        self.video_download = manager.video_download
        self.live_download = manager.live_download

-    async def run(self, urls: list, lives: list, index: list | tuple | None, name: str, type_: str, log, bar) -> tuple[
-        Path, tuple]:
+    async def run(
+            self,
+            urls: list,
+            lives: list,
+            index: list | tuple | None,
+            name: str,
+            type_: str,
+            log,
+            bar,
+    ) -> tuple[Path, tuple]:
        path = self.__generate_path(name)
        match type_:
            case "视频":
--- a/source/application/explore.py
+++ b/source/application/explore.py
@@ -40,8 +40,7 @@ class Explore:

    def __extract_info(self, container: dict, data: Namespace):
        container["作品ID"] = data.safe_extract("noteId")
-        container["作品链接"] = f"https://www.xiaohongshu.com/explore/{
-        container["作品ID"]}"
+        container["作品链接"] = f"https://www.xiaohongshu.com/explore/{container["作品ID"]}"
        container["作品标题"] = data.safe_extract("title")
        container["作品描述"] = data.safe_extract("desc")
        container["作品类型"] = self.explore_type.get(
--- a/source/application/image.py
+++ b/source/application/image.py
@@ -37,9 +37,12 @@ class Image:

    @staticmethod
    def __get_live_link(items: list) -> list:
-        links = []
-        for item in items:
-            links.append(
-                Html.format_url(Namespace.object_extract(
-                    item, "stream.h264[0].masterUrl")))
-        return links
+        return [
+            (
+                    Html.format_url(
+                        Namespace.object_extract(item, "stream.h264[0].masterUrl")
+                    )
+                    or None
+            )
+            for item in items
+        ]
--- a/source/application/request.py
+++ b/source/application/request.py
@@ -13,6 +13,8 @@ class Html:
        self.retry = manager.retry
        self.message = manager.message
        self.client = manager.request_client
+        self.headers = manager.headers
+        self.blank_headers = manager.blank_headers

    @retry
    async def request_url(
@@ -25,6 +27,7 @@ class Html:
        try:
            response = await self.client.get(
                url,
+                headers=self.select_headers(url, ),
                **kwargs,
            )
            response.raise_for_status()
@@ -38,3 +41,6 @@ class Html:
    @staticmethod
    def format_url(url: str) -> str:
        return bytes(url, "utf-8").decode("unicode_escape")
+
+    def select_headers(self, url: str) -> dict:
+        return self.blank_headers if "discovery/item" in url else self.headers