perf(download.py): 优化已下载文件判断逻辑

1. 优化图片文件后缀名 2. 修改文件下载并发数 3. 优化项目代码格式 4. 更新项目使用说明
2025-12-26 04:48:05 +08:00 · 2024-10-13 11:00:18 +08:00 · 2024-10-13 11:00:18 +08:00 · 58b2067c68
commit 58b2067c68
parent 04557b074f
6 changed files with 53 additions and 19 deletions
--- a/README.md
+++ b/README.md
@@ -15,6 +15,7 @@
 <br>
 <p>🔥 <b>小红书链接提取/作品采集工具</b>：提取账号发布、收藏、点赞、专辑作品链接；提取搜索结果作品链接、用户链接；采集小红书作品信息；提取小红书作品下载地址；下载小红书无水印作品文件！</p>
 <p><strong>⚠️ 2024/9/24: 由于小红书规则更新，使用版本号低于 <code>1.7.1</code> 的用户脚本有封号风险，请及时更新用户脚本后再使用！</strong></p>
+<p><strong>⚠️ 2024/10/13: 由于作品链接携带日期信息，使用先前日期获取的作品链接可能会被风控，建议下载作品文件时使用最新获取的作品链接！</strong></p>
 <p>⭐ 本项目完全免费开源，无任何收费功能，请勿上当受骗！</p>
 <h1>📑 项目功能</h1>
 <ul><b>程序功能</b>
--- a/README_EN.md
+++ b/README_EN.md
@@ -15,6 +15,7 @@
 <br>
 <p>🔥 <b>Xiaohongshu Link Extraction/Content Collection Tool</b>：Extract account-published, favorited, and liked content links; extract search result content links and user links; collect Xiaohongshu content information; extract Xiaohongshu content download addresses; download Xiaohongshu watermark-free content files!</p>
 <p><strong>⚠️ 2024/9/24: Due to rule updates on Xiaohongshu, there is a risk of account suspension for user scripts with version numbers lower than <code>1.7.1</code>. Please update the user scripts in a timely manner before using them!</strong></p>
+<p><strong>⚠️ 2024/10/13: Due to the date information carried in the links of Xiaohongshu works, using links obtained from previous dates may be subject to risk control. It is recommended to use the latest Xiaohongshu works links when downloading Xiaohongshu work files!</strong></p>
 <p>⭐ This project is completely free and open-source, with no paid features. Please do not be deceived!</p>
 <p>⭐ Due to the author's limited time, the English documentation may not be updated in a timely manner, and its content may have become outdated. Parts of the translation are machine-translated and may be incorrect, so we suggest referring to the Chinese documentation. If you want to contribute to the translation, you are warmly welcome.</p>
 <h1>📑 Project Features</h1>
--- a/source/application/download.py
+++ b/source/application/download.py
@@ -6,16 +6,17 @@ from typing import TYPE_CHECKING, Any
 from aiofiles import open
 from httpx import HTTPError

-from source.module import ERROR
-from source.module import (
+from ..module import ERROR
+from ..module import (
    FILE_SIGNATURES_LENGTH,
    FILE_SIGNATURES,
 )
-# from source.module import WARNING
-from source.module import Manager
-from source.module import logging
-from source.module import retry as re_download
-from source.module import sleep_time
+from ..module import MAX_WORKERS
+# from ..module import WARNING
+from ..module import Manager
+from ..module import logging
+from ..module import retry as re_download
+from ..module import sleep_time

 if TYPE_CHECKING:
    from httpx import AsyncClient
@@ -24,10 +25,10 @@ __all__ = ['Download']


 class Download:
-    SEMAPHORE = Semaphore(4)
+    SEMAPHORE = Semaphore(MAX_WORKERS)
    CONTENT_TYPE_MAP = {
        "image/png": "png",
-        "image/jpeg": "jpg",
+        "image/jpeg": "jpeg",
        "image/webp": "webp",
        "application/octet-stream": "",
        "video/mp4": "mp4",
@@ -47,6 +48,13 @@ class Download:
        self.video_format = "mp4"
        self.live_format = "mp4"
        self.image_format = manager.image_format
+        self.image_format_list = (
+            "jpeg",
+            "png",
+            "webp",
+            "avif",
+            "heic",
+        )
        self.image_download = manager.image_download
        self.video_download = manager.video_download
        self.live_download = manager.live_download
@@ -97,7 +105,7 @@ class Download:
        if not self.video_download:
            logging(log, self.message("视频作品下载功能已关闭，跳过下载"))
            return []
-        if self.__check_exists(path, f"{name}.{self.video_format}", log):
+        if self.__check_exists_path(path, f"{name}.{self.video_format}", log):
            return []
        return [(urls[0], name, self.video_format)]

@@ -117,16 +125,25 @@ class Download:
            if index and i not in index:
                continue
            file = f"{name}_{i}"
-            if not self.__check_exists(
-                    path, f"{file}.{self.image_format}", log):
+            if not any(
+                    self.__check_exists_path(
+                        path,
+                        f"{file}.{s}",
+                        log,
+                    )
+                    for s in self.image_format_list
+            ):
                tasks.append([j[0], file, self.image_format])
-            if not self.live_download or not j[1] or self.__check_exists(
-                    path, f"{file}.{self.live_format}", log):
+            if not self.live_download or not j[1] or self.__check_exists_path(
+                    path,
+                    f"{file}.{self.live_format}",
+                    log,
+            ):
                continue
            tasks.append([j[1], file, self.live_format])
        return tasks

-    def __check_exists(self, path: Path, name: str, log, ) -> bool:
+    def __check_exists_glob(self, path: Path, name: str, log, ) -> bool:
        if any(path.glob(name)):
            logging(
                log, self.message(
@@ -134,6 +151,14 @@ class Download:
            return True
        return False

+    def __check_exists_path(self, path: Path, name: str, log, ) -> bool:
+        if path.joinpath(name).exists():
+            logging(
+                log, self.message(
+                    "{0} 文件已存在，跳过下载").format(name))
+            return True
+        return False
+
    @re_download
    async def __download(
            self,
--- a/source/module/__init__.py
+++ b/source/module/__init__.py
@@ -30,6 +30,7 @@ from .static import (
    SEC_CH_UA_PLATFORM,
    FILE_SIGNATURES,
    FILE_SIGNATURES_LENGTH,
+    MAX_WORKERS,
 )
 from .tools import (
    retry,
--- a/source/module/static.py
+++ b/source/module/static.py
@@ -48,7 +48,7 @@ FILE_SIGNATURES: tuple[tuple[int, bytes, str,], ...] = (
    # 分别为偏移量(字节)、十六进制签名、后缀
    # 参考：https://en.wikipedia.org/wiki/List_of_file_signatures
    # 参考：https://www.garykessler.net/library/file_sigs.html
-    (0, b"\xFF\xD8\xFF", "jpg"),
+    (0, b"\xFF\xD8\xFF", "jpeg"),
    (0, b"\x89\x50\x4E\x47\x0D\x0A\x1A\x0A", "png"),
    (4, b"\x66\x74\x79\x70\x61\x76\x69\x66", "avif"),
    (4, b"\x66\x74\x79\x70\x68\x65\x69\x63", "heic"),
@@ -64,3 +64,5 @@ FILE_SIGNATURES: tuple[tuple[int, bytes, str,], ...] = (
    (8, b"\x41\x56\x49\x20", "avi"),
 )
 FILE_SIGNATURES_LENGTH = max(offset + len(signature) for offset, signature, _ in FILE_SIGNATURES)
+
+MAX_WORKERS: int = 3
--- a/static/Release_Notes.md
+++ b/static/Release_Notes.md
@@ -2,9 +2,11 @@

 1. 优化从浏览器读取 Cookie 功能
 2. 修复文件名称过长报错的问题
-3. 新增文件名称长度限制
-4. 优化文件后缀处理逻辑
-5. 优化代理测试逻辑
+3. 优化已下载文件判断逻辑
+4. 更新文件下载并发数量
+5. 新增文件名称长度限制
+6. 优化文件后缀处理逻辑
+7. 优化代理测试逻辑

 <p><strong>旧版本升级后首次运行请删除配置文件 <code>settings.json</code>，删除后重新运行程序会自动生成新的默认配置文件！</strong></p>

@@ -15,3 +17,5 @@
 1. 重构作品链接提取功能

 <p><strong>⚠️ 由于小红书规则更新，使用版本号低于 <code>1.7.1</code> 的用户脚本有封号风险，请及时更新用户脚本后再使用！</strong></p>
+
+<p><strong>⚠️ 由于作品链接携带日期信息，使用先前日期获取的作品链接可能会被风控，建议下载作品文件时使用最新获取的作品链接！</strong></p>