feat: 支持 API 模式传入 Cookie

2025-12-26 04:48:05 +08:00 · 2024-12-14 14:10:14 +08:00 · 2024-12-14 14:10:14 +08:00 · 60d32d1327
commit 60d32d1327
parent b360eae802
7 changed files with 35 additions and 25 deletions
--- a/README.md
+++ b/README.md
@ -127,25 +127,31 @@
 <tr>
 <td align="center">url</td>
 <td align="center">str</td>
-<td align="center">小红书作品链接，自动提取，不支持多链接</td>
+<td align="center">小红书作品链接，自动提取，不支持多链接；必需参数</td>
 <td align="center">无</td>
 </tr>
 <tr>
 <td align="center">download</td>
 <td align="center">bool</td>
-<td align="center">是否下载作品文件；设置为 <code>true</code> 将会耗费更多时间</td>
+<td align="center">是否下载作品文件；设置为 <code>true</code> 将会耗费更多时间；可选参数</td>
 <td align="center">false</td>
 </tr>
 <tr>
 <td align="center">index</td>
 <td align="center">list[int]</td>
-<td align="center">下载指定序号的图片文件，仅对图文作品生效；<code>download</code> 参数设置为 <code>false</code> 时不生效</td>
+<td align="center">下载指定序号的图片文件，仅对图文作品生效；<code>download</code> 参数设置为 <code>false</code> 时不生效；可选参数</td>
 <td align="center">null</td>
 </tr>
 <tr>
+<td align="center">cookie</td>
+<td align="center">str</td>
+<td align="center">请求数据时使用的 Cookie；可选参数</td>
+<td align="center">配置文件 Cookie 值</td>
+</tr>
+<tr>
 <td align="center">skip</td>
 <td align="center">bool</td>
-<td align="center">是否跳过存在下载记录的作品；设置为 <code>true</code> 将不会返回存在下载记录的作品数据</td>
+<td align="center">是否跳过存在下载记录的作品；设置为 <code>true</code> 将不会返回存在下载记录的作品数据；可选参数</td>
 <td align="center">false</td>
 </tr>
 </tbody>
--- a/README_EN.md
+++ b/README_EN.md
@ -131,25 +131,31 @@
 <tr>
 <td align="center">url</td>
 <td align="center">str</td>
-<td align="center">Xiaohongshu content link, auto-extraction, does not support multiple links</td>
+<td align="center">Xiaohongshu content link, auto-extraction, does not support multiple links; Required parameter</td>
 <td align="center">None</td>
 </tr>
 <tr>
 <td align="center">download</td>
 <td align="center">bool</td>
-<td align="center">Whether to download the content file; set to <code>true</code> will take more time</td>
+<td align="center">Whether to download the content file; setting it to <code>true</code> takes more time; Optional parameter</td>
 <td align="center">false</td>
 </tr>
 <tr>
 <td align="center">index</td>
 <td align="center">list[int]</td>
-<td align="center">Download specific image files by index, only effective for text and image works; not effective when the <code>download</code> parameter is set to <code>false</code></td>
+<td align="center">Download specific image files by index, only effective for text and image works; not effective when the <code>download</code> parameter is set to <code>false</code>; Optional parameter</td>
 <td align="center">null</td>
 </tr>
 <tr>
+<td align="center">cookie</td>
+<td align="center">str</td>
+<td align="center">Cookies used when requesting data; Optional parameter</td>
+<td align="center">Cookie value from the configuration file</td>
+</tr>
+<tr>
 <td align="center">skip</td>
 <td align="center">bool</td>
-<td align="center">Whether to skip content with download records; set to <code>true</code> will not return content data with download records</td>
+<td align="center">Whether to skip content with download records; when set to <code>true</code>, data for content that has download records is not returned; Optional parameter</td>
 <td align="center">false</td>
 </tr>
 </tbody>
--- a/source/TUI/setting.py
+++ b/source/TUI/setting.py
@ -39,13 +39,6 @@ class Setting(Screen):
            Label(self.message("作品文件名称格式"), classes="params", ),
            Input(self.data["name_format"], placeholder=self.message("发布时间 作者昵称 作品标题"), valid_empty=True,
                  id="name_format", ),
-            # Label(self.message("Sec-Ch-Ua"), classes="params", ),
-            # Input(self.data["sec_ch_ua"], placeholder=self.message("内置 Chrome Sec-Ch-Ua"), valid_empty=True,
-            #       id="sec_ch_ua", ),
-            # Label(self.message("Sec-Ch-Ua-Platform"), classes="params", ),
-            # Input(self.data["sec_ch_ua_platform"], placeholder=self.message("内置 Chrome Sec-Ch-Ua-Platform"),
-            #       valid_empty=True,
-            #       id="sec_ch_ua_platform", ),
            Label(self.message("User-Agent"), classes="params", ),
            Input(self.data["user_agent"], placeholder=self.message("内置 Chrome User Agent"), valid_empty=True,
                  id="user_agent", ),
@ -110,8 +103,6 @@ class Setting(Screen):
            "work_path": self.query_one("#work_path").value,
            "folder_name": self.query_one("#folder_name").value,
            "name_format": self.query_one("#name_format").value,
-            # "sec_ch_ua": self.query_one("#sec_ch_ua").value,
-            # "sec_ch_ua_platform": self.query_one("#sec_ch_ua_platform").value,
            "user_agent": self.query_one("#user_agent").value,
            "cookie": self.query_one("#cookie").value or self.data["cookie"],
            "proxy": self.query_one("#proxy").value or None,
--- a/source/application/app.py
+++ b/source/application/app.py
@ -247,13 +247,14 @@ class XHS:
            log,
            bar,
            data: bool,
+            cookie: str = None,
    ):
        if await self.skip_download(i := self.__extract_link_id(url)) and not data:
            msg = self.message("作品 {0} 存在下载记录，跳过处理").format(i)
            logging(log, msg)
            return {"message": msg}
        logging(log, self.message("开始处理作品：{0}").format(i))
-        html = await self.html.request_url(url, log=log)
+        html = await self.html.request_url(url, log=log, cookie=cookie, )
        namespace = self.__generate_data_object(html)
        if not namespace:
            logging(log, self.message("{0} 获取数据失败").format(i), ERROR)
@ -460,6 +461,7 @@ class XHS:
                        None,
                        None,
                        not extract.skip,
+                        extract.cookie,
                ):
                    msg = self.message("获取小红书作品数据成功")
                else:
--- a/source/application/request.py
+++ b/source/application/request.py
@ -23,9 +23,10 @@ class Html:
            url: str,
            content=True,
            log=None,
+            cookie: str = None,
            **kwargs,
    ) -> str:
-        headers = self.select_headers(url, )
+        headers = self.select_headers(url, cookie, )
        try:
            match content:
                case True:
@ -49,8 +50,10 @@ class Html:
    def format_url(url: str) -> str:
        return bytes(url, "utf-8").decode("unicode_escape")

-    def select_headers(self, url: str) -> dict:
-        return self.headers if "explore" in url else self.blank_headers
+    def select_headers(self, url: str, cookie: str = None, ) -> dict:
+        if "explore" not in url:
+            return self.blank_headers
+        return self.headers | {"Cookie": cookie} if cookie else self.headers

    async def __request_url_head(self, url: str, headers: dict, **kwargs, ):
        return await self.client.head(
--- a/source/module/model.py
+++ b/source/module/model.py
@ -5,6 +5,7 @@ class ExtractParams(BaseModel):
    url: str
    download: bool = False
    index: list = None
+    cookie: str = None
    skip: bool = False


--- a/static/Release_Notes.md
+++ b/static/Release_Notes.md
@ -1,7 +1,8 @@
 **项目更新内容：**

 1. 优化文件名称非法字符处理
-2. 适配新版本 HTTPX 库
-3. 更正英语语言代码
-4. 优化文件下载功能
-5. 移除内置延时机制
+2. 支持 API 模式传入 Cookie
+3. 适配新版本 HTTPX 库
+4. 更正英语语言代码
+5. 优化文件下载功能
+6. 移除内置延时机制