mirror of
https://github.com/JoeanAmier/XHS-Downloader.git
synced 2025-12-26 04:48:05 +08:00
feat: 支持 API 模式传入 Cookie
This commit is contained in:
parent
b360eae802
commit
60d32d1327
14
README.md
14
README.md
@ -127,25 +127,31 @@
|
||||
<tr>
|
||||
<td align="center">url</td>
|
||||
<td align="center">str</td>
|
||||
<td align="center">小红书作品链接,自动提取,不支持多链接</td>
|
||||
<td align="center">小红书作品链接,自动提取,不支持多链接;必需参数</td>
|
||||
<td align="center">无</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td align="center">download</td>
|
||||
<td align="center">bool</td>
|
||||
<td align="center">是否下载作品文件;设置为 <code>true</code> 将会耗费更多时间</td>
|
||||
<td align="center">是否下载作品文件;设置为 <code>true</code> 将会耗费更多时间;可选参数</td>
|
||||
<td align="center">false</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td align="center">index</td>
|
||||
<td align="center">list[int]</td>
|
||||
<td align="center">下载指定序号的图片文件,仅对图文作品生效;<code>download</code> 参数设置为 <code>false</code> 时不生效</td>
|
||||
<td align="center">下载指定序号的图片文件,仅对图文作品生效;<code>download</code> 参数设置为 <code>false</code> 时不生效;可选参数</td>
|
||||
<td align="center">null</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td align="center">cookie</td>
|
||||
<td align="center">str</td>
|
||||
<td align="center">请求数据时使用的 Cookie;可选参数</td>
|
||||
<td align="center">配置文件 Cookie 值</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td align="center">skip</td>
|
||||
<td align="center">bool</td>
|
||||
<td align="center">是否跳过存在下载记录的作品;设置为 <code>true</code> 将不会返回存在下载记录的作品数据</td>
|
||||
<td align="center">是否跳过存在下载记录的作品;设置为 <code>true</code> 将不会返回存在下载记录的作品数据;可选参数</td>
|
||||
<td align="center">false</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
|
||||
14
README_EN.md
14
README_EN.md
@ -131,25 +131,31 @@
|
||||
<tr>
|
||||
<td align="center">url</td>
|
||||
<td align="center">str</td>
|
||||
<td align="center">Xiaohongshu content link, auto-extraction, does not support multiple links</td>
|
||||
<td align="center">Xiaohongshu content link; the link is extracted automatically, and multiple links are not supported; required parameter</td>
|
||||
<td align="center">None</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td align="center">download</td>
|
||||
<td align="center">bool</td>
|
||||
<td align="center">Whether to download the content file; set to <code>true</code> will take more time</td>
|
||||
<td align="center">Whether to download the content files; setting this to <code>true</code> will take more time; optional parameter</td>
|
||||
<td align="center">false</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td align="center">index</td>
|
||||
<td align="center">list[int]</td>
|
||||
<td align="center">Download specific image files by index, only effective for text and image works; not effective when the <code>download</code> parameter is set to <code>false</code></td>
|
||||
<td align="center">Download only the image files at the specified indexes; only effective for image-and-text posts; has no effect when the <code>download</code> parameter is set to <code>false</code>; optional parameter</td>
|
||||
<td align="center">null</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td align="center">cookie</td>
|
||||
<td align="center">str</td>
|
||||
<td align="center">Cookies used when requesting data; Optional parameter</td>
|
||||
<td align="center">Cookie value from the configuration file</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td align="center">skip</td>
|
||||
<td align="center">bool</td>
|
||||
<td align="center">Whether to skip content with download records; set to <code>true</code> will not return content data with download records</td>
|
||||
<td align="center">Whether to skip content that already has a download record; when set to <code>true</code>, data for such content is not returned; optional parameter</td>
|
||||
<td align="center">false</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
|
||||
@ -39,13 +39,6 @@ class Setting(Screen):
|
||||
Label(self.message("作品文件名称格式"), classes="params", ),
|
||||
Input(self.data["name_format"], placeholder=self.message("发布时间 作者昵称 作品标题"), valid_empty=True,
|
||||
id="name_format", ),
|
||||
# Label(self.message("Sec-Ch-Ua"), classes="params", ),
|
||||
# Input(self.data["sec_ch_ua"], placeholder=self.message("内置 Chrome Sec-Ch-Ua"), valid_empty=True,
|
||||
# id="sec_ch_ua", ),
|
||||
# Label(self.message("Sec-Ch-Ua-Platform"), classes="params", ),
|
||||
# Input(self.data["sec_ch_ua_platform"], placeholder=self.message("内置 Chrome Sec-Ch-Ua-Platform"),
|
||||
# valid_empty=True,
|
||||
# id="sec_ch_ua_platform", ),
|
||||
Label(self.message("User-Agent"), classes="params", ),
|
||||
Input(self.data["user_agent"], placeholder=self.message("内置 Chrome User Agent"), valid_empty=True,
|
||||
id="user_agent", ),
|
||||
@ -110,8 +103,6 @@ class Setting(Screen):
|
||||
"work_path": self.query_one("#work_path").value,
|
||||
"folder_name": self.query_one("#folder_name").value,
|
||||
"name_format": self.query_one("#name_format").value,
|
||||
# "sec_ch_ua": self.query_one("#sec_ch_ua").value,
|
||||
# "sec_ch_ua_platform": self.query_one("#sec_ch_ua_platform").value,
|
||||
"user_agent": self.query_one("#user_agent").value,
|
||||
"cookie": self.query_one("#cookie").value or self.data["cookie"],
|
||||
"proxy": self.query_one("#proxy").value or None,
|
||||
|
||||
@ -247,13 +247,14 @@ class XHS:
|
||||
log,
|
||||
bar,
|
||||
data: bool,
|
||||
cookie: str = None,
|
||||
):
|
||||
if await self.skip_download(i := self.__extract_link_id(url)) and not data:
|
||||
msg = self.message("作品 {0} 存在下载记录,跳过处理").format(i)
|
||||
logging(log, msg)
|
||||
return {"message": msg}
|
||||
logging(log, self.message("开始处理作品:{0}").format(i))
|
||||
html = await self.html.request_url(url, log=log)
|
||||
html = await self.html.request_url(url, log=log, cookie=cookie, )
|
||||
namespace = self.__generate_data_object(html)
|
||||
if not namespace:
|
||||
logging(log, self.message("{0} 获取数据失败").format(i), ERROR)
|
||||
@ -460,6 +461,7 @@ class XHS:
|
||||
None,
|
||||
None,
|
||||
not extract.skip,
|
||||
extract.cookie,
|
||||
):
|
||||
msg = self.message("获取小红书作品数据成功")
|
||||
else:
|
||||
|
||||
@ -23,9 +23,10 @@ class Html:
|
||||
url: str,
|
||||
content=True,
|
||||
log=None,
|
||||
cookie: str = None,
|
||||
**kwargs,
|
||||
) -> str:
|
||||
headers = self.select_headers(url, )
|
||||
headers = self.select_headers(url, cookie, )
|
||||
try:
|
||||
match content:
|
||||
case True:
|
||||
@ -49,8 +50,10 @@ class Html:
|
||||
def format_url(url: str) -> str:
|
||||
return bytes(url, "utf-8").decode("unicode_escape")
|
||||
|
||||
def select_headers(self, url: str) -> dict:
|
||||
return self.headers if "explore" in url else self.blank_headers
|
||||
def select_headers(self, url: str, cookie: str = None, ) -> dict:
|
||||
if "explore" not in url:
|
||||
return self.blank_headers
|
||||
return self.headers | {"Cookie": cookie} if cookie else self.headers
|
||||
|
||||
async def __request_url_head(self, url: str, headers: dict, **kwargs, ):
|
||||
return await self.client.head(
|
||||
|
||||
@ -5,6 +5,7 @@ class ExtractParams(BaseModel):
|
||||
url: str
|
||||
download: bool = False
|
||||
index: list = None
|
||||
cookie: str = None
|
||||
skip: bool = False
|
||||
|
||||
|
||||
|
||||
@ -1,7 +1,8 @@
|
||||
**项目更新内容:**
|
||||
|
||||
1. 优化文件名称非法字符处理
|
||||
2. 适配新版本 HTTPX 库
|
||||
3. 更正英语语言代码
|
||||
4. 优化文件下载功能
|
||||
5. 移除内置延时机制
|
||||
2. 支持 API 模式传入 Cookie
|
||||
3. 适配新版本 HTTPX 库
|
||||
4. 更正英语语言代码
|
||||
5. 优化文件下载功能
|
||||
6. 移除内置延时机制
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user