feat: 支持 API 模式传入 Cookie

This commit is contained in:
JoeanAmier 2024-12-14 14:10:14 +08:00
parent b360eae802
commit 60d32d1327
7 changed files with 35 additions and 25 deletions

View File

@ -127,25 +127,31 @@
<tr>
<td align="center">url</td>
<td align="center">str</td>
<td align="center">小红书作品链接,自动提取,不支持多链接</td>
<td align="center">小红书作品链接,自动提取,不支持多链接;必需参数</td>
<td align="center"></td>
</tr>
<tr>
<td align="center">download</td>
<td align="center">bool</td>
<td align="center">是否下载作品文件;设置为 <code>true</code> 将会耗费更多时间</td>
<td align="center">是否下载作品文件;设置为 <code>true</code> 将会耗费更多时间;可选参数</td>
<td align="center">false</td>
</tr>
<tr>
<td align="center">index</td>
<td align="center">list[int]</td>
<td align="center">下载指定序号的图片文件,仅对图文作品生效;<code>download</code> 参数设置为 <code>false</code> 时不生效</td>
<td align="center">下载指定序号的图片文件,仅对图文作品生效;<code>download</code> 参数设置为 <code>false</code> 时不生效;可选参数</td>
<td align="center">null</td>
</tr>
<tr>
<td align="center">cookie</td>
<td align="center">str</td>
<td align="center">请求数据时使用的 Cookie;可选参数</td>
<td align="center">配置文件 Cookie 值</td>
</tr>
<tr>
<td align="center">skip</td>
<td align="center">bool</td>
<td align="center">是否跳过存在下载记录的作品;设置为 <code>true</code> 将不会返回存在下载记录的作品数据</td>
<td align="center">是否跳过存在下载记录的作品;设置为 <code>true</code> 将不会返回存在下载记录的作品数据;可选参数</td>
<td align="center">false</td>
</tr>
</tbody>

View File

@ -131,25 +131,31 @@
<tr>
<td align="center">url</td>
<td align="center">str</td>
<td align="center">Xiaohongshu content link, auto-extraction, does not support multiple links</td>
<td align="center">Xiaohongshu content link, auto-extraction, does not support multiple links; Required parameter</td>
<td align="center">None</td>
</tr>
<tr>
<td align="center">download</td>
<td align="center">bool</td>
<td align="center">Whether to download the content file; set to <code>true</code> will take more time</td>
<td align="center">Whether to download the content file; setting it to <code>true</code> will take more time; Optional parameter</td>
<td align="center">false</td>
</tr>
<tr>
<td align="center">index</td>
<td align="center">list[int]</td>
<td align="center">Download specific image files by index, only effective for text and image works; not effective when the <code>download</code> parameter is set to <code>false</code></td>
<td align="center">Download specific image files by index, only effective for text and image works; not effective when the <code>download</code> parameter is set to <code>false</code>; Optional parameter</td>
<td align="center">null</td>
</tr>
<tr>
<td align="center">cookie</td>
<td align="center">str</td>
<td align="center">Cookies used when requesting data; Optional parameter</td>
<td align="center">Cookie value from the configuration file</td>
</tr>
<tr>
<td align="center">skip</td>
<td align="center">bool</td>
<td align="center">Whether to skip content with download records; set to <code>true</code> will not return content data with download records</td>
<td align="center">Whether to skip content with download records; when set to <code>true</code>, content data with download records will not be returned; Optional parameter</td>
<td align="center">false</td>
</tr>
</tbody>

View File

@ -39,13 +39,6 @@ class Setting(Screen):
Label(self.message("作品文件名称格式"), classes="params", ),
Input(self.data["name_format"], placeholder=self.message("发布时间 作者昵称 作品标题"), valid_empty=True,
id="name_format", ),
# Label(self.message("Sec-Ch-Ua"), classes="params", ),
# Input(self.data["sec_ch_ua"], placeholder=self.message("内置 Chrome Sec-Ch-Ua"), valid_empty=True,
# id="sec_ch_ua", ),
# Label(self.message("Sec-Ch-Ua-Platform"), classes="params", ),
# Input(self.data["sec_ch_ua_platform"], placeholder=self.message("内置 Chrome Sec-Ch-Ua-Platform"),
# valid_empty=True,
# id="sec_ch_ua_platform", ),
Label(self.message("User-Agent"), classes="params", ),
Input(self.data["user_agent"], placeholder=self.message("内置 Chrome User Agent"), valid_empty=True,
id="user_agent", ),
@ -110,8 +103,6 @@ class Setting(Screen):
"work_path": self.query_one("#work_path").value,
"folder_name": self.query_one("#folder_name").value,
"name_format": self.query_one("#name_format").value,
# "sec_ch_ua": self.query_one("#sec_ch_ua").value,
# "sec_ch_ua_platform": self.query_one("#sec_ch_ua_platform").value,
"user_agent": self.query_one("#user_agent").value,
"cookie": self.query_one("#cookie").value or self.data["cookie"],
"proxy": self.query_one("#proxy").value or None,

View File

@ -247,13 +247,14 @@ class XHS:
log,
bar,
data: bool,
cookie: str = None,
):
if await self.skip_download(i := self.__extract_link_id(url)) and not data:
msg = self.message("作品 {0} 存在下载记录,跳过处理").format(i)
logging(log, msg)
return {"message": msg}
logging(log, self.message("开始处理作品:{0}").format(i))
html = await self.html.request_url(url, log=log)
html = await self.html.request_url(url, log=log, cookie=cookie, )
namespace = self.__generate_data_object(html)
if not namespace:
logging(log, self.message("{0} 获取数据失败").format(i), ERROR)
@ -460,6 +461,7 @@ class XHS:
None,
None,
not extract.skip,
extract.cookie,
):
msg = self.message("获取小红书作品数据成功")
else:

View File

@ -23,9 +23,10 @@ class Html:
url: str,
content=True,
log=None,
cookie: str = None,
**kwargs,
) -> str:
headers = self.select_headers(url, )
headers = self.select_headers(url, cookie, )
try:
match content:
case True:
@ -49,8 +50,10 @@ class Html:
# Expand backslash escape sequences contained in `url`: the text is encoded
# to UTF-8 bytes and then decoded with the "unicode_escape" codec.
# NOTE(review): "unicode_escape" decodes non-escape bytes as Latin-1, so
# non-ASCII characters in `url` would not round-trip — confirm that callers
# only pass ASCII text here.
def format_url(url: str) -> str:
return bytes(url, "utf-8").decode("unicode_escape")
# Choose the request headers for `url`: self.headers when the URL contains
# the substring "explore", otherwise self.blank_headers.
def select_headers(self, url: str) -> dict:
return self.headers if "explore" in url else self.blank_headers
# Choose the request headers for `url`, optionally overriding the cookie.
#   url:    request URL; only URLs containing "explore" get self.headers.
#   cookie: optional cookie string; when truthy it is sent as the "Cookie"
#           header instead of whatever self.headers carries under that key.
# Returns self.blank_headers for non-"explore" URLs (the cookie argument is
# ignored in that case), otherwise self.headers or a merged copy of it.
def select_headers(self, url: str, cookie: str = None, ) -> dict:
if "explore" not in url:
return self.blank_headers
# Parsed as `(self.headers | {...}) if cookie else self.headers`.
# Assuming self.headers is a plain dict, `|` builds a new dict, so the
# per-request cookie does not modify self.headers.
return self.headers | {"Cookie": cookie} if cookie else self.headers
async def __request_url_head(self, url: str, headers: dict, **kwargs, ):
return await self.client.head(

View File

@ -5,6 +5,7 @@ class ExtractParams(BaseModel):
url: str
download: bool = False
index: list = None
cookie: str = None
skip: bool = False

View File

@ -1,7 +1,8 @@
**项目更新内容:**
1. 优化文件名称非法字符处理
2. 适配新版本 HTTPX 库
3. 更正英语语言代码
4. 优化文件下载功能
5. 移除内置延时机制
2. 支持 API 模式传入 Cookie
3. 适配新版本 HTTPX 库
4. 更正英语语言代码
5. 优化文件下载功能
6. 移除内置延时机制