mirror of
https://github.com/JoeanAmier/XHS-Downloader.git
synced 2025-12-26 04:48:05 +08:00
使用 HTTPX 替代 AIOHTTP
This commit is contained in:
parent
01711be950
commit
e7c4d6daee
15
README.md
15
README.md
@ -151,6 +151,8 @@ async def example():
|
|||||||
# 实例对象
|
# 实例对象
|
||||||
work_path = "D:\\" # 作品数据/文件保存根路径,默认值:项目根路径
|
work_path = "D:\\" # 作品数据/文件保存根路径,默认值:项目根路径
|
||||||
folder_name = "Download" # 作品文件储存文件夹名称(自动创建),默认值:Download
|
folder_name = "Download" # 作品文件储存文件夹名称(自动创建),默认值:Download
|
||||||
|
name_format = "作品标题 作品描述"
|
||||||
|
user_agent = "" # User-Agent
|
||||||
cookie = "" # 小红书网页版 Cookie,无需登录,必需参数,登录状态对数据采集有影响
|
cookie = "" # 小红书网页版 Cookie,无需登录,必需参数,登录状态对数据采集有影响
|
||||||
proxy = None # 网络代理
|
proxy = None # 网络代理
|
||||||
timeout = 5 # 请求数据超时限制,单位:秒,默认值:10
|
timeout = 5 # 请求数据超时限制,单位:秒,默认值:10
|
||||||
@ -163,6 +165,8 @@ async def example():
|
|||||||
pass # 使用默认参数
|
pass # 使用默认参数
|
||||||
async with XHS(work_path=work_path,
|
async with XHS(work_path=work_path,
|
||||||
folder_name=folder_name,
|
folder_name=folder_name,
|
||||||
|
name_format=name_format,
|
||||||
|
user_agent=user_agent,
|
||||||
cookie=cookie,
|
cookie=cookie,
|
||||||
proxy=proxy,
|
proxy=proxy,
|
||||||
timeout=timeout,
|
timeout=timeout,
|
||||||
@ -212,6 +216,12 @@ async def example():
|
|||||||
<td align="center"><code>发布时间 作者昵称 作品标题</code></td>
|
<td align="center"><code>发布时间 作者昵称 作品标题</code></td>
|
||||||
</tr>
|
</tr>
|
||||||
<tr>
|
<tr>
|
||||||
|
<td align="center">user_agent</td>
|
||||||
|
<td align="center">str</td>
|
||||||
|
<td align="center">浏览器 User-Agent</td>
|
||||||
|
<td align="center">内置 Chrome User-Agent</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
<td align="center">cookie</td>
|
<td align="center">cookie</td>
|
||||||
<td align="center">str</td>
|
<td align="center">str</td>
|
||||||
<td align="center">小红书网页版 Cookie,<b>无需登录</b></td>
|
<td align="center">小红书网页版 Cookie,<b>无需登录</b></td>
|
||||||
@ -219,7 +229,7 @@ async def example():
|
|||||||
</tr>
|
</tr>
|
||||||
<tr>
|
<tr>
|
||||||
<td align="center">proxy</td>
|
<td align="center">proxy</td>
|
||||||
<td align="center">str</td>
|
<td align="center">str|dict</td>
|
||||||
<td align="center">设置程序代理</td>
|
<td align="center">设置程序代理</td>
|
||||||
<td align="center">null</td>
|
<td align="center">null</td>
|
||||||
</tr>
|
</tr>
|
||||||
@ -349,7 +359,8 @@ async def example():
|
|||||||
|
|
||||||
# 💡 代码参考
|
# 💡 代码参考
|
||||||
|
|
||||||
* https://docs.aiohttp.org/en/stable/
|
* https://github.com/encode/httpx/
|
||||||
|
* https://github.com/tiangolo/fastapi
|
||||||
* https://textual.textualize.io/
|
* https://textual.textualize.io/
|
||||||
* https://aiosqlite.omnilib.dev/en/stable/
|
* https://aiosqlite.omnilib.dev/en/stable/
|
||||||
* https://click.palletsprojects.com/en/8.1.x/
|
* https://click.palletsprojects.com/en/8.1.x/
|
||||||
|
|||||||
Binary file not shown.
@ -270,3 +270,18 @@ msgstr "Web API server has been shut down!"
|
|||||||
|
|
||||||
msgid "服务器主机及端口: {0}"
|
msgid "服务器主机及端口: {0}"
|
||||||
msgstr "Server host and port: {0}"
|
msgstr "Server host and port: {0}"
|
||||||
|
|
||||||
|
msgid "内置 Chrome User-Agent"
|
||||||
|
msgstr "Built-in Chrome User-Agent"
|
||||||
|
|
||||||
|
msgid "proxy 参数 {0} 设置错误,程序将不会使用代理"
|
||||||
|
msgstr "The proxy parameter {0} is set incorrectly, and the program will not use the proxy"
|
||||||
|
|
||||||
|
msgid "代理 {0} 测试成功"
|
||||||
|
msgstr "Proxy {0} tested successfully"
|
||||||
|
|
||||||
|
msgid "代理 {0} 测试超时"
|
||||||
|
msgstr "Proxy {0} test timed out"
|
||||||
|
|
||||||
|
msgid "代理 {0} 测试失败:{1}"
|
||||||
|
msgstr "Proxy {0} test failed: {1}"
|
||||||
|
|||||||
@ -270,3 +270,18 @@ msgstr ""
|
|||||||
|
|
||||||
msgid "服务器主机及端口: {0}"
|
msgid "服务器主机及端口: {0}"
|
||||||
msgstr ""
|
msgstr ""
|
||||||
|
|
||||||
|
msgid "内置 Chrome User-Agent"
|
||||||
|
msgstr ""
|
||||||
|
|
||||||
|
msgid "proxy 参数 {0} 设置错误,程序将不会使用代理"
|
||||||
|
msgstr ""
|
||||||
|
|
||||||
|
msgid "代理 {0} 测试成功"
|
||||||
|
msgstr ""
|
||||||
|
|
||||||
|
msgid "代理 {0} 测试超时"
|
||||||
|
msgstr ""
|
||||||
|
|
||||||
|
msgid "代理 {0} 测试失败:{1}"
|
||||||
|
msgstr ""
|
||||||
|
|||||||
7
main.py
7
main.py
@ -16,6 +16,8 @@ async def example():
|
|||||||
# 实例对象
|
# 实例对象
|
||||||
work_path = "D:\\" # 作品数据/文件保存根路径,默认值:项目根路径
|
work_path = "D:\\" # 作品数据/文件保存根路径,默认值:项目根路径
|
||||||
folder_name = "Download" # 作品文件储存文件夹名称(自动创建),默认值:Download
|
folder_name = "Download" # 作品文件储存文件夹名称(自动创建),默认值:Download
|
||||||
|
name_format = "作品标题 作品描述"
|
||||||
|
user_agent = "" # User-Agent
|
||||||
cookie = "" # 小红书网页版 Cookie,无需登录,必需参数,登录状态对数据采集有影响
|
cookie = "" # 小红书网页版 Cookie,无需登录,必需参数,登录状态对数据采集有影响
|
||||||
proxy = None # 网络代理
|
proxy = None # 网络代理
|
||||||
timeout = 5 # 请求数据超时限制,单位:秒,默认值:10
|
timeout = 5 # 请求数据超时限制,单位:秒,默认值:10
|
||||||
@ -28,6 +30,8 @@ async def example():
|
|||||||
pass # 使用默认参数
|
pass # 使用默认参数
|
||||||
async with XHS(work_path=work_path,
|
async with XHS(work_path=work_path,
|
||||||
folder_name=folder_name,
|
folder_name=folder_name,
|
||||||
|
name_format=name_format,
|
||||||
|
user_agent=user_agent,
|
||||||
cookie=cookie,
|
cookie=cookie,
|
||||||
proxy=proxy,
|
proxy=proxy,
|
||||||
timeout=timeout,
|
timeout=timeout,
|
||||||
@ -60,6 +64,7 @@ if __name__ == '__main__':
|
|||||||
if len(argv) == 1:
|
if len(argv) == 1:
|
||||||
run(app())
|
run(app())
|
||||||
elif argv[1] == "server":
|
elif argv[1] == "server":
|
||||||
run(server())
|
print("该模式重构中!")
|
||||||
|
# run(server())
|
||||||
else:
|
else:
|
||||||
cli()
|
cli()
|
||||||
|
|||||||
@ -1,4 +1,3 @@
|
|||||||
aiohttp>=3.9.1
|
|
||||||
textual>=0.47.1
|
textual>=0.47.1
|
||||||
pyperclip>=1.8.2
|
pyperclip>=1.8.2
|
||||||
lxml>=5.1.0
|
lxml>=5.1.0
|
||||||
@ -6,3 +5,6 @@ PyYAML>=6.0.1
|
|||||||
aiosqlite>=0.20.0
|
aiosqlite>=0.20.0
|
||||||
click>=8.1.7
|
click>=8.1.7
|
||||||
browser_cookie3>=0.19.1
|
browser_cookie3>=0.19.1
|
||||||
|
httpx>=0.27.0
|
||||||
|
fastapi>=0.110.0
|
||||||
|
uvicorn>=0.24.0
|
||||||
|
|||||||
@ -43,7 +43,11 @@ class XHSDownloader(App):
|
|||||||
def __initialization(self) -> None:
|
def __initialization(self) -> None:
|
||||||
self.parameter = self.SETTINGS.run()
|
self.parameter = self.SETTINGS.run()
|
||||||
self.message = Translate(self.parameter["language"]).message()
|
self.message = Translate(self.parameter["language"]).message()
|
||||||
self.APP = XHS(**self.parameter, transition=self.message)
|
self.APP = XHS(
|
||||||
|
**self.parameter,
|
||||||
|
transition=self.message,
|
||||||
|
_print=False,
|
||||||
|
)
|
||||||
|
|
||||||
async def on_mount(self) -> None:
|
async def on_mount(self) -> None:
|
||||||
self.install_screen(
|
self.install_screen(
|
||||||
|
|||||||
@ -87,6 +87,7 @@ class Index(Screen):
|
|||||||
">" *
|
">" *
|
||||||
50}",
|
50}",
|
||||||
style=MASTER), scroll_end=False)
|
style=MASTER), scroll_end=False)
|
||||||
|
self.xhs.manager.print_proxy_tip(log=self.tip, )
|
||||||
|
|
||||||
@on(Button.Pressed, "#deal")
|
@on(Button.Pressed, "#deal")
|
||||||
async def deal_button(self):
|
async def deal_button(self):
|
||||||
|
|||||||
@ -39,6 +39,9 @@ class Setting(Screen):
|
|||||||
Label(self.message("作品文件名称格式"), classes="params", ),
|
Label(self.message("作品文件名称格式"), classes="params", ),
|
||||||
Input(self.data["name_format"], placeholder=self.message("发布时间 作者昵称 作品标题"), valid_empty=True,
|
Input(self.data["name_format"], placeholder=self.message("发布时间 作者昵称 作品标题"), valid_empty=True,
|
||||||
id="name_format", ),
|
id="name_format", ),
|
||||||
|
Label(self.message("User-Agent"), classes="params", ),
|
||||||
|
Input(self.data["user_agent"], placeholder=self.message("内置 Chrome User-Agent"), valid_empty=True,
|
||||||
|
id="user_agent", ),
|
||||||
Label(self.message("小红书网页版 Cookie"), classes="params", ),
|
Label(self.message("小红书网页版 Cookie"), classes="params", ),
|
||||||
Input(placeholder=self.__check_cookie(), valid_empty=True, id="cookie", ),
|
Input(placeholder=self.__check_cookie(), valid_empty=True, id="cookie", ),
|
||||||
Label(self.message("网络代理"), classes="params", ),
|
Label(self.message("网络代理"), classes="params", ),
|
||||||
@ -98,6 +101,7 @@ class Setting(Screen):
|
|||||||
"work_path": self.query_one("#work_path").value,
|
"work_path": self.query_one("#work_path").value,
|
||||||
"folder_name": self.query_one("#folder_name").value,
|
"folder_name": self.query_one("#folder_name").value,
|
||||||
"name_format": self.query_one("#name_format").value,
|
"name_format": self.query_one("#name_format").value,
|
||||||
|
"user_agent": self.query_one("#user_agent").value,
|
||||||
"cookie": self.query_one("#cookie").value or self.data["cookie"],
|
"cookie": self.query_one("#cookie").value or self.data["cookie"],
|
||||||
"proxy": self.query_one("#proxy").value or None,
|
"proxy": self.query_one("#proxy").value or None,
|
||||||
"timeout": int(self.query_one("#timeout").value),
|
"timeout": int(self.query_one("#timeout").value),
|
||||||
|
|||||||
@ -1,6 +1,5 @@
|
|||||||
from typing import Callable
|
from typing import Callable
|
||||||
|
|
||||||
from aiohttp import ClientTimeout
|
|
||||||
from rich.text import Text
|
from rich.text import Text
|
||||||
from textual import work
|
from textual import work
|
||||||
from textual.app import ComposeResult
|
from textual.app import ComposeResult
|
||||||
@ -39,7 +38,7 @@ class Update(ModalScreen):
|
|||||||
@work()
|
@work()
|
||||||
async def check_update(self) -> None:
|
async def check_update(self) -> None:
|
||||||
try:
|
try:
|
||||||
url = await self.xhs.html.request_url(RELEASES, False, None, timeout=ClientTimeout(connect=5))
|
url = await self.xhs.html.request_url(RELEASES, False, None, timeout=5, )
|
||||||
version = url.split("/")[-1]
|
version = url.split("/")[-1]
|
||||||
match self.compare_versions(f"{VERSION_MAJOR}.{VERSION_MINOR}", version, VERSION_BETA):
|
match self.compare_versions(f"{VERSION_MAJOR}.{VERSION_MINOR}", version, VERSION_BETA):
|
||||||
case 4:
|
case 4:
|
||||||
|
|||||||
@ -1,4 +1,4 @@
|
|||||||
from asyncio import CancelledError
|
# from asyncio import CancelledError
|
||||||
from asyncio import Event
|
from asyncio import Event
|
||||||
from asyncio import Queue
|
from asyncio import Queue
|
||||||
from asyncio import QueueEmpty
|
from asyncio import QueueEmpty
|
||||||
@ -10,7 +10,7 @@ from re import compile
|
|||||||
from typing import Callable
|
from typing import Callable
|
||||||
from urllib.parse import urlparse
|
from urllib.parse import urlparse
|
||||||
|
|
||||||
from aiohttp import web
|
# from aiohttp import web
|
||||||
from pyperclip import paste
|
from pyperclip import paste
|
||||||
|
|
||||||
from source.expansion import BrowserCookie
|
from source.expansion import BrowserCookie
|
||||||
@ -24,7 +24,7 @@ from source.module import (
|
|||||||
ERROR,
|
ERROR,
|
||||||
WARNING,
|
WARNING,
|
||||||
MASTER,
|
MASTER,
|
||||||
REPOSITORY,
|
# REPOSITORY,
|
||||||
)
|
)
|
||||||
from source.module import Translate
|
from source.module import Translate
|
||||||
from source.module import logging
|
from source.module import logging
|
||||||
@ -55,7 +55,7 @@ class XHS:
|
|||||||
name_format="发布时间 作者昵称 作品标题",
|
name_format="发布时间 作者昵称 作品标题",
|
||||||
user_agent: str = None,
|
user_agent: str = None,
|
||||||
cookie: str = None,
|
cookie: str = None,
|
||||||
proxy: str = None,
|
proxy: str | dict = None,
|
||||||
timeout=10,
|
timeout=10,
|
||||||
chunk=1024 * 1024,
|
chunk=1024 * 1024,
|
||||||
max_retry=5,
|
max_retry=5,
|
||||||
@ -69,6 +69,7 @@ class XHS:
|
|||||||
# server=False,
|
# server=False,
|
||||||
transition: Callable[[str], str] = None,
|
transition: Callable[[str], str] = None,
|
||||||
read_cookie: int | str = None,
|
read_cookie: int | str = None,
|
||||||
|
_print: bool = True,
|
||||||
*args,
|
*args,
|
||||||
**kwargs,
|
**kwargs,
|
||||||
):
|
):
|
||||||
@ -78,8 +79,8 @@ class XHS:
|
|||||||
work_path,
|
work_path,
|
||||||
folder_name,
|
folder_name,
|
||||||
name_format,
|
name_format,
|
||||||
# user_agent,
|
|
||||||
chunk,
|
chunk,
|
||||||
|
user_agent,
|
||||||
self.read_browser_cookie(read_cookie) or cookie,
|
self.read_browser_cookie(read_cookie) or cookie,
|
||||||
proxy,
|
proxy,
|
||||||
timeout,
|
timeout,
|
||||||
@ -92,6 +93,7 @@ class XHS:
|
|||||||
folder_mode,
|
folder_mode,
|
||||||
# server,
|
# server,
|
||||||
self.message,
|
self.message,
|
||||||
|
_print,
|
||||||
)
|
)
|
||||||
self.html = Html(self.manager)
|
self.html = Html(self.manager)
|
||||||
self.image = Image()
|
self.image = Image()
|
||||||
@ -104,8 +106,8 @@ class XHS:
|
|||||||
self.clipboard_cache: str = ""
|
self.clipboard_cache: str = ""
|
||||||
self.queue = Queue()
|
self.queue = Queue()
|
||||||
self.event = Event()
|
self.event = Event()
|
||||||
self.runner = self.init_server()
|
# self.runner = self.init_server()
|
||||||
self.site = None
|
# self.site = None
|
||||||
|
|
||||||
def __extract_image(self, container: dict, data: Namespace):
|
def __extract_image(self, container: dict, data: Namespace):
|
||||||
container["下载地址"], container["动图地址"] = self.image.get_image_link(
|
container["下载地址"], container["动图地址"] = self.image.get_image_link(
|
||||||
@ -232,7 +234,8 @@ class XHS:
|
|||||||
values.append(data[key])
|
values.append(data[key])
|
||||||
return self.manager.SEPARATE.join(values)
|
return self.manager.SEPARATE.join(values)
|
||||||
|
|
||||||
def __get_name_time(self, data: dict) -> str:
|
@staticmethod
|
||||||
|
def __get_name_time(data: dict) -> str:
|
||||||
return data["发布时间"].replace(":", ".")
|
return data["发布时间"].replace(":", ".")
|
||||||
|
|
||||||
def __get_name_author(self, data: dict) -> str:
|
def __get_name_author(self, data: dict) -> str:
|
||||||
@ -290,49 +293,49 @@ class XHS:
|
|||||||
return BrowserCookie.get(
|
return BrowserCookie.get(
|
||||||
value, domain="xiaohongshu.com") if value else ""
|
value, domain="xiaohongshu.com") if value else ""
|
||||||
|
|
||||||
@staticmethod
|
# @staticmethod
|
||||||
async def index(request):
|
# async def index(request):
|
||||||
return web.HTTPFound(REPOSITORY)
|
# return web.HTTPFound(REPOSITORY)
|
||||||
|
|
||||||
async def handle(self, request):
|
# async def handle(self, request):
|
||||||
data = await request.post()
|
# data = await request.post()
|
||||||
url = data.get("url")
|
# url = data.get("url")
|
||||||
download = data.get("download", False)
|
# download = data.get("download", False)
|
||||||
index = data.get("index")
|
# index = data.get("index")
|
||||||
skip = data.get("skip", False)
|
# skip = data.get("skip", False)
|
||||||
url = await self.__extract_links(url, None)
|
# url = await self.__extract_links(url, None)
|
||||||
if not url:
|
# if not url:
|
||||||
msg = self.message("提取小红书作品链接失败")
|
# msg = self.message("提取小红书作品链接失败")
|
||||||
data = None
|
# data = None
|
||||||
else:
|
# else:
|
||||||
if data := await self.__deal_extract(url[0], download, index, None, None, not skip, ):
|
# if data := await self.__deal_extract(url[0], download, index, None, None, not skip, ):
|
||||||
msg = self.message("获取小红书作品数据成功")
|
# msg = self.message("获取小红书作品数据成功")
|
||||||
else:
|
# else:
|
||||||
msg = self.message("获取小红书作品数据失败")
|
# msg = self.message("获取小红书作品数据失败")
|
||||||
data = None
|
# data = None
|
||||||
return web.json_response(dict(message=msg, url=url[0], data=data))
|
# return web.json_response(dict(message=msg, url=url[0], data=data))
|
||||||
|
|
||||||
def init_server(self, ):
|
# def init_server(self, ):
|
||||||
app = web.Application(debug=True)
|
# app = web.Application(debug=True)
|
||||||
app.router.add_get('/', self.index)
|
# app.router.add_get('/', self.index)
|
||||||
app.router.add_post('/xhs/', self.handle)
|
# app.router.add_post('/xhs/', self.handle)
|
||||||
return web.AppRunner(app)
|
# return web.AppRunner(app)
|
||||||
|
|
||||||
async def run_server(self, log=None, ):
|
# async def run_server(self, log=None, ):
|
||||||
try:
|
# try:
|
||||||
await self.start_server(log)
|
# await self.start_server(log)
|
||||||
while True:
|
# while True:
|
||||||
await sleep(3600) # 保持服务器运行
|
# await sleep(3600) # 保持服务器运行
|
||||||
except (CancelledError, KeyboardInterrupt):
|
# except (CancelledError, KeyboardInterrupt):
|
||||||
await self.close_server(log)
|
# await self.close_server(log)
|
||||||
|
|
||||||
async def start_server(self, log=None, ):
|
# async def start_server(self, log=None, ):
|
||||||
await self.runner.setup()
|
# await self.runner.setup()
|
||||||
self.site = web.TCPSite(self.runner, "0.0.0.0")
|
# self.site = web.TCPSite(self.runner, "0.0.0.0")
|
||||||
await self.site.start()
|
# await self.site.start()
|
||||||
logging(log, self.message("Web API 服务器已启动!"))
|
# logging(log, self.message("Web API 服务器已启动!"))
|
||||||
logging(log, self.message("服务器主机及端口: {0}".format(self.site.name, )))
|
# logging(log, self.message("服务器主机及端口: {0}".format(self.site.name, )))
|
||||||
|
|
||||||
async def close_server(self, log=None, ):
|
# async def close_server(self, log=None, ):
|
||||||
await self.runner.cleanup()
|
# await self.runner.cleanup()
|
||||||
logging(log, self.message("Web API 服务器已关闭!"))
|
# logging(log, self.message("Web API 服务器已关闭!"))
|
||||||
|
|||||||
@ -1,7 +1,7 @@
|
|||||||
from asyncio import gather
|
from asyncio import gather
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
from aiohttp import ClientError
|
from httpx import HTTPError
|
||||||
|
|
||||||
from source.module import ERROR
|
from source.module import ERROR
|
||||||
from source.module import Manager
|
from source.module import Manager
|
||||||
@ -25,9 +25,8 @@ class Download:
|
|||||||
self.manager = manager
|
self.manager = manager
|
||||||
self.folder = manager.folder
|
self.folder = manager.folder
|
||||||
self.temp = manager.temp
|
self.temp = manager.temp
|
||||||
self.proxy = manager.proxy
|
|
||||||
self.chunk = manager.chunk
|
self.chunk = manager.chunk
|
||||||
self.session = manager.download_session
|
self.client = manager.download_client
|
||||||
self.retry = manager.retry
|
self.retry = manager.retry
|
||||||
self.message = manager.message
|
self.message = manager.message
|
||||||
self.folder_mode = manager.folder_mode
|
self.folder_mode = manager.folder_mode
|
||||||
@ -117,11 +116,11 @@ class Download:
|
|||||||
async def __download(self, url: str, path: Path, name: str, format_: str, log, bar):
|
async def __download(self, url: str, path: Path, name: str, format_: str, log, bar):
|
||||||
temp = self.temp.joinpath(f"{name}.{format_}")
|
temp = self.temp.joinpath(f"{name}.{format_}")
|
||||||
try:
|
try:
|
||||||
async with self.session.get(url, proxy=self.proxy) as response:
|
async with self.client.stream("GET", url, ) as response:
|
||||||
if response.status != 200:
|
if response.status_code != 200:
|
||||||
logging(
|
logging(
|
||||||
log, self.message("链接 {0} 请求失败,响应码 {1}").format(
|
log, self.message("链接 {0} 请求失败,响应码 {1}").format(
|
||||||
url, response.status), style=ERROR)
|
url, response.status_code), style=ERROR)
|
||||||
return False
|
return False
|
||||||
suffix = self.__extract_type(
|
suffix = self.__extract_type(
|
||||||
response.headers.get("Content-Type")) or format_
|
response.headers.get("Content-Type")) or format_
|
||||||
@ -131,14 +130,14 @@ class Download:
|
|||||||
# response.headers.get(
|
# response.headers.get(
|
||||||
# 'content-length', 0)) or None)
|
# 'content-length', 0)) or None)
|
||||||
with temp.open("wb") as f:
|
with temp.open("wb") as f:
|
||||||
async for chunk in response.content.iter_chunked(self.chunk):
|
async for chunk in response.aiter_bytes(self.chunk):
|
||||||
f.write(chunk)
|
f.write(chunk)
|
||||||
# self.__update_progress(bar, len(chunk))
|
# self.__update_progress(bar, len(chunk))
|
||||||
self.manager.move(temp, real)
|
self.manager.move(temp, real)
|
||||||
# self.__create_progress(bar, None)
|
# self.__create_progress(bar, None)
|
||||||
logging(log, self.message("文件 {0} 下载成功").format(real.name))
|
logging(log, self.message("文件 {0} 下载成功").format(real.name))
|
||||||
return True
|
return True
|
||||||
except ClientError as error:
|
except HTTPError as error:
|
||||||
self.manager.delete(temp)
|
self.manager.delete(temp)
|
||||||
# self.__create_progress(bar, None)
|
# self.__create_progress(bar, None)
|
||||||
logging(log, str(error), ERROR)
|
logging(log, str(error), ERROR)
|
||||||
|
|||||||
@ -1,4 +1,4 @@
|
|||||||
from aiohttp import ClientError
|
from httpx import HTTPError
|
||||||
|
|
||||||
from source.module import ERROR
|
from source.module import ERROR
|
||||||
from source.module import Manager
|
from source.module import Manager
|
||||||
@ -10,10 +10,9 @@ __all__ = ["Html"]
|
|||||||
|
|
||||||
class Html:
|
class Html:
|
||||||
def __init__(self, manager: Manager, ):
|
def __init__(self, manager: Manager, ):
|
||||||
self.proxy = manager.proxy
|
|
||||||
self.retry = manager.retry
|
self.retry = manager.retry
|
||||||
self.message = manager.message
|
self.message = manager.message
|
||||||
self.session = manager.request_session
|
self.client = manager.request_client
|
||||||
|
|
||||||
@retry
|
@retry
|
||||||
async def request_url(
|
async def request_url(
|
||||||
@ -24,15 +23,14 @@ class Html:
|
|||||||
**kwargs,
|
**kwargs,
|
||||||
) -> str:
|
) -> str:
|
||||||
try:
|
try:
|
||||||
async with self.session.get(
|
response = await self.client.get(
|
||||||
url,
|
url,
|
||||||
proxy=self.proxy,
|
**kwargs,
|
||||||
**kwargs,
|
)
|
||||||
) as response:
|
if response.status_code != 200:
|
||||||
if response.status != 200:
|
return ""
|
||||||
return ""
|
return response.text if content else str(response.url)
|
||||||
return await response.text() if content else str(response.url)
|
except HTTPError as error:
|
||||||
except ClientError as error:
|
|
||||||
logging(log, str(error), ERROR)
|
logging(log, str(error), ERROR)
|
||||||
logging(
|
logging(
|
||||||
log, self.message("网络异常,请求 {0} 失败").format(url), ERROR)
|
log, self.message("网络异常,请求 {0} 失败").format(url), ERROR)
|
||||||
|
|||||||
@ -21,6 +21,7 @@ from .static import (
|
|||||||
USERSCRIPT,
|
USERSCRIPT,
|
||||||
HEADERS,
|
HEADERS,
|
||||||
PROJECT,
|
PROJECT,
|
||||||
|
USERAGENT,
|
||||||
)
|
)
|
||||||
from .tools import (
|
from .tools import (
|
||||||
retry,
|
retry,
|
||||||
@ -54,4 +55,5 @@ __all__ = [
|
|||||||
"PROJECT",
|
"PROJECT",
|
||||||
"Translate",
|
"Translate",
|
||||||
"DataRecorder",
|
"DataRecorder",
|
||||||
|
"USERAGENT",
|
||||||
]
|
]
|
||||||
|
|||||||
@ -5,10 +5,15 @@ from shutil import move
|
|||||||
from shutil import rmtree
|
from shutil import rmtree
|
||||||
from typing import Callable
|
from typing import Callable
|
||||||
|
|
||||||
from aiohttp import ClientSession
|
from httpx import AsyncClient
|
||||||
from aiohttp import ClientTimeout
|
from httpx import RequestError
|
||||||
|
from httpx import TimeoutException
|
||||||
|
from httpx import get
|
||||||
|
|
||||||
from .static import HEADERS
|
from .static import HEADERS
|
||||||
|
from .static import USERAGENT
|
||||||
|
from .static import WARNING
|
||||||
|
from .tools import logging
|
||||||
|
|
||||||
__all__ = ["Manager"]
|
__all__ = ["Manager"]
|
||||||
|
|
||||||
@ -30,6 +35,10 @@ class Manager:
|
|||||||
'作者昵称',
|
'作者昵称',
|
||||||
'作者ID',
|
'作者ID',
|
||||||
)
|
)
|
||||||
|
NO_PROXY = {
|
||||||
|
"http://": None,
|
||||||
|
"https://": None,
|
||||||
|
}
|
||||||
SEPARATE = "_"
|
SEPARATE = "_"
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
@ -39,8 +48,9 @@ class Manager:
|
|||||||
folder: str,
|
folder: str,
|
||||||
name_format: str,
|
name_format: str,
|
||||||
chunk: int,
|
chunk: int,
|
||||||
|
user_agent: str,
|
||||||
cookie: str,
|
cookie: str,
|
||||||
proxy: str,
|
proxy: str | dict,
|
||||||
timeout: int,
|
timeout: int,
|
||||||
retry: int,
|
retry: int,
|
||||||
record_data: bool,
|
record_data: bool,
|
||||||
@ -51,12 +61,14 @@ class Manager:
|
|||||||
folder_mode: bool,
|
folder_mode: bool,
|
||||||
# server: bool,
|
# server: bool,
|
||||||
transition: Callable[[str], str],
|
transition: Callable[[str], str],
|
||||||
|
_print: bool,
|
||||||
):
|
):
|
||||||
self.root = root
|
self.root = root
|
||||||
self.temp = root.joinpath("./temp")
|
self.temp = root.joinpath("./temp")
|
||||||
self.path = self.__check_path(path)
|
self.path = self.__check_path(path)
|
||||||
self.folder = self.__check_folder(folder)
|
self.folder = self.__check_folder(folder)
|
||||||
self.blank_headers = HEADERS
|
self.message = transition
|
||||||
|
self.blank_headers = HEADERS | {"User-Agent": user_agent or USERAGENT}
|
||||||
self.headers = self.blank_headers | {"Cookie": cookie}
|
self.headers = self.blank_headers | {"Cookie": cookie}
|
||||||
self.retry = retry
|
self.retry = retry
|
||||||
self.chunk = chunk
|
self.chunk = chunk
|
||||||
@ -64,16 +76,20 @@ class Manager:
|
|||||||
self.record_data = self.check_bool(record_data, False)
|
self.record_data = self.check_bool(record_data, False)
|
||||||
self.image_format = self.__check_image_format(image_format)
|
self.image_format = self.__check_image_format(image_format)
|
||||||
self.folder_mode = self.check_bool(folder_mode, False)
|
self.folder_mode = self.check_bool(folder_mode, False)
|
||||||
self.proxy = proxy
|
self.proxy_tip = None
|
||||||
self.request_session = ClientSession(
|
self.proxy = self.__check_proxy(proxy)
|
||||||
|
self.print_proxy_tip(_print, )
|
||||||
|
self.request_client = AsyncClient(
|
||||||
headers=self.headers | {
|
headers=self.headers | {
|
||||||
"Referer": "https://www.xiaohongshu.com/explore", },
|
"Referer": "https://www.xiaohongshu.com/explore", },
|
||||||
timeout=ClientTimeout(connect=timeout),
|
timeout=timeout,
|
||||||
|
**self.proxy,
|
||||||
)
|
)
|
||||||
self.download_session = ClientSession(
|
self.download_client = AsyncClient(
|
||||||
headers=self.blank_headers,
|
headers=self.blank_headers,
|
||||||
timeout=ClientTimeout(connect=timeout))
|
timeout=timeout,
|
||||||
self.message = transition
|
**self.proxy,
|
||||||
|
)
|
||||||
self.image_download = self.check_bool(image_download, True)
|
self.image_download = self.check_bool(image_download, True)
|
||||||
self.video_download = self.check_bool(video_download, True)
|
self.video_download = self.check_bool(video_download, True)
|
||||||
self.live_download = self.check_bool(live_download, True)
|
self.live_download = self.check_bool(live_download, True)
|
||||||
@ -134,8 +150,8 @@ class Manager:
|
|||||||
return value if isinstance(value, bool) else default
|
return value if isinstance(value, bool) else default
|
||||||
|
|
||||||
async def close(self):
|
async def close(self):
|
||||||
await self.request_session.close()
|
await self.request_client.aclose()
|
||||||
await self.download_session.close()
|
await self.download_client.aclose()
|
||||||
self.__clean()
|
self.__clean()
|
||||||
|
|
||||||
def __check_name_format(self, format_: str) -> str:
|
def __check_name_format(self, format_: str) -> str:
|
||||||
@ -148,3 +164,38 @@ class Manager:
|
|||||||
),
|
),
|
||||||
format_,
|
format_,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def __check_proxy(
|
||||||
|
self,
|
||||||
|
proxy: str | dict,
|
||||||
|
url="https://www.baidu.com/",
|
||||||
|
) -> dict:
|
||||||
|
if not proxy:
|
||||||
|
return {"proxies": self.NO_PROXY}
|
||||||
|
if isinstance(proxy, str):
|
||||||
|
kwarg = {"proxy": proxy}
|
||||||
|
elif isinstance(proxy, dict):
|
||||||
|
kwarg = {"proxies": proxy}
|
||||||
|
else:
|
||||||
|
self.proxy_tip = (
|
||||||
|
self.message("proxy 参数 {0} 设置错误,程序将不会使用代理").format(proxy), WARNING,)
|
||||||
|
return {"proxies": self.NO_PROXY}
|
||||||
|
try:
|
||||||
|
response = get(
|
||||||
|
url,
|
||||||
|
**kwarg, )
|
||||||
|
if response.status_code < 400:
|
||||||
|
self.proxy_tip = (self.message("代理 {0} 测试成功").format(proxy),)
|
||||||
|
return kwarg
|
||||||
|
except TimeoutException:
|
||||||
|
self.proxy_tip = (
|
||||||
|
self.message("代理 {0} 测试超时").format(proxy), WARNING,)
|
||||||
|
except RequestError as e:
|
||||||
|
self.proxy_tip = (
|
||||||
|
self.message("代理 {0} 测试失败:{1}").format(
|
||||||
|
proxy, e), WARNING,)
|
||||||
|
return {"proxies": self.NO_PROXY}
|
||||||
|
|
||||||
|
def print_proxy_tip(self, _print: bool = True, log=None, ) -> None:
|
||||||
|
if _print and self.proxy_tip:
|
||||||
|
logging(log, *self.proxy_tip)
|
||||||
|
|||||||
@ -4,6 +4,7 @@ from pathlib import Path
|
|||||||
from platform import system
|
from platform import system
|
||||||
|
|
||||||
from .static import ROOT
|
from .static import ROOT
|
||||||
|
from .static import USERAGENT
|
||||||
|
|
||||||
__all__ = ['Settings']
|
__all__ = ['Settings']
|
||||||
|
|
||||||
@ -13,6 +14,7 @@ class Settings:
|
|||||||
"work_path": "",
|
"work_path": "",
|
||||||
"folder_name": "Download",
|
"folder_name": "Download",
|
||||||
"name_format": "发布时间 作者昵称 作品标题",
|
"name_format": "发布时间 作者昵称 作品标题",
|
||||||
|
"user_agent": USERAGENT,
|
||||||
"cookie": "",
|
"cookie": "",
|
||||||
"proxy": None,
|
"proxy": None,
|
||||||
"timeout": 10,
|
"timeout": 10,
|
||||||
|
|||||||
@ -18,11 +18,12 @@ __all__ = [
|
|||||||
"USERSCRIPT",
|
"USERSCRIPT",
|
||||||
"HEADERS",
|
"HEADERS",
|
||||||
"PROJECT",
|
"PROJECT",
|
||||||
|
"USERAGENT",
|
||||||
]
|
]
|
||||||
|
|
||||||
VERSION_MAJOR = 2
|
VERSION_MAJOR = 2
|
||||||
VERSION_MINOR = 0
|
VERSION_MINOR = 1
|
||||||
VERSION_BETA = False
|
VERSION_BETA = True
|
||||||
ROOT = Path(__file__).resolve().parent.parent.parent
|
ROOT = Path(__file__).resolve().parent.parent.parent
|
||||||
PROJECT = f"XHS-Downloader V{VERSION_MAJOR}.{
|
PROJECT = f"XHS-Downloader V{VERSION_MAJOR}.{
|
||||||
VERSION_MINOR}{" Beta" if VERSION_BETA else ""}"
|
VERSION_MINOR}{" Beta" if VERSION_BETA else ""}"
|
||||||
@ -33,22 +34,27 @@ RELEASES = "https://github.com/JoeanAmier/XHS-Downloader/releases/latest"
|
|||||||
|
|
||||||
USERSCRIPT = "https://raw.githubusercontent.com/JoeanAmier/XHS-Downloader/master/static/XHS-Downloader.js"
|
USERSCRIPT = "https://raw.githubusercontent.com/JoeanAmier/XHS-Downloader/master/static/XHS-Downloader.js"
|
||||||
|
|
||||||
|
USERAGENT = (
|
||||||
|
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 "
|
||||||
|
"Safari/537.36")
|
||||||
|
|
||||||
HEADERS = {
|
HEADERS = {
|
||||||
'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,'
|
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,"
|
||||||
'application/signed-exchange;v=b3;q=0.7',
|
"application/signed-exchange;v=b3;q=0.7",
|
||||||
'accept-language': 'zh-SG,zh-CN;q=0.9,zh;q=0.8',
|
"Accept-Encoding": "gzip, deflate, br, zstd",
|
||||||
'dnt': '1',
|
"Accept-Language": "zh-SG,zh-CN;q=0.9,zh;q=0.8",
|
||||||
'priority': 'u=0, i',
|
"Cookie": "",
|
||||||
'sec-ch-ua': '"Google Chrome";v="125", "Chromium";v="125", "Not.A/Brand";v="24"',
|
"Dnt": "1",
|
||||||
'sec-ch-ua-mobile': '?0',
|
# "Priority": "u=0, i",
|
||||||
'sec-ch-ua-platform': '"Windows"',
|
# "Sec-Ch-Ua": "\"Not/A)Brand\";v=\"8\", \"Chromium\";v=\"126\", \"Google Chrome\";v=\"126\"",
|
||||||
'sec-fetch-dest': 'document',
|
"Sec-Ch-Ua-Mobile": "?0",
|
||||||
'sec-fetch-mode': 'navigate',
|
# "Sec-Ch-Ua-Platform": "\"Windows\"",
|
||||||
'sec-fetch-site': 'none',
|
"Sec-Fetch-Dest": "document",
|
||||||
'sec-fetch-user': '?1',
|
"Sec-Fetch-Mode": "navigate",
|
||||||
'upgrade-insecure-requests': '1',
|
"Sec-Fetch-Site": "none",
|
||||||
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 '
|
"Sec-Fetch-User": "?1",
|
||||||
'Safari/537.36',
|
"Upgrade-Insecure-Requests": "1",
|
||||||
|
"User-Agent": USERAGENT,
|
||||||
}
|
}
|
||||||
|
|
||||||
MASTER = "b #fff200"
|
MASTER = "b #fff200"
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user