mirror of
https://github.com/JoeanAmier/XHS-Downloader.git
synced 2025-12-26 04:48:05 +08:00
更新项目代码
This commit is contained in:
parent
395d81c522
commit
413168f122
41
README.md
41
README.md
@ -11,10 +11,11 @@
|
||||
<h1>📑 功能清单</h1>
|
||||
<ul>
|
||||
<li>✅ 采集小红书图文/视频作品信息</li>
|
||||
<li>✅ 获取小红书图文/视频作品文件下载地址</li>
|
||||
<li>✅ 提取小红书图文/视频作品文件下载地址</li>
|
||||
<li>✅ 下载小红书无水印图文/视频作品文件</li>
|
||||
<li>✅ 自动跳过已下载的作品文件</li>
|
||||
<li>✅ 作品文件完整性处理机制</li>
|
||||
<li>☑️ 采集作品信息储存至文件</li>
|
||||
</ul>
|
||||
<h1>📸 程序截图</h1>
|
||||
<br>
|
||||
@ -25,7 +26,7 @@
|
||||
<li><code>https://www.xiaohongshu.com/discovery/item/作品ID</code></li>
|
||||
<li><code>https://xhslink.com/分享码</code></li>
|
||||
<br/>
|
||||
<p><b>可以单次输入多个作品链接,链接之间使用空格分隔。</b></p>
|
||||
<p><b>支持单次输入多个作品链接,链接之间使用空格分隔。</b></p>
|
||||
</ul>
|
||||
<h1>🪟 关于终端</h1>
|
||||
<p>⭐ 推荐使用 <a href="https://learn.microsoft.com/zh-cn/windows/terminal/install">Windows 终端</a> (Windows 11 自带默认终端)运行程序以便获得最佳显示效果!</p>
|
||||
@ -41,7 +42,7 @@
|
||||
<li>运行 <code>main.py</code> 即可使用</li>
|
||||
</ol>
|
||||
<h2>💻 二次开发</h2>
|
||||
<p>如果想要获取小红书图文/视频作品信息,可以根据 <code>main.py</code> 的注释提示进行代码调用。</p>
|
||||
<p>如果需要获取小红书图文/视频作品信息,可以根据 <code>main.py</code> 的注释提示进行代码调用。</p>
|
||||
<pre>
|
||||
# 测试链接
|
||||
error_demo = "https://github.com/JoeanAmier/XHS_Downloader"
|
||||
@ -49,24 +50,26 @@ image_demo = "https://www.xiaohongshu.com/explore/63b275a30000000019020185"
|
||||
video_demo = "https://www.xiaohongshu.com/explore/64edb460000000001f03cadc"
|
||||
multiple_demo = f"{image_demo} {video_demo}"
|
||||
# 实例对象
|
||||
path = "D:\\" # 作品下载储存根路径,默认值:当前路径
|
||||
path = "" # 作品下载储存根路径,默认值:当前路径
|
||||
folder = "Download" # 作品下载文件夹名称(自动创建),默认值:Download
|
||||
proxies = None # 网络代理
|
||||
user_agent = "" # 请求头 User-Agent
|
||||
proxy = None # 网络代理
|
||||
timeout = 5 # 网络请求超时限制,默认值:10
|
||||
chunk = 1024 * 1024 # 下载文件时,每次从服务器获取的数据块大小,单位字节
|
||||
# with XHS() as xhs:
|
||||
# async with XHS() as xhs:
|
||||
# pass # 使用默认参数
|
||||
with XHS(path=path,
|
||||
folder=folder,
|
||||
proxies=proxies,
|
||||
timeout=timeout,
|
||||
chunk=chunk) as xhs: # 使用自定义参数
|
||||
async with XHS(path=path,
|
||||
folder=folder,
|
||||
user_agent=user_agent,
|
||||
proxy=proxy,
|
||||
timeout=timeout,
|
||||
chunk=chunk) as xhs: # 使用自定义参数
|
||||
download = True # 是否下载作品文件,默认值:False
|
||||
# 返回作品详细信息,包括下载地址
|
||||
print(xhs.extract(error_demo)) # 获取数据失败时返回空字典
|
||||
print(xhs.extract(image_demo, download=download))
|
||||
print(xhs.extract(video_demo, download=download))
|
||||
print(xhs.extract(multiple_demo, download=download))
|
||||
print(await xhs.extract(error_demo, download=download)) # 获取数据失败时返回空字典
|
||||
print(await xhs.extract(image_demo, download=download))
|
||||
print(await xhs.extract(video_demo, download=download))
|
||||
print(await xhs.extract(multiple_demo, download=download)) # 支持传入多个作品链接
|
||||
</pre>
|
||||
<h1>⚙️ 配置文件</h1>
|
||||
<p>项目根目录下的 <code>settings.json</code> 文件,首次运行自动生成,可以自定义部分运行参数。</p>
|
||||
@ -83,7 +86,7 @@ with XHS(path=path,
|
||||
<tr>
|
||||
<td align="center">path</td>
|
||||
<td align="center">str</td>
|
||||
<td align="center">作品文件储存根路径</td>
|
||||
<td align="center">作品数据 / 文件保存根路径</td>
|
||||
<td align="center">项目根路径</td>
|
||||
</tr>
|
||||
<tr>
|
||||
@ -93,6 +96,12 @@ with XHS(path=path,
|
||||
<td align="center">Download</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td align="center">user_agent</td>
|
||||
<td align="center">str</td>
|
||||
<td align="center">请求头 User-Agent</td>
|
||||
<td align="center">内置 UA</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td align="center">proxy</td>
|
||||
<td align="center">str</td>
|
||||
<td align="center">设置代理</td>
|
||||
|
||||
18
main.py
18
main.py
@ -11,24 +11,26 @@ async def example():
|
||||
video_demo = "https://www.xiaohongshu.com/explore/64edb460000000001f03cadc"
|
||||
multiple_demo = f"{image_demo} {video_demo}"
|
||||
# 实例对象
|
||||
path = "D:\\" # 作品下载储存根路径,默认值:当前路径
|
||||
path = "" # 作品下载储存根路径,默认值:当前路径
|
||||
folder = "Download" # 作品下载文件夹名称(自动创建),默认值:Download
|
||||
proxies = None # 网络代理
|
||||
user_agent = "" # 请求头 User-Agent
|
||||
proxy = None # 网络代理
|
||||
timeout = 5 # 网络请求超时限制,默认值:10
|
||||
chunk = 1024 * 1024 # 下载文件时,每次从服务器获取的数据块大小,单位字节
|
||||
async with XHS() as xhs:
|
||||
pass # 使用默认参数
|
||||
# async with XHS() as xhs:
|
||||
# pass # 使用默认参数
|
||||
async with XHS(path=path,
|
||||
folder=folder,
|
||||
proxy=proxies,
|
||||
user_agent=user_agent,
|
||||
proxy=proxy,
|
||||
timeout=timeout,
|
||||
chunk=chunk) as xhs: # 使用自定义参数
|
||||
download = False # 是否下载作品文件,默认值:False
|
||||
download = True # 是否下载作品文件,默认值:False
|
||||
# 返回作品详细信息,包括下载地址
|
||||
print(await xhs.extract(error_demo)) # 获取数据失败时返回空字典
|
||||
print(await xhs.extract(error_demo, download=download)) # 获取数据失败时返回空字典
|
||||
print(await xhs.extract(image_demo, download=download))
|
||||
print(await xhs.extract(video_demo, download=download))
|
||||
print(await xhs.extract(multiple_demo, download=download))
|
||||
print(await xhs.extract(multiple_demo, download=download)) # 支持传入多个作品链接
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
@ -1,11 +1,8 @@
|
||||
from pathlib import Path
|
||||
|
||||
from aiohttp import ClientConnectionError
|
||||
from aiohttp import ClientProxyConnectionError
|
||||
from aiohttp import ClientSSLError
|
||||
from aiohttp import ClientSession
|
||||
|
||||
# from aiohttp import ClientTimeout
|
||||
from aiohttp import ClientTimeout
|
||||
from aiohttp import ServerTimeoutError
|
||||
|
||||
__all__ = ['Download']
|
||||
|
||||
@ -26,8 +23,9 @@ class Download:
|
||||
self.root = self.__init_root(root, path, folder)
|
||||
self.proxy = proxy
|
||||
self.chunk = chunk
|
||||
# self.timeout = ClientTimeout(total=timeout)
|
||||
self.session = ClientSession(headers=manager.headers)
|
||||
self.session = ClientSession(
|
||||
headers=manager.headers,
|
||||
timeout=ClientTimeout(connect=timeout))
|
||||
|
||||
def __init_root(self, root: Path, path: str, folder: str) -> Path:
|
||||
if path and (r := Path(path)).is_dir():
|
||||
@ -42,8 +40,10 @@ class Download:
|
||||
if type_ == 0:
|
||||
await self.__download(urls[0], f"{name}.mp4", log, bar)
|
||||
elif type_ == 1:
|
||||
for index, url in enumerate(urls):
|
||||
await self.__download(url, f"{name}_{index + 1}.png", log, bar)
|
||||
for index, url in enumerate(urls, start=1):
|
||||
await self.__download(url, f"{name}_{index}.png", log, bar)
|
||||
else:
|
||||
raise ValueError
|
||||
|
||||
async def __download(self, url: str, name: str, log, bar):
|
||||
temp = self.temp.joinpath(name)
|
||||
@ -52,32 +52,26 @@ class Download:
|
||||
return
|
||||
try:
|
||||
async with self.session.get(url, proxy=self.proxy) as response:
|
||||
# self.__create_progress(bar, int(response.headers.get('content-length', 0)))
|
||||
self.__create_progress(
|
||||
bar, int(
|
||||
response.headers.get(
|
||||
'content-length', 0)) or None)
|
||||
with temp.open("wb") as f:
|
||||
async for chunk in response.content.iter_chunked(self.chunk):
|
||||
f.write(chunk)
|
||||
# self.__update_progress(bar, len(chunk))
|
||||
# self.__remove_progress(bar)
|
||||
self.__update_progress(bar, len(chunk))
|
||||
self.manager.move(temp, file)
|
||||
except (
|
||||
ClientProxyConnectionError,
|
||||
ClientSSLError,
|
||||
ClientConnectionError,
|
||||
TimeoutError,
|
||||
):
|
||||
self.__create_progress(bar, None)
|
||||
except ServerTimeoutError:
|
||||
self.manager.delete(temp)
|
||||
# self.__remove_progress(bar)
|
||||
self.__create_progress(bar, None)
|
||||
|
||||
# @staticmethod
|
||||
# def __create_progress(bar, total: int | None):
|
||||
# if bar:
|
||||
# bar.update(total=total)
|
||||
#
|
||||
# @staticmethod
|
||||
# def __update_progress(bar, advance: int):
|
||||
# if bar:
|
||||
# bar.advance(advance)
|
||||
#
|
||||
# @staticmethod
|
||||
# def __remove_progress(bar):
|
||||
# pass
|
||||
@staticmethod
|
||||
def __create_progress(bar, total: int | None):
|
||||
if bar:
|
||||
bar.update(total=total)
|
||||
|
||||
@staticmethod
|
||||
def __update_progress(bar, advance: int):
|
||||
if bar:
|
||||
bar.advance(advance)
|
||||
|
||||
@ -1,9 +1,6 @@
|
||||
from aiohttp import ClientConnectionError
|
||||
from aiohttp import ClientProxyConnectionError
|
||||
from aiohttp import ClientSSLError
|
||||
from aiohttp import ClientSession
|
||||
|
||||
# from aiohttp import ClientTimeout
|
||||
from aiohttp import ClientTimeout
|
||||
from aiohttp import ServerTimeoutError
|
||||
|
||||
__all__ = ['Html']
|
||||
|
||||
@ -18,7 +15,9 @@ class Html:
|
||||
self.proxy = proxy
|
||||
self.session = ClientSession(
|
||||
headers=headers | {
|
||||
"Referer": "https://www.xiaohongshu.com/", })
|
||||
"Referer": "https://www.xiaohongshu.com/", },
|
||||
timeout=ClientTimeout(connect=timeout),
|
||||
)
|
||||
|
||||
async def request_url(
|
||||
self,
|
||||
@ -30,11 +29,7 @@ class Html:
|
||||
proxy=self.proxy,
|
||||
) as response:
|
||||
return await response.text() if text else response.url
|
||||
except (
|
||||
ClientProxyConnectionError,
|
||||
ClientSSLError,
|
||||
ClientConnectionError,
|
||||
):
|
||||
except ServerTimeoutError:
|
||||
return ""
|
||||
|
||||
@staticmethod
|
||||
|
||||
@ -6,12 +6,11 @@ __all__ = ["Manager"]
|
||||
|
||||
|
||||
class Manager:
|
||||
headers = {
|
||||
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) "
|
||||
"Chrome/119.0.0.0 Safari/537.36", }
|
||||
|
||||
def __init__(self, root: Path):
|
||||
def __init__(self, root: Path, ua: str):
|
||||
self.temp = root.joinpath("./temp")
|
||||
self.headers = {
|
||||
"User-Agent": ua or "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) "
|
||||
"Chrome/119.0.0.0 Safari/537.36 Edg/119.0.0.0", }
|
||||
|
||||
@staticmethod
|
||||
def is_exists(path: Path) -> bool:
|
||||
|
||||
@ -9,7 +9,8 @@ class Settings:
|
||||
default = {
|
||||
"path": "",
|
||||
"folder": "Download",
|
||||
"proxies": None,
|
||||
"user_agent": "",
|
||||
"proxy": "",
|
||||
"timeout": 10,
|
||||
"chunk": 1024 * 1024,
|
||||
}
|
||||
|
||||
@ -30,20 +30,27 @@ __all__ = ['XHS', 'XHSDownloader']
|
||||
|
||||
class XHS:
|
||||
ROOT = Path(__file__).resolve().parent.parent
|
||||
link = compile(r"https://www\.xiaohongshu\.com/explore/[a-z0-9]+")
|
||||
share = compile(r"https://www\.xiaohongshu\.com/discovery/item/[a-z0-9]+")
|
||||
short = compile(r"https://xhslink\.com/[A-Za-z0-9]+")
|
||||
LINK = compile(r"https://www\.xiaohongshu\.com/explore/[a-z0-9]+")
|
||||
SHARE = compile(r"https://www\.xiaohongshu\.com/discovery/item/[a-z0-9]+")
|
||||
SHORT = compile(r"https://xhslink\.com/[A-Za-z0-9]+")
|
||||
__INSTANCE = None
|
||||
|
||||
def __new__(cls, *args, **kwargs):
|
||||
if not cls.__INSTANCE:
|
||||
cls.__INSTANCE = super().__new__(cls)
|
||||
return cls.__INSTANCE
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
path="",
|
||||
folder="Download",
|
||||
proxy=None,
|
||||
user_agent: str = None,
|
||||
proxy: str = None,
|
||||
timeout=10,
|
||||
chunk=1024 * 1024,
|
||||
**kwargs,
|
||||
):
|
||||
self.manager = Manager(self.ROOT)
|
||||
self.manager = Manager(self.ROOT, user_agent)
|
||||
self.html = Html(self.manager.headers, proxy, timeout)
|
||||
self.image = Image()
|
||||
self.video = Video()
|
||||
@ -81,12 +88,12 @@ class XHS:
|
||||
async def __deal_links(self, url: str) -> list:
|
||||
urls = []
|
||||
for i in url.split():
|
||||
if u := self.short.search(i):
|
||||
if u := self.SHORT.search(i):
|
||||
i = await self.html.request_url(
|
||||
u.group(), False)
|
||||
if u := self.share.search(i):
|
||||
if u := self.SHARE.search(i):
|
||||
urls.append(u.group())
|
||||
elif u := self.link.search(i):
|
||||
elif u := self.LINK.search(i):
|
||||
urls.append(u.group())
|
||||
return urls
|
||||
|
||||
@ -118,18 +125,19 @@ class XHS:
|
||||
await self.html.session.close()
|
||||
await self.download.session.close()
|
||||
|
||||
def rich_log(self, log, text, style="b bright_green"):
|
||||
@staticmethod
|
||||
def rich_log(log, text, style="b bright_green"):
|
||||
if log:
|
||||
log.write(Text(text, style=style))
|
||||
else:
|
||||
self.console.print(text, style=style)
|
||||
print(text)
|
||||
|
||||
|
||||
class XHSDownloader(App):
|
||||
VERSION = 1.6
|
||||
BETA = True
|
||||
ROOT = Path(__file__).resolve().parent.parent
|
||||
APP = XHS(**Settings(ROOT).run())
|
||||
# APP = XHS(**Settings(ROOT).run())
|
||||
CSS_PATH = ROOT.joinpath(
|
||||
"static/XHS-Downloader.tcss")
|
||||
BINDINGS = [
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user