更新无水印视频提取逻辑

This commit is contained in:
JoeamAmier
2023-12-10 16:11:29 +08:00
parent a1d296191b
commit 9fd803ac09
13 changed files with 55 additions and 26 deletions

View File

@@ -55,6 +55,7 @@ multiple_demo = f"{image_demo} {video_demo}"
path = "" # 作品数据/文件保存根路径,默认值:项目根路径 path = "" # 作品数据/文件保存根路径,默认值:项目根路径
folder_name = "Download" # 作品文件储存文件夹名称(自动创建),默认值:Download folder_name = "Download" # 作品文件储存文件夹名称(自动创建),默认值:Download
user_agent = "" # 请求头 User-Agent user_agent = "" # 请求头 User-Agent
cookie = "" # 小红书网页版 Cookie,无需登录
proxy = "" # 网络代理 proxy = "" # 网络代理
timeout = 5 # 网络请求超时限制,单位:秒,默认值:10 timeout = 5 # 网络请求超时限制,单位:秒,默认值:10
chunk = 1024 * 1024 # 下载文件时,每次从服务器获取的数据块大小,单位:字节 chunk = 1024 * 1024 # 下载文件时,每次从服务器获取的数据块大小,单位:字节
@@ -64,6 +65,7 @@ max_retry = 2 # 请求数据失败时,重试的最大次数,单位:秒,
async with XHS(path=path, async with XHS(path=path,
folder_name=folder_name, folder_name=folder_name,
user_agent=user_agent, user_agent=user_agent,
cookie=cookie,
proxy=proxy, proxy=proxy,
timeout=timeout, timeout=timeout,
chunk=chunk, chunk=chunk,
@@ -106,6 +108,12 @@ async with XHS(path=path,
<td align="center">默认 UA</td> <td align="center">默认 UA</td>
</tr> </tr>
<tr> <tr>
<td align="center">cookie</td>
<td align="center">str</td>
<td align="center">小红书网页版 Cookie,无需登录</td>
<td align="center">默认 Cookie</td>
</tr>
<tr>
<td align="center">proxy</td> <td align="center">proxy</td>
<td align="center">str</td> <td align="center">str</td>
<td align="center">设置代理</td> <td align="center">设置代理</td>
@@ -131,6 +139,16 @@ async with XHS(path=path,
</tr> </tr>
</tbody> </tbody>
</table> </table>
<h1>🌐 Cookie</h1>
<ol>
<li>打开浏览器(可选无痕模式启动),访问小红书任意网页</li>
<li><code>F12</code> 打开开发人员工具</li>
<li>选择 <code>控制台</code> 选项卡</li>
<li>输入 <code>document.cookie</code> 后回车确认</li>
<li>输出内容即为所需 Cookie</li>
</ol>
<br>
<img src="static/获取Cookie示意图.png" alt="">
<h1>♥️ 支持项目</h1> <h1>♥️ 支持项目</h1>
<p>如果 <b>XHS-Downloader</b> 对您有帮助,请考虑为它点个 <b>Star</b> ⭐,感谢您的支持!</p> <p>如果 <b>XHS-Downloader</b> 对您有帮助,请考虑为它点个 <b>Star</b> ⭐,感谢您的支持!</p>
<table> <table>

View File

@@ -15,6 +15,7 @@ async def example():
path = "" # 作品数据/文件保存根路径,默认值:项目根路径 path = "" # 作品数据/文件保存根路径,默认值:项目根路径
folder_name = "Download" # 作品文件储存文件夹名称(自动创建),默认值:Download folder_name = "Download" # 作品文件储存文件夹名称(自动创建),默认值:Download
user_agent = "" # 请求头 User-Agent user_agent = "" # 请求头 User-Agent
cookie = "" # 小红书网页版 Cookie,无需登录
proxy = "" # 网络代理 proxy = "" # 网络代理
timeout = 5 # 网络请求超时限制,单位:秒,默认值:10 timeout = 5 # 网络请求超时限制,单位:秒,默认值:10
chunk = 1024 * 1024 # 下载文件时,每次从服务器获取的数据块大小,单位:字节 chunk = 1024 * 1024 # 下载文件时,每次从服务器获取的数据块大小,单位:字节
@@ -24,6 +25,7 @@ async def example():
async with XHS(path=path, async with XHS(path=path,
folder_name=folder_name, folder_name=folder_name,
user_agent=user_agent, user_agent=user_agent,
cookie=cookie,
proxy=proxy, proxy=proxy,
timeout=timeout, timeout=timeout,
chunk=chunk, chunk=chunk,

View File

@@ -29,7 +29,7 @@ class Download:
self.proxy = proxy self.proxy = proxy
self.chunk = chunk self.chunk = chunk
self.session = ClientSession( self.session = ClientSession(
headers=manager.headers, headers={"User-Agent": manager.headers["User-Agent"]},
timeout=ClientTimeout(connect=timeout)) timeout=ClientTimeout(connect=timeout))
self.retry = retry_ self.retry = retry_
@@ -60,16 +60,16 @@ class Download:
return True return True
try: try:
async with self.session.get(url, proxy=self.proxy) as response: async with self.session.get(url, proxy=self.proxy) as response:
self.__create_progress( # self.__create_progress(
bar, int( # bar, int(
response.headers.get( # response.headers.get(
'content-length', 0)) or None) # 'content-length', 0)) or None)
with temp.open("wb") as f: with temp.open("wb") as f:
async for chunk in response.content.iter_chunked(self.chunk): async for chunk in response.content.iter_chunked(self.chunk):
f.write(chunk) f.write(chunk)
self.__update_progress(bar, len(chunk)) # self.__update_progress(bar, len(chunk))
self.manager.move(temp, file) self.manager.move(temp, file)
self.__create_progress(bar, None) # self.__create_progress(bar, None)
self.rich_log(log, f"{name} 下载成功") self.rich_log(log, f"{name} 下载成功")
return True return True
except ( except (
@@ -77,19 +77,19 @@ class Download:
ServerDisconnectedError, ServerDisconnectedError,
): ):
self.manager.delete(temp) self.manager.delete(temp)
self.__create_progress(bar, None) # self.__create_progress(bar, None)
self.rich_log(log, f"{name} 下载失败", "bright_red") self.rich_log(log, f"{name} 下载失败", "bright_red")
return False return False
@staticmethod # @staticmethod
def __create_progress(bar, total: int | None): # def __create_progress(bar, total: int | None):
if bar: # if bar:
bar.update(total=total) # bar.update(total=total)
@staticmethod # @staticmethod
def __update_progress(bar, advance: int): # def __update_progress(bar, advance: int):
if bar: # if bar:
bar.advance(advance) # bar.advance(advance)
@staticmethod @staticmethod
def rich_log(log, text, style="bright_green"): def rich_log(log, text, style="bright_green"):

View File

@@ -6,11 +6,18 @@ __all__ = ["Manager"]
class Manager: class Manager:
def __init__(self, root: Path, ua: str, retry: int): def __init__(self, root: Path, ua: str, cookie: str, retry: int):
self.temp = root.joinpath("./temp") self.temp = root.joinpath("./temp")
self.headers = { self.headers = {
"User-Agent": ua or "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) " "User-Agent": ua or "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) "
"Chrome/119.0.0.0 Safari/537.36 Edg/119.0.0.0", } "Chrome/119.0.0.0 Safari/537.36 Edg/119.0.0.0",
"Cookie": cookie or "abRequestId=54c534bb-a2c6-558f-8e03-5b4c5c45635c; xsecappid=xhs-pc-web; a1=18c286a400"
"4jy56qvzejvp631col0hd3032h4zjez50000106381; webId=779c977da3a15b5623015be94bdcc9e9; g"
"id=yYSJYK0qDW8KyYSJYK048quV84Vv2KAhudVhJduUKqySlx2818xfq4888y8KqYy8y2y2f8Jy; web_sess"
"ion=030037a259ce5f15c8d560dc12224a9fdc2ed1; webBuild=3.19.4; websectiga=984412fef754c"
"018e472127b8effd174be8a5d51061c991aadd200c69a2801d6; sec_poison_id=3dd48845-d604-4535"
"-bcc2-a859e97518bf; unread={%22ub%22:%22655eb3d60000000032033955%22%2C%22ue%22:%22656"
"e9ef2000000003801ff3d%22%2C%22uc%22:29}; cache_feeds=[]"}
self.retry = retry self.retry = retry
@staticmethod @staticmethod

View File

@@ -10,6 +10,7 @@ class Settings:
"path": "", "path": "",
"folder_name": "Download", "folder_name": "Download",
"user_agent": "", "user_agent": "",
"cookie": "",
"proxy": "", "proxy": "",
"timeout": 10, "timeout": 10,
"chunk": 1024 * 1024, "chunk": 1024 * 1024,

View File

@@ -6,7 +6,7 @@ __all__ = ['Video']
class Video: class Video:
VIDEO_TOKEN = compile(r'"originVideoKey":"(\S+?\\u002F\S+?)"') VIDEO_TOKEN = compile(r'"originVideoKey":"(\S+?)"')
def get_video_link(self, html: str) -> list: def get_video_link(self, html: str) -> list:
return [Html.format_url(f"https://sns-video-hw.xhscdn.com/{ return [Html.format_url(f"https://sns-video-hw.xhscdn.com/{

View File

@@ -6,7 +6,7 @@ from rich.text import Text
from textual.app import App from textual.app import App
from textual.app import ComposeResult from textual.app import ComposeResult
from textual.binding import Binding from textual.binding import Binding
from textual.containers import Center # from textual.containers import Center
from textual.containers import HorizontalScroll from textual.containers import HorizontalScroll
from textual.containers import ScrollableContainer from textual.containers import ScrollableContainer
from textual.widgets import Button from textual.widgets import Button
@@ -28,8 +28,8 @@ from .Video import Video
__all__ = ['XHS', 'XHSDownloader'] __all__ = ['XHS', 'XHSDownloader']
RELEASES = "https://github.com/JoeanAmier/XHS-Downloader/releases/latest" RELEASES = "https://github.com/JoeanAmier/XHS-Downloader/releases/latest"
VERSION = 1.6 VERSION = 1.7
BETA = False BETA = True
ROOT = Path(__file__).resolve().parent.parent ROOT = Path(__file__).resolve().parent.parent
@@ -49,13 +49,14 @@ class XHS:
path="", path="",
folder_name="Download", folder_name="Download",
user_agent: str = None, user_agent: str = None,
cookie: str = None,
proxy: str = "", proxy: str = "",
timeout=10, timeout=10,
chunk=1024 * 1024, chunk=1024 * 1024,
max_retry=5, max_retry=5,
**kwargs, **kwargs,
): ):
self.manager = Manager(ROOT, user_agent, max_retry) self.manager = Manager(ROOT, user_agent, cookie, max_retry)
self.html = Html( self.html = Html(
self.manager.headers, self.manager.headers,
proxy, proxy,
@@ -175,10 +176,10 @@ class XHSDownloader(App):
HorizontalScroll(Button("下载无水印图片/视频", id="deal"), HorizontalScroll(Button("下载无水印图片/视频", id="deal"),
Button("读取剪贴板", id="paste"), Button("读取剪贴板", id="paste"),
Button("清空输入框", id="reset"), ), Button("清空输入框", id="reset"), ),
Label(Text("程序状态", style="b bright_blue")), # Label(Text("程序状态", style="b bright_blue")),
) )
with Center(): # with Center():
yield ProgressBar(total=None) # yield ProgressBar(total=None)
yield RichLog(markup=True) yield RichLog(markup=True)
yield Footer() yield Footer()

Binary file not shown.

Before

Width:  |  Height:  |  Size: 15 KiB

After

Width:  |  Height:  |  Size: 9.7 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 51 KiB

After

Width:  |  Height:  |  Size: 59 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 25 KiB

After

Width:  |  Height:  |  Size: 24 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 102 KiB

After

Width:  |  Height:  |  Size: 132 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 140 KiB

After

Width:  |  Height:  |  Size: 158 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 15 KiB