diff --git a/README.md b/README.md
index 5317690..7ebcb28 100644
--- a/README.md
+++ b/README.md
@@ -18,6 +18,7 @@
✅ 采集小红书作品信息
✅ 提取小红书作品下载地址
✅ 下载小红书无水印作品文件
+✅ 下载小红书 livePhoto 文件(非无水印)
✅ 自动跳过已下载的作品文件
✅ 作品文件完整性处理机制
✅ 自定义图文作品文件下载格式
@@ -148,8 +149,7 @@ async def example():
# 实例对象
work_path = "D:\\" # 作品数据/文件保存根路径,默认值:项目根路径
folder_name = "Download" # 作品文件储存文件夹名称(自动创建),默认值:Download
- user_agent = "" # 请求头 User-Agent,可选参数
- cookie = "" # 小红书网页版 Cookie,无需登录,必需参数
+ cookie = "" # 小红书网页版 Cookie,无需登录,必需参数,登录状态对数据采集有影响
proxy = None # 网络代理
timeout = 5 # 请求数据超时限制,单位:秒,默认值:10
chunk = 1024 * 1024 * 10 # 下载文件时,每次从服务器获取的数据块大小,单位:字节
@@ -161,7 +161,6 @@ async def example():
pass # 使用默认参数
async with XHS(work_path=work_path,
folder_name=folder_name,
- user_agent=user_agent,
cookie=cookie,
proxy=proxy,
timeout=timeout,
@@ -211,12 +210,6 @@ async def example():
发布时间 作者昵称 作品标题 |
-| user_agent |
-str |
-请求头 User-Agent |
-默认 UA |
-
-
| cookie |
str |
小红书网页版 Cookie,无需登录 |
@@ -271,6 +264,12 @@ async def example():
true |
+| live_download |
+bool |
+图文动图文件下载开关 |
+false |
+
+
| folder_mode |
bool |
是否将每个作品的文件储存至单独的文件夹;文件夹名称与文件名称保持一致 |
diff --git a/locale/en_GB/LC_MESSAGES/xhs.mo b/locale/en_GB/LC_MESSAGES/xhs.mo
index 8a5b173..860ea0d 100644
Binary files a/locale/en_GB/LC_MESSAGES/xhs.mo and b/locale/en_GB/LC_MESSAGES/xhs.mo differ
diff --git a/locale/en_GB/LC_MESSAGES/xhs.po b/locale/en_GB/LC_MESSAGES/xhs.po
index 83950ca..4ae6595 100644
--- a/locale/en_GB/LC_MESSAGES/xhs.po
+++ b/locale/en_GB/LC_MESSAGES/xhs.po
@@ -109,9 +109,6 @@ msgstr "Xiaohongshu web version cookie, no login required, parameters have been
msgid "小红书网页版 Cookie,无需登录,参数未设置"
msgstr "Xiaohongshu web version cookie, no login required, parameters not set"
-msgid "默认 User-Agent"
-msgstr "Default User-Agent"
-
msgid "不使用代理"
msgstr "No proxy"
@@ -247,6 +244,9 @@ msgstr "Video works download switch"
msgid "图文作品下载开关"
msgstr "Image works download switch"
+msgid "动图文件下载开关"
+msgstr "Live photo download switch"
+
msgid "配置文件 settings.json 缺少必要的参数,请删除该文件,然后重新运行程序,自动生成默认配置文件!"
msgstr "The configuration file settings.json is missing necessary parameters. Please delete the file and run the program again to automatically generate the default configuration file!"
diff --git a/locale/zh_CN/LC_MESSAGES/xhs.po b/locale/zh_CN/LC_MESSAGES/xhs.po
index 4ab8049..770e6b3 100644
--- a/locale/zh_CN/LC_MESSAGES/xhs.po
+++ b/locale/zh_CN/LC_MESSAGES/xhs.po
@@ -109,9 +109,6 @@ msgstr ""
msgid "小红书网页版 Cookie,无需登录,参数未设置"
msgstr ""
-msgid "默认 User-Agent"
-msgstr ""
-
msgid "不使用代理"
msgstr ""
@@ -247,6 +244,9 @@ msgstr ""
msgid "图文作品下载开关"
msgstr ""
+msgid "动图文件下载开关"
+msgstr ""
+
msgid "配置文件 settings.json 缺少必要的参数,请删除该文件,然后重新运行程序,自动生成默认配置文件!"
msgstr ""
diff --git a/main.py b/main.py
index 6c15b48..b814ebc 100644
--- a/main.py
+++ b/main.py
@@ -16,8 +16,7 @@ async def example():
# 实例对象
work_path = "D:\\" # 作品数据/文件保存根路径,默认值:项目根路径
folder_name = "Download" # 作品文件储存文件夹名称(自动创建),默认值:Download
- user_agent = "" # 请求头 User-Agent,可选参数
- cookie = "" # 小红书网页版 Cookie,无需登录,必需参数
+ cookie = "" # 小红书网页版 Cookie,无需登录,必需参数,登录状态对数据采集有影响
proxy = None # 网络代理
timeout = 5 # 请求数据超时限制,单位:秒,默认值:10
chunk = 1024 * 1024 * 10 # 下载文件时,每次从服务器获取的数据块大小,单位:字节
@@ -29,7 +28,6 @@ async def example():
pass # 使用默认参数
async with XHS(work_path=work_path,
folder_name=folder_name,
- user_agent=user_agent,
cookie=cookie,
proxy=proxy,
timeout=timeout,
diff --git a/source/CLI/main.py b/source/CLI/main.py
index 0076eda..4bb5e5c 100644
--- a/source/CLI/main.py
+++ b/source/CLI/main.py
@@ -123,7 +123,6 @@ class CLI:
("--work_path", "-wp", "str", _("作品数据 / 文件保存根路径")),
("--folder_name", "-fn", "str", _("作品文件储存文件夹名称")),
("--name_format", "-nf", "str", _("作品文件名称格式")),
- ("--user_agent", "-ua", "str", _("User-Agent")),
("--cookie", "-ck", "str", _("小红书网页版 Cookie,无需登录")),
("--proxy", "-p", "str", _("网络代理")),
("--timeout", "-t", "int", _("请求数据超时限制,单位:秒")),
@@ -131,6 +130,7 @@ class CLI:
("--max_retry", "-mr", "int", _("请求数据失败时,重试的最大次数")),
("--record_data", "-rd", "bool", _("是否记录作品数据至文件")),
("--image_format", "-if", "choice", _("图文作品文件下载格式,支持:PNG、WEBP")),
+ ("--live_download", "-ld", "bool", _("动图文件下载开关")),
("--folder_mode", "-fm", "bool", _("是否将每个作品的文件储存至单独的文件夹")),
("--language", "-l", "choice", _("设置程序语言,目前支持:zh_CN、en_GB")),
("--settings", "-s", "str", _("读取指定配置文件")),
@@ -163,7 +163,6 @@ class CLI:
)
@option("--folder_name", "-fn", )
@option("--name_format", "-nf", )
-@option("--user_agent", "-ua", )
@option("--cookie", "-ck", )
@option("--proxy", "-p", )
@option("--timeout", "-t", type=int, )
@@ -171,6 +170,7 @@ class CLI:
@option("--max_retry", "-mr", type=int, )
@option("--record_data", "-rd", type=bool, )
@option("--image_format", "-if", type=Choice(["png", "PNG", "webp", "WEBP"]), )
+@option("--live_download", "-ld", type=bool, )
@option("--folder_mode", "-fm", type=bool, )
@option("--language", "-l",
type=Choice(["zh_CN", "en_GB"]), )
diff --git a/source/TUI/setting.py b/source/TUI/setting.py
index ddff758..8602aac 100644
--- a/source/TUI/setting.py
+++ b/source/TUI/setting.py
@@ -39,9 +39,6 @@ class Setting(Screen):
Label(self.message("作品文件名称格式"), classes="params", ),
Input(self.data["name_format"], placeholder=self.message("发布时间 作者昵称 作品标题"), valid_empty=True,
id="name_format", ),
- Label(self.message("User-Agent"), classes="params", ),
- Input(self.data["user_agent"], placeholder=self.message("默认 User-Agent"), valid_empty=True,
- id="user_agent", ),
Label(self.message("小红书网页版 Cookie"), classes="params", ),
Input(placeholder=self.__check_cookie(), valid_empty=True, id="cookie", ),
Label(self.message("网络代理"), classes="params", ),
@@ -58,11 +55,16 @@ class Setting(Screen):
Checkbox(self.message("视频作品下载开关"), id="video_download", value=self.data["video_download"], ),
Checkbox(self.message("图文作品下载开关"), id="image_download", value=self.data["image_download"], ),
classes="horizontal-layout"),
+ Label(),
+ Container(
+ Checkbox(self.message("动图文件下载开关"), id="live_download", value=self.data["live_download"], ),
+ classes="horizontal-layout"),
Container(
Label(self.message("图片下载格式"), classes="params", ),
Label(self.message("程序语言"), classes="params", ),
classes="horizontal-layout",
),
+ Label(),
Container(
Select.from_values(
("PNG", "WEBP"),
@@ -95,7 +97,6 @@ class Setting(Screen):
"work_path": self.query_one("#work_path").value,
"folder_name": self.query_one("#folder_name").value,
"name_format": self.query_one("#name_format").value,
- "user_agent": self.query_one("#user_agent").value,
"cookie": self.query_one("#cookie").value or self.data["cookie"],
"proxy": self.query_one("#proxy").value or None,
"timeout": int(self.query_one("#timeout").value),
@@ -107,6 +108,7 @@ class Setting(Screen):
"language": self.query_one("#language").value,
"image_download": self.query_one("#image_download").value,
"video_download": self.query_one("#video_download").value,
+ "live_download": self.query_one("#live_download").value,
# "server": False,
})
diff --git a/source/TUI/update.py b/source/TUI/update.py
index 657bb31..89490bd 100644
--- a/source/TUI/update.py
+++ b/source/TUI/update.py
@@ -40,26 +40,45 @@ class Update(ModalScreen):
async def check_update(self) -> None:
try:
url = await self.xhs.html.request_url(RELEASES, False, None, timeout=ClientTimeout(connect=5))
- latest_major, latest_minor = map(
- int, url.split("/")[-1].split(".", 1))
- if latest_major > VERSION_MAJOR or latest_minor > VERSION_MINOR:
- tip = Text(f"{self.message("检测到新版本:{0}.{1}").format(
- VERSION_MAJOR, VERSION_MINOR)}\n{RELEASES}", style=WARNING)
- elif latest_minor == VERSION_MINOR and VERSION_BETA:
- tip = Text(
- f"{self.message("当前版本为开发版, 可更新至正式版")}\n{RELEASES}",
- style=WARNING)
- elif VERSION_BETA:
- tip = Text(
- self.message("当前已是最新开发版"),
- style=WARNING)
- else:
- tip = Text(
- self.message("当前已是最新正式版"),
- style=INFO)
+ version = url.split("/")[-1]
+ match self.compare_versions(f"{VERSION_MAJOR}.{VERSION_MINOR}", version, VERSION_BETA):
+ case 4:
+ tip = Text(f"{self.message("检测到新版本:{0}.{1}").format(
+ *version.split("."))}\n{RELEASES}", style=WARNING)
+ case 3:
+ tip = Text(
+ f"{self.message("当前版本为开发版, 可更新至正式版")}\n{RELEASES}",
+ style=WARNING)
+ case 2:
+ tip = Text(
+ self.message("当前已是最新开发版"),
+ style=WARNING)
+ case 1:
+ tip = Text(
+ self.message("当前已是最新正式版"),
+ style=INFO)
+ case _:
+ raise ValueError
except ValueError:
tip = Text(self.message("检测新版本失败"), style=ERROR)
self.dismiss(tip)
def on_mount(self) -> None:
self.check_update()
+
+ @staticmethod
+ def compare_versions(
+ current_version: str,
+ target_version: str,
+ is_development: bool) -> int:
+ current_major, current_minor = map(int, current_version.split('.'))
+ target_major, target_minor = map(int, target_version.split('.'))
+
+ if target_major > current_major:
+ return 4
+ if target_major == current_major:
+ if target_minor > current_minor:
+ return 4
+ if target_minor == current_minor:
+ return 3 if is_development else 1
+ return 2 if is_development else 1
diff --git a/source/application/app.py b/source/application/app.py
index c512105..8a511dd 100644
--- a/source/application/app.py
+++ b/source/application/app.py
@@ -63,6 +63,7 @@ class XHS:
image_format="PNG",
image_download=True,
video_download=True,
+ live_download=False,
folder_mode=False,
language="zh_CN",
# server=False,
@@ -77,7 +78,7 @@ class XHS:
work_path,
folder_name,
name_format,
- user_agent,
+ # user_agent,
chunk,
self.read_browser_cookie(read_cookie) or cookie,
proxy,
@@ -87,6 +88,7 @@ class XHS:
image_format,
image_download,
video_download,
+ live_download,
folder_mode,
# server,
self.message,
@@ -106,7 +108,7 @@ class XHS:
self.site = None
def __extract_image(self, container: dict, data: Namespace):
- container["下载地址"] = self.image.get_image_link(
+ container["下载地址"], container["动图地址"] = self.image.get_image_link(
data, self.manager.image_format)
def __extract_video(self, container: dict, data: Namespace):
@@ -119,7 +121,8 @@ class XHS:
logging(
log, self.message("作品 {0} 存在下载记录,跳过下载").format(i))
else:
- path, result = await self.download.run(u, index, name, container["作品类型"], log, bar)
+ path, result = await self.download.run(u, container["动图地址"], index, name, container["作品类型"],
+ log, bar)
await self.__add_record(i, result)
elif not u:
logging(log, self.message("提取作品文件下载地址失败"), ERROR)
@@ -128,6 +131,7 @@ class XHS:
async def save_data(self, data: dict, ):
data["采集时间"] = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
data["下载地址"] = " ".join(data["下载地址"])
+ data["动图地址"] = " ".join(i or "NaN" for i in data["动图地址"])
await self.data_recorder.add(**data)
async def __add_record(self, id_: str, result: tuple) -> None:
diff --git a/source/application/download.py b/source/application/download.py
index 9d485da..9ec80d5 100644
--- a/source/application/download.py
+++ b/source/application/download.py
@@ -32,18 +32,21 @@ class Download:
self.message = manager.message
self.folder_mode = manager.folder_mode
self.video_format = "mp4"
+ self.live_format = "mp4"
self.image_format = manager.image_format
self.image_download = manager.image_download
self.video_download = manager.video_download
+ self.live_download = manager.live_download
- async def run(self, urls: list, index: list | tuple | None, name: str, type_: str, log, bar) -> tuple[Path, tuple]:
+ async def run(self, urls: list, lives: list, index: list | tuple | None, name: str, type_: str, log, bar) -> tuple[
+ Path, tuple]:
path = self.__generate_path(name)
match type_:
case "视频":
tasks = self.__ready_download_video(urls, path, name, log)
case "图文":
tasks = self.__ready_download_image(
- urls, index, path, name, log)
+ urls, lives, index, path, name, log)
case _:
raise ValueError
tasks = [
@@ -73,14 +76,14 @@ class Download:
if not self.video_download:
logging(log, self.message("视频作品下载功能已关闭,跳过下载"))
return []
- if any(path.glob(f"{name}.*")):
- logging(log, self.message("{0} 文件已存在,跳过下载").format(name))
+ if self.__check_exists(path, f"{name}.{self.video_format}", log):
return []
return [(urls[0], name, self.video_format)]
def __ready_download_image(
self,
urls: list[str],
+ lives: list[str],
index: list | tuple | None,
path: Path,
name: str,
@@ -89,21 +92,30 @@ class Download:
if not self.image_download:
logging(log, self.message("图文作品下载功能已关闭,跳过下载"))
return tasks
- for i, j in enumerate(urls, start=1):
+ for i, j in enumerate(zip(urls, lives), start=1):
if index and i not in index:
continue
file = f"{name}_{i}"
- if any(path.glob(f"{file}.*")):
- logging(
- log, self.message(
- "{0} 文件已存在,跳过下载").format(name))
+ if not self.__check_exists(
+ path, f"{file}.{self.image_format}", log):
+ tasks.append([j[0], file, self.image_format])
+ if not self.live_download or not j[1] or self.__check_exists(
+ path, f"{file}.{self.live_format}", log):
continue
- tasks.append([j, file, self.image_format])
+ tasks.append([j[1], file, self.live_format])
return tasks
+ def __check_exists(self, path: Path, name: str, log, ) -> bool:
+ if path.joinpath(name).exists():
+ logging(
+ log, self.message(
+ "{0} 文件已存在,跳过下载").format(name))
+ return True
+ return False
+
@re_download
async def __download(self, url: str, path: Path, name: str, format_: str, log, bar):
- temp = self.temp.joinpath(name)
+ temp = self.temp.joinpath(f"{name}.{format_}")
try:
async with self.session.get(url, proxy=self.proxy) as response:
if response.status != 200:
@@ -124,7 +136,7 @@ class Download:
# self.__update_progress(bar, len(chunk))
self.manager.move(temp, real)
# self.__create_progress(bar, None)
- logging(log, self.message("文件 {0} 下载成功").format(name))
+ logging(log, self.message("文件 {0} 下载成功").format(real.name))
return True
except ClientError as error:
self.manager.delete(temp)
diff --git a/source/application/image.py b/source/application/image.py
index 8ff9dad..2a493e3 100644
--- a/source/application/image.py
+++ b/source/application/image.py
@@ -6,8 +6,9 @@ __all__ = ['Image']
class Image:
@classmethod
- def get_image_link(cls, data: Namespace, format_: str) -> list:
+ def get_image_link(cls, data: Namespace, format_: str) -> tuple[list, list]:
images = data.safe_extract("imageList", [])
+ live_link = cls.__get_live_link(images)
token_list = [
cls.__extract_image_token(
Namespace.object_extract(
@@ -15,10 +16,10 @@ class Image:
match format_:
case "png":
return [Html.format_url(cls.__generate_png_link(i))
- for i in token_list]
+ for i in token_list], live_link
case "webp":
return [Html.format_url(cls.__generate_webp_link(i))
- for i in token_list]
+ for i in token_list], live_link
case _:
raise ValueError
@@ -33,3 +34,12 @@ class Image:
@staticmethod
def __extract_image_token(url: str) -> str:
return "/".join(url.split("/")[5:]).split("!")[0]
+
+ @staticmethod
+ def __get_live_link(items: list) -> list:
+ links = []
+ for item in items:
+ links.append(
+ Html.format_url(Namespace.object_extract(
+ item, "stream.h264[0].masterUrl")))
+ return links
diff --git a/source/module/__init__.py b/source/module/__init__.py
index af5ccbf..e03f620 100644
--- a/source/module/__init__.py
+++ b/source/module/__init__.py
@@ -19,7 +19,6 @@ from .static import (
WARNING,
INFO,
USERSCRIPT,
- USERAGENT,
HEADERS,
PROJECT,
)
@@ -49,7 +48,6 @@ __all__ = [
"WARNING",
"INFO",
"USERSCRIPT",
- "USERAGENT",
"HEADERS",
"retry",
"logging",
diff --git a/source/module/manager.py b/source/module/manager.py
index 94fd878..98851ca 100644
--- a/source/module/manager.py
+++ b/source/module/manager.py
@@ -9,7 +9,6 @@ from aiohttp import ClientSession
from aiohttp import ClientTimeout
from .static import HEADERS
-from .static import USERAGENT
__all__ = ["Manager"]
@@ -39,7 +38,6 @@ class Manager:
path: str,
folder: str,
name_format: str,
- user_agent: str,
chunk: int,
cookie: str,
proxy: str,
@@ -49,6 +47,7 @@ class Manager:
image_format: str,
image_download: bool,
video_download: bool,
+ live_download: bool,
folder_mode: bool,
# server: bool,
transition: Callable[[str], str],
@@ -57,8 +56,7 @@ class Manager:
self.temp = root.joinpath("./temp")
self.path = self.__check_path(path)
self.folder = self.__check_folder(folder)
- self.blank_headers = HEADERS | {
- "User-Agent": user_agent or USERAGENT, }
+ self.blank_headers = HEADERS
self.headers = self.blank_headers | {"Cookie": cookie}
self.retry = retry
self.chunk = chunk
@@ -78,6 +76,7 @@ class Manager:
self.message = transition
self.image_download = self.check_bool(image_download, True)
self.video_download = self.check_bool(video_download, True)
+ self.live_download = self.check_bool(live_download, False)
# self.server = self.check_bool(server, False)
def __check_path(self, path: str) -> Path:
diff --git a/source/module/recorder.py b/source/module/recorder.py
index 639260f..f65c209 100644
--- a/source/module/recorder.py
+++ b/source/module/recorder.py
@@ -67,10 +67,10 @@ class DataRecorder(IDRecorder):
("点赞数量", "TEXT"),
("作者昵称", "TEXT"),
("作者ID", "TEXT"),
- # ("IP归属地", "TEXT"),
("作者链接", "TEXT"),
("作品链接", "TEXT"),
("下载地址", "TEXT"),
+ ("动图地址", "TEXT"),
)
def __init__(self, manager: Manager):
diff --git a/source/module/settings.py b/source/module/settings.py
index 0c0580e..6fe88e0 100644
--- a/source/module/settings.py
+++ b/source/module/settings.py
@@ -13,7 +13,6 @@ class Settings:
"work_path": "",
"folder_name": "Download",
"name_format": "发布时间 作者昵称 作品标题",
- "user_agent": "",
"cookie": "",
"proxy": None,
"timeout": 10,
@@ -23,6 +22,7 @@ class Settings:
"image_format": "PNG",
"image_download": True,
"video_download": True,
+ "live_download": False,
"folder_mode": False,
"language": "zh_CN",
# "server": False,
diff --git a/source/module/static.py b/source/module/static.py
index 7488835..4b27079 100644
--- a/source/module/static.py
+++ b/source/module/static.py
@@ -16,7 +16,6 @@ __all__ = [
"WARNING",
"INFO",
"USERSCRIPT",
- "USERAGENT",
"HEADERS",
"PROJECT",
]
@@ -35,24 +34,22 @@ RELEASES = "https://github.com/JoeanAmier/XHS-Downloader/releases/latest"
USERSCRIPT = "https://raw.githubusercontent.com/JoeanAmier/XHS-Downloader/master/static/XHS-Downloader.js"
HEADERS = {
- "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,"
- "application/signed-exchange;v=b3;q=0.7",
- "Accept-Encoding": "gzip, deflate, br",
- "Accept-Language": "zh-CN,zh;q=0.9",
- "Cache-Control": "max-age=0",
- "Dnt": "1",
- "Sec-Ch-Ua": "\"Not_A Brand\";v=\"8\", \"Chromium\";v=\"120\", \"Microsoft Edge\";v=\"120\"",
- "Sec-Ch-Ua-Mobile": "?0",
- "Sec-Ch-Ua-Platform": "\"Windows\"",
- "Sec-Fetch-Dest": "document",
- "Sec-Fetch-Mode": "navigate",
- "Sec-Fetch-Site": "none",
- "Sec-Fetch-User": "?1",
- "Upgrade-Insecure-Requests": "1",
+ 'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,'
+ 'application/signed-exchange;v=b3;q=0.7',
+ 'accept-language': 'zh-SG,zh-CN;q=0.9,zh;q=0.8',
+ 'dnt': '1',
+ 'priority': 'u=0, i',
+ 'sec-ch-ua': '"Google Chrome";v="125", "Chromium";v="125", "Not.A/Brand";v="24"',
+ 'sec-ch-ua-mobile': '?0',
+ 'sec-ch-ua-platform': '"Windows"',
+ 'sec-fetch-dest': 'document',
+ 'sec-fetch-mode': 'navigate',
+ 'sec-fetch-site': 'none',
+ 'sec-fetch-user': '?1',
+ 'upgrade-insecure-requests': '1',
+ 'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 '
+ 'Safari/537.36',
}
-USERAGENT = (
- "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 "
- "Safari/537.36 Edg/121.0.0.0")
MASTER = "b #fff200"
PROMPT = "b turquoise2"