mirror of
https://github.com/JoeanAmier/XHS-Downloader.git
synced 2025-12-26 04:48:05 +08:00
新增自定义作品文件名称格式功能
This commit is contained in:
parent
ea349048c3
commit
e26d4875e3
15
README.md
15
README.md
@ -12,7 +12,7 @@
|
||||
<img alt="GitHub all releases" src="https://img.shields.io/github/downloads/JoeanAmier/XHS-Downloader/total?style=for-the-badge&color=f759ab">
|
||||
</div>
|
||||
<br>
|
||||
<p>🔥 <b>小红书链接提取/作品采集工具</b>:提取账号发布、收藏、点赞作品链接;提取搜索结果作品、用户链接;采集小红书作品信息;提取小红书作品下载地址;下载小红书无水印作品文件!</p>
|
||||
<p>🔥 <b>小红书链接提取/作品采集工具</b>:提取账号发布、收藏、点赞作品链接;提取搜索结果作品链接、用户链接;采集小红书作品信息;提取小红书作品下载地址;下载小红书无水印作品文件!</p>
|
||||
<h1>📑 项目功能</h1>
|
||||
<ul><b>程序功能</b>
|
||||
<li>✅ 采集小红书作品信息</li>
|
||||
@ -26,7 +26,8 @@
|
||||
<li>✅ 后台监听剪贴板下载作品</li>
|
||||
<li>✅ 记录已下载作品 ID</li>
|
||||
<li>✅ 支持命令行下载作品文件</li>
|
||||
<li>✅ 从浏览器读取 Cookie</li>
|
||||
<li>✅ 从浏览器读取 Cookie</li>
|
||||
<li>✅ 自定义文件名称格式</li>
|
||||
<li>☑️ 支持 API 调用功能</li>
|
||||
</ul>
|
||||
<ul><b>脚本功能</b>
|
||||
@ -59,7 +60,7 @@
|
||||
<p>如果仅需下载无水印作品文件,建议选择 <b>程序运行</b>;如果有其他需求,建议选择 <b>源码运行</b>!</p>
|
||||
<p>建议自行设置 <code>cookie</code> 参数,若不设置该参数,程序功能可能无法正常使用!</p>
|
||||
<h2>🖱 程序运行</h2>
|
||||
<p>Windows 10 及以上用户可前往 <a href="https://github.com/JoeanAmier/XHS-Downloader/releases/latest">Releases</a> 下载程序压缩包或安装包,解压或安装后打开程序文件夹,双击运行 <code>main.exe</code> 即可使用。</p>
|
||||
<p>Windows 10 及以上用户可前往 <a href="https://github.com/JoeanAmier/XHS-Downloader/releases/latest">Releases</a> 下载程序压缩包,解压后打开程序文件夹,双击运行 <code>main.exe</code> 即可使用。</p>
|
||||
<p>若通过此方式使用程序,文件默认下载路径为:<code>.\_internal\Download</code>;配置文件路径为:<code>.\_internal\settings.json</code></p>
|
||||
<h2>⌨️ 源码运行</h2>
|
||||
<ol>
|
||||
@ -128,6 +129,7 @@ async def example():
|
||||
</pre>
|
||||
<h1>⚙️ 配置文件</h1>
|
||||
<p>项目根目录下的 <code>settings.json</code> 文件,首次运行自动生成,可以自定义部分运行参数。</p>
|
||||
<p>如果设置了无效的参数值,程序将会使用参数默认值!</p>
|
||||
<table>
|
||||
<thead>
|
||||
<tr>
|
||||
@ -151,6 +153,12 @@ async def example():
|
||||
<td align="center">Download</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td align="center">name_format</td>
|
||||
<td align="center">str</td>
|
||||
<td align="center">作品文件名称格式,字段之间使用空格分隔,支持字段:<code>收藏数量</code>、<code>评论数量</code>、<code>分享数量</code>、<code>点赞数量</code>、<code>作品标签</code>、<code>作品ID</code>、<code>作品标题</code>、<code>作品描述</code>、<code>作品类型</code>、<code>发布时间</code>、<code>最后更新时间</code>、<code>作者昵称</code>、<code>作者ID</code></td>
|
||||
<td align="center"><code>发布时间 作者昵称 作品标题</code></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td align="center">user_agent</td>
|
||||
<td align="center">str</td>
|
||||
<td align="center">请求头 User-Agent</td>
|
||||
@ -260,6 +268,7 @@ async def example():
|
||||
<ul>
|
||||
<li>微信(其他事务): Downloader_Tools</li>
|
||||
<li>微信公众号(问题解答): Downloader Tools</li>
|
||||
<li><b>Discord 社区</b>: <a href="https://discord.com/invite/ZYtmgKud9Y">点击加入社区</a></li>
|
||||
<li>QQ 群聊(使用交流): <a href="https://github.com/JoeanAmier/XHS-Downloader/blob/master/static/QQ%E7%BE%A4%E8%81%8A%E4%BA%8C%E7%BB%B4%E7%A0%81.png">扫码加入群聊</a></li>
|
||||
</ul>
|
||||
<p><b>说明:</b>QQ 群聊仅限于讨论项目使用问题,严禁发布任何广告,严禁讨论任何账号交易、账号流量、流量变现、灰色产业等相关的内容!</p>
|
||||
|
||||
Binary file not shown.
@ -249,3 +249,9 @@ msgstr "Image works download switch"
|
||||
|
||||
msgid "配置文件 settings.json 缺少必要的参数,请删除该文件,然后重新运行程序,自动生成默认配置文件!"
|
||||
msgstr "The configuration file settings.json is missing necessary parameters. Please delete the file and run the program again to automatically generate the default configuration file!"
|
||||
|
||||
msgid "作品文件名称格式"
|
||||
msgstr "Format of works file name"
|
||||
|
||||
msgid "邀请链接:"
|
||||
msgstr "Invitation link: "
|
||||
|
||||
@ -249,3 +249,9 @@ msgstr ""
|
||||
|
||||
msgid "配置文件 settings.json 缺少必要的参数,请删除该文件,然后重新运行程序,自动生成默认配置文件!"
|
||||
msgstr ""
|
||||
|
||||
msgid "作品文件名称格式"
|
||||
msgstr ""
|
||||
|
||||
msgid "邀请链接:"
|
||||
msgstr ""
|
||||
|
||||
@ -122,6 +122,7 @@ class CLI:
|
||||
("--index", "-i", "str", _("下载指定序号的图片文件,仅对图文作品生效;多个序号输入示例:\"1 3 5 7\"")),
|
||||
("--work_path", "-wp", "str", _("作品数据 / 文件保存根路径")),
|
||||
("--folder_name", "-fn", "str", _("作品文件储存文件夹名称")),
|
||||
("--name_format", "-nf", "str", _("作品文件名称格式")),
|
||||
("--user_agent", "-ua", "str", _("User-Agent")),
|
||||
("--cookie", "-ck", "str", _("小红书网页版 Cookie,无需登录")),
|
||||
("--proxy", "-p", "str", _("网络代理")),
|
||||
@ -161,6 +162,7 @@ class CLI:
|
||||
type=Path(file_okay=False),
|
||||
)
|
||||
@option("--folder_name", "-fn", )
|
||||
@option("--name_format", "-nf", )
|
||||
@option("--user_agent", "-ua", )
|
||||
@option("--cookie", "-ck", )
|
||||
@option("--proxy", "-p", )
|
||||
@ -173,9 +175,8 @@ class CLI:
|
||||
@option("--language", "-l",
|
||||
type=Choice(["zh_CN", "en_GB"]), )
|
||||
@option("--settings", "-s", type=Path(dir_okay=False), )
|
||||
@option("--browser_cookie", "-bc",
|
||||
type=Choice(list(BrowserCookie.SUPPORT_BROWSER.keys()) + [str(i) for i in range(1, 11)]),
|
||||
callback=CLI.read_cookie, )
|
||||
@option("--browser_cookie", "-bc", type=Choice(list(BrowserCookie.SUPPORT_BROWSER.keys()
|
||||
) + [str(i) for i in range(1, 11)]), callback=CLI.read_cookie, )
|
||||
@option("--update_settings", "-us", type=bool,
|
||||
is_flag=True, )
|
||||
@option("-h",
|
||||
|
||||
@ -42,6 +42,8 @@ class About(Screen):
|
||||
yield Header()
|
||||
yield Label(Text(self.message("如果 XHS-Downloader 对您有帮助,请考虑为它点个 Star,感谢您的支持!"), style=INFO),
|
||||
classes="prompt", )
|
||||
yield Label(Text("Discord 社区", style=PROMPT), classes="prompt", )
|
||||
yield Label(f"{self.message("邀请链接:")}https://discord.com/invite/ZYtmgKud9Y")
|
||||
yield Label(Text(self.message("作者的其他开源项目"), style=PROMPT), classes="prompt", )
|
||||
yield Label(Text("TikTokDownloader (抖音 / TikTok)", style=MASTER), classes="prompt", )
|
||||
yield Label("https://github.com/JoeanAmier/TikTokDownloader")
|
||||
|
||||
@ -36,6 +36,9 @@ class Setting(Screen):
|
||||
id="work_path", ),
|
||||
Label(self.message("作品文件储存文件夹名称"), classes="params", ),
|
||||
Input(self.data["folder_name"], placeholder="Download", id="folder_name", ),
|
||||
Label(self.message("作品文件名称格式"), classes="params", ),
|
||||
Input(self.data["name_format"], placeholder=self.message("发布时间 作者昵称 作品标题"), valid_empty=True,
|
||||
id="name_format", ),
|
||||
Label(self.message("User-Agent"), classes="params", ),
|
||||
Input(self.data["user_agent"], placeholder=self.message("默认 User-Agent"), valid_empty=True,
|
||||
id="user_agent", ),
|
||||
@ -91,6 +94,7 @@ class Setting(Screen):
|
||||
self.dismiss({
|
||||
"work_path": self.query_one("#work_path").value,
|
||||
"folder_name": self.query_one("#folder_name").value,
|
||||
"name_format": self.query_one("#name_format").value,
|
||||
"user_agent": self.query_one("#user_agent").value,
|
||||
"cookie": self.query_one("#cookie").value or self.data["cookie"],
|
||||
"proxy": self.query_one("#proxy").value or None,
|
||||
@ -103,6 +107,7 @@ class Setting(Screen):
|
||||
"language": self.query_one("#language").value,
|
||||
"image_download": self.query_one("#image_download").value,
|
||||
"video_download": self.query_one("#video_download").value,
|
||||
"server": False,
|
||||
})
|
||||
|
||||
@on(Button.Pressed, "#abandon")
|
||||
|
||||
@ -11,6 +11,7 @@ from urllib.parse import urlparse
|
||||
|
||||
from pyperclip import paste
|
||||
|
||||
from source.expansion import BrowserCookie
|
||||
from source.expansion import Converter
|
||||
from source.expansion import Namespace
|
||||
from source.module import DataRecorder
|
||||
@ -48,6 +49,7 @@ class XHS:
|
||||
self,
|
||||
work_path="",
|
||||
folder_name="Download",
|
||||
name_format="发布时间 作者昵称 作品标题",
|
||||
user_agent: str = None,
|
||||
cookie: str = None,
|
||||
proxy: str = None,
|
||||
@ -62,6 +64,7 @@ class XHS:
|
||||
language="zh_CN",
|
||||
server=False,
|
||||
transition: Callable[[str], str] = None,
|
||||
read_cookie: int | str = None,
|
||||
*args,
|
||||
**kwargs,
|
||||
):
|
||||
@ -70,9 +73,10 @@ class XHS:
|
||||
ROOT,
|
||||
work_path,
|
||||
folder_name,
|
||||
name_format,
|
||||
user_agent,
|
||||
chunk,
|
||||
cookie,
|
||||
self.read_browser_cookie(read_cookie) or cookie,
|
||||
proxy,
|
||||
timeout,
|
||||
max_retry,
|
||||
@ -204,10 +208,28 @@ class XHS:
|
||||
return Namespace(data)
|
||||
|
||||
def __naming_rules(self, data: dict) -> str:
|
||||
time_ = data["发布时间"].replace(":", ".")
|
||||
author = self.manager.filter_name(data["作者昵称"]) or data["作者ID"]
|
||||
title = self.manager.filter_name(data["作品标题"]) or data["作品ID"]
|
||||
return f"{time_}_{author}_{title[:64]}"
|
||||
keys = self.manager.name_format.split()
|
||||
values = []
|
||||
for key in keys:
|
||||
match key:
|
||||
case "发布时间":
|
||||
values.append(self.__get_name_time(data))
|
||||
case "作者昵称":
|
||||
values.append(self.__get_name_author(data))
|
||||
case "作品标题":
|
||||
values.append(self.__get_name_title(data))
|
||||
case _:
|
||||
values.append(data[key])
|
||||
return self.manager.SEPARATE.join(values)
|
||||
|
||||
def __get_name_time(self, data: dict) -> str:
    """Return the publish time of a work in a form usable in a file name.

    Every ``:`` in ``data["发布时间"]`` is replaced with ``.``.
    """
    published = data["发布时间"]
    return published.replace(":", ".")
|
||||
|
||||
def __get_name_author(self, data: dict) -> str:
    """Return the author part of a file name.

    The nickname is passed through ``self.manager.filter_name``; when the
    filtered result is empty, the author ID is used instead.
    """
    nickname = self.manager.filter_name(data["作者昵称"])
    if nickname:
        return nickname
    return data["作者ID"]
|
||||
|
||||
def __get_name_title(self, data: dict) -> str:
    """Return the title part of a file name.

    The title is passed through ``self.manager.filter_name`` and cut to at
    most 64 characters; when nothing is left, the work ID is used instead.
    """
    filtered = self.manager.filter_name(data["作品标题"])
    shortened = filtered[:64]
    if shortened:
        return shortened
    return data["作品ID"]
|
||||
|
||||
async def monitor(self, delay=1, download=False, log=None, bar=None, data=True, ) -> None:
|
||||
logging(
|
||||
@ -252,3 +274,8 @@ class XHS:
|
||||
|
||||
async def close(self):
    """Close the manager owned by this instance and wait for it to finish."""
    manager = self.manager
    await manager.close()
|
||||
|
||||
@staticmethod
|
||||
def read_browser_cookie(value: str | int) -> str:
|
||||
return BrowserCookie.get(
|
||||
value, domain="xiaohongshu.com") if value else ""
|
||||
|
||||
@ -46,7 +46,7 @@ class Explore:
|
||||
container["作品描述"] = data.safe_extract("desc")
|
||||
container["作品类型"] = self.explore_type.get(
|
||||
data.safe_extract("type"), "未知")
|
||||
container["IP归属地"] = data.safe_extract("ipLocation")
|
||||
# container["IP归属地"] = data.safe_extract("ipLocation")
|
||||
|
||||
def __extract_time(self, container: dict, data: Namespace):
|
||||
container["发布时间"] = datetime.fromtimestamp(
|
||||
|
||||
@ -27,17 +27,6 @@ class BrowserCookie:
|
||||
"firefox": firefox,
|
||||
"librewolf": librewolf,
|
||||
"safari": safari,
|
||||
|
||||
"Chrome": chrome,
|
||||
"Chromium": chromium,
|
||||
"Opera": opera,
|
||||
"Opera_gx": opera_gx,
|
||||
"Brave": brave,
|
||||
"Edge": edge,
|
||||
"Vivaldi": vivaldi,
|
||||
"Firefox": firefox,
|
||||
"Librewolf": librewolf,
|
||||
"Safari": safari,
|
||||
}
|
||||
|
||||
@classmethod
|
||||
@ -56,7 +45,7 @@ class BrowserCookie:
|
||||
@classmethod
|
||||
def __browser_object(cls, browser: str | int):
|
||||
if isinstance(browser, str):
|
||||
return cls.SUPPORT_BROWSER[browser]
|
||||
return cls.SUPPORT_BROWSER[browser.lower()]
|
||||
elif isinstance(browser, int):
|
||||
return list(cls.SUPPORT_BROWSER.values())[browser - 1]
|
||||
raise TypeError
|
||||
|
||||
@ -15,13 +15,30 @@ __all__ = ["Manager"]
|
||||
|
||||
|
||||
class Manager:
|
||||
NAME = compile(r"[^\u4e00-\u9fffa-zA-Z0-9!?,。;:“”()《》]")
|
||||
NAME = compile(r"[^\u4e00-\u9fffa-zA-Z0-9-_!?,。;:“”()《》]")
|
||||
NAME_KEYS = (
|
||||
'收藏数量',
|
||||
'评论数量',
|
||||
'分享数量',
|
||||
'点赞数量',
|
||||
'作品标签',
|
||||
'作品ID',
|
||||
'作品标题',
|
||||
'作品描述',
|
||||
'作品类型',
|
||||
'发布时间',
|
||||
'最后更新时间',
|
||||
'作者昵称',
|
||||
'作者ID',
|
||||
)
|
||||
SEPARATE = "_"
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
root: Path,
|
||||
path: str,
|
||||
folder: str,
|
||||
name_format: str,
|
||||
user_agent: str,
|
||||
chunk: int,
|
||||
cookie: str,
|
||||
@ -44,6 +61,7 @@ class Manager:
|
||||
self.headers = self.blank_headers | {"Cookie": cookie}
|
||||
self.retry = retry
|
||||
self.chunk = chunk
|
||||
self.name_format = self.__check_name_format(name_format)
|
||||
self.record_data = self.check_bool(record_data, False)
|
||||
self.image_format = self.__check_image_format(image_format)
|
||||
self.folder_mode = self.check_bool(folder_mode, False)
|
||||
@ -118,3 +136,14 @@ class Manager:
|
||||
await self.request_session.close()
|
||||
await self.download_session.close()
|
||||
self.__clean()
|
||||
|
||||
def __check_name_format(self, format_: str) -> str:
    """Validate the work file name format.

    Args:
        format_: Field names separated by whitespace.

    Returns:
        ``format_`` unchanged when it names at least one field and every
        field is listed in ``self.NAME_KEYS``; otherwise the default format
        ``"发布时间 作者昵称 作品标题"``.
    """
    default = "发布时间 作者昵称 作品标题"
    # A non-string value cannot be split into field names.
    if not isinstance(format_, str):
        return default
    keys = format_.split()
    # A blank format names no field at all; accepting it would leave the
    # file name empty, so it is treated like any other invalid value.
    if not keys or any(key not in self.NAME_KEYS for key in keys):
        return default
    return format_
|
||||
|
||||
@ -67,7 +67,7 @@ class DataRecorder(IDRecorder):
|
||||
("点赞数量", "TEXT"),
|
||||
("作者昵称", "TEXT"),
|
||||
("作者ID", "TEXT"),
|
||||
("IP归属地", "TEXT"),
|
||||
# ("IP归属地", "TEXT"),
|
||||
("作者链接", "TEXT"),
|
||||
("作品链接", "TEXT"),
|
||||
("下载地址", "TEXT"),
|
||||
|
||||
@ -10,6 +10,7 @@ class Settings:
|
||||
default = {
|
||||
"work_path": "",
|
||||
"folder_name": "Download",
|
||||
"name_format": "发布时间 作者昵称 作品标题",
|
||||
"user_agent": "",
|
||||
"cookie": "",
|
||||
"proxy": None,
|
||||
@ -38,7 +39,7 @@ class Settings:
|
||||
|
||||
def create(self) -> dict:
    """Write the default settings to the settings file and return them.

    The file at ``self.file`` is opened for writing with the encoding
    ``self.encode`` and receives ``self.default`` as one JSON document.

    Returns:
        ``self.default``.
    """
    with self.file.open("w", encoding=self.encode) as f:
        # Serialise exactly once: a second dump() would append another JSON
        # document and make the file unparsable. ensure_ascii=False keeps
        # non-ASCII defaults readable instead of \uXXXX escapes.
        dump(self.default, f, indent=4, ensure_ascii=False)
    return self.default
|
||||
|
||||
def update(self, data: dict):
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user