新增自定义作品文件名称格式功能

This commit is contained in:
JoeanAmier 2024-04-27 20:56:37 +08:00
parent ea349048c3
commit e26d4875e3
13 changed files with 102 additions and 27 deletions

View File

@ -12,7 +12,7 @@
<img alt="GitHub all releases" src="https://img.shields.io/github/downloads/JoeanAmier/XHS-Downloader/total?style=for-the-badge&color=f759ab">
</div>
<br>
<p>🔥 <b>小红书链接提取/作品采集工具</b>:提取账号发布、收藏、点赞作品链接;提取搜索结果作品、用户链接;采集小红书作品信息;提取小红书作品下载地址;下载小红书无水印作品文件!</p>
<p>🔥 <b>小红书链接提取/作品采集工具</b>:提取账号发布、收藏、点赞作品链接;提取搜索结果作品链接、用户链接;采集小红书作品信息;提取小红书作品下载地址;下载小红书无水印作品文件!</p>
<h1>📑 项目功能</h1>
<ul><b>程序功能</b>
<li>✅ 采集小红书作品信息</li>
@ -26,7 +26,8 @@
<li>✅ 后台监听剪贴板下载作品</li>
<li>✅ 记录已下载作品 ID</li>
<li>✅ 支持命令行下载作品文件</li>
<li>✅ 从浏览器读取 Cookie</li>
<li>✅ 从浏览器读取 Cookie</li>
<li>✅ 自定义文件名称格式</li>
<li>☑️ 支持 API 调用功能</li>
</ul>
<ul><b>脚本功能</b>
@ -59,7 +60,7 @@
<p>如果仅需下载无水印作品文件,建议选择 <b>程序运行</b>;如果有其他需求,建议选择 <b>源码运行</b></p>
<p>建议自行设置 <code>cookie</code> 参数,若不设置该参数,程序功能可能无法正常使用!</p>
<h2>🖱 程序运行</h2>
<p>Windows 10 及以上用户可前往 <a href="https://github.com/JoeanAmier/XHS-Downloader/releases/latest">Releases</a> 下载程序压缩包或安装包,解压或安装后打开程序文件夹,双击运行 <code>main.exe</code> 即可使用。</p>
<p>Windows 10 及以上用户可前往 <a href="https://github.com/JoeanAmier/XHS-Downloader/releases/latest">Releases</a> 下载程序压缩包,解压后打开程序文件夹,双击运行 <code>main.exe</code> 即可使用。</p>
<p>若通过此方式使用程序,文件默认下载路径为:<code>.\_internal\Download</code>;配置文件路径为:<code>.\_internal\settings.json</code></p>
<h2>⌨️ 源码运行</h2>
<ol>
@ -128,6 +129,7 @@ async def example():
</pre>
<h1>⚙️ 配置文件</h1>
<p>项目根目录下的 <code>settings.json</code> 文件,首次运行自动生成,可以自定义部分运行参数。</p>
<p>如果设置了无效的参数值,程序将会使用参数默认值!</p>
<table>
<thead>
<tr>
@ -151,6 +153,12 @@ async def example():
<td align="center">Download</td>
</tr>
<tr>
<td align="center">name_format</td>
<td align="center">str</td>
<td align="center">作品文件名称格式,字段之间使用空格分隔,支持字段:<code>收藏数量</code><code>评论数量</code><code>分享数量</code><code>点赞数量</code><code>作品标签</code><code>作品ID</code><code>作品标题</code><code>作品描述</code><code>作品类型</code><code>发布时间</code><code>最后更新时间</code><code>作者昵称</code><code>作者ID</code></td>
<td align="center"><code>发布时间 作者昵称 作品标题</code></td>
</tr>
<tr>
<td align="center">user_agent</td>
<td align="center">str</td>
<td align="center">请求头 User-Agent</td>
@ -260,6 +268,7 @@ async def example():
<ul>
<li>微信(其他事务): Downloader_Tools</li>
<li>微信公众号(问题解答): Downloader Tools</li>
<li><b>Discord 社区</b>: <a href="https://discord.com/invite/ZYtmgKud9Y">点击加入社区</a></li>
<li>QQ 群聊(使用交流): <a href="https://github.com/JoeanAmier/XHS-Downloader/blob/master/static/QQ%E7%BE%A4%E8%81%8A%E4%BA%8C%E7%BB%B4%E7%A0%81.png">扫码加入群聊</a></li>
</ul>
<p><b>说明:</b>QQ 群聊仅限于讨论项目使用问题,严禁发布任何广告,严禁讨论任何账号交易、账号流量、流量变现、灰色产业等相关的内容!</p>

Binary file not shown.

View File

@ -249,3 +249,9 @@ msgstr "Image works download switch"
msgid "配置文件 settings.json 缺少必要的参数,请删除该文件,然后重新运行程序,自动生成默认配置文件!"
msgstr "The configuration file settings.json is missing necessary parameters. Please delete the file and run the program again to automatically generate the default configuration file!"
msgid "作品文件名称格式"
msgstr "Format of works file name"
msgid "邀请链接:"
msgstr "Invitation link: "

View File

@ -249,3 +249,9 @@ msgstr ""
msgid "配置文件 settings.json 缺少必要的参数,请删除该文件,然后重新运行程序,自动生成默认配置文件!"
msgstr ""
msgid "作品文件名称格式"
msgstr ""
msgid "邀请链接:"
msgstr ""

View File

@ -122,6 +122,7 @@ class CLI:
("--index", "-i", "str", _("下载指定序号的图片文件,仅对图文作品生效;多个序号输入示例:\"1 3 5 7\"")),
("--work_path", "-wp", "str", _("作品数据 / 文件保存根路径")),
("--folder_name", "-fn", "str", _("作品文件储存文件夹名称")),
("--name_format", "-nf", "str", _("作品文件名称格式")),
("--user_agent", "-ua", "str", _("User-Agent")),
("--cookie", "-ck", "str", _("小红书网页版 Cookie无需登录")),
("--proxy", "-p", "str", _("网络代理")),
@ -161,6 +162,7 @@ class CLI:
type=Path(file_okay=False),
)
@option("--folder_name", "-fn", )
@option("--name_format", "-nf", )
@option("--user_agent", "-ua", )
@option("--cookie", "-ck", )
@option("--proxy", "-p", )
@ -173,9 +175,8 @@ class CLI:
@option("--language", "-l",
type=Choice(["zh_CN", "en_GB"]), )
@option("--settings", "-s", type=Path(dir_okay=False), )
@option("--browser_cookie", "-bc",
type=Choice(list(BrowserCookie.SUPPORT_BROWSER.keys()) + [str(i) for i in range(1, 11)]),
callback=CLI.read_cookie, )
@option("--browser_cookie", "-bc", type=Choice(list(BrowserCookie.SUPPORT_BROWSER.keys()
) + [str(i) for i in range(1, 11)]), callback=CLI.read_cookie, )
@option("--update_settings", "-us", type=bool,
is_flag=True, )
@option("-h",

View File

@ -42,6 +42,8 @@ class About(Screen):
yield Header()
yield Label(Text(self.message("如果 XHS-Downloader 对您有帮助,请考虑为它点个 Star感谢您的支持"), style=INFO),
classes="prompt", )
yield Label(Text("Discord 社区", style=PROMPT), classes="prompt", )
yield Label(f"{self.message("邀请链接:")}https://discord.com/invite/ZYtmgKud9Y")
yield Label(Text(self.message("作者的其他开源项目"), style=PROMPT), classes="prompt", )
yield Label(Text("TikTokDownloader (抖音 / TikTok)", style=MASTER), classes="prompt", )
yield Label("https://github.com/JoeanAmier/TikTokDownloader")

View File

@ -36,6 +36,9 @@ class Setting(Screen):
id="work_path", ),
Label(self.message("作品文件储存文件夹名称"), classes="params", ),
Input(self.data["folder_name"], placeholder="Download", id="folder_name", ),
Label(self.message("作品文件名称格式"), classes="params", ),
Input(self.data["name_format"], placeholder=self.message("发布时间 作者昵称 作品标题"), valid_empty=True,
id="name_format", ),
Label(self.message("User-Agent"), classes="params", ),
Input(self.data["user_agent"], placeholder=self.message("默认 User-Agent"), valid_empty=True,
id="user_agent", ),
@ -91,6 +94,7 @@ class Setting(Screen):
self.dismiss({
"work_path": self.query_one("#work_path").value,
"folder_name": self.query_one("#folder_name").value,
"name_format": self.query_one("#name_format").value,
"user_agent": self.query_one("#user_agent").value,
"cookie": self.query_one("#cookie").value or self.data["cookie"],
"proxy": self.query_one("#proxy").value or None,
@ -103,6 +107,7 @@ class Setting(Screen):
"language": self.query_one("#language").value,
"image_download": self.query_one("#image_download").value,
"video_download": self.query_one("#video_download").value,
"server": False,
})
@on(Button.Pressed, "#abandon")

View File

@ -11,6 +11,7 @@ from urllib.parse import urlparse
from pyperclip import paste
from source.expansion import BrowserCookie
from source.expansion import Converter
from source.expansion import Namespace
from source.module import DataRecorder
@ -48,6 +49,7 @@ class XHS:
self,
work_path="",
folder_name="Download",
name_format="发布时间 作者昵称 作品标题",
user_agent: str = None,
cookie: str = None,
proxy: str = None,
@ -62,6 +64,7 @@ class XHS:
language="zh_CN",
server=False,
transition: Callable[[str], str] = None,
read_cookie: int | str = None,
*args,
**kwargs,
):
@ -70,9 +73,10 @@ class XHS:
ROOT,
work_path,
folder_name,
name_format,
user_agent,
chunk,
cookie,
self.read_browser_cookie(read_cookie) or cookie,
proxy,
timeout,
max_retry,
@ -204,10 +208,28 @@ class XHS:
return Namespace(data)
def __naming_rules(self, data: dict) -> str:
time_ = data["发布时间"].replace(":", ".")
author = self.manager.filter_name(data["作者昵称"]) or data["作者ID"]
title = self.manager.filter_name(data["作品标题"]) or data["作品ID"]
return f"{time_}_{author}_{title[:64]}"
keys = self.manager.name_format.split()
values = []
for key in keys:
match key:
case "发布时间":
values.append(self.__get_name_time(data))
case "作者昵称":
values.append(self.__get_name_author(data))
case "作品标题":
values.append(self.__get_name_title(data))
case _:
values.append(data[key])
return self.manager.SEPARATE.join(values)
def __get_name_time(self, data: dict) -> str:
    """Return the work's publish time with every ":" swapped for "."."""
    publish_time = data["发布时间"]
    return ".".join(publish_time.split(":"))
def __get_name_author(self, data: dict) -> str:
    """Return the cleaned author nickname, or the author ID if it is empty."""
    nickname = self.manager.filter_name(data["作者昵称"])
    if nickname:
        return nickname
    return data["作者ID"]
def __get_name_title(self, data: dict) -> str:
    """Return the cleaned title capped at 64 characters, or the work ID."""
    cleaned = self.manager.filter_name(data["作品标题"])
    shortened = cleaned[:64]
    return shortened if shortened else data["作品ID"]
async def monitor(self, delay=1, download=False, log=None, bar=None, data=True, ) -> None:
logging(
@ -252,3 +274,8 @@ class XHS:
async def close(self):
await self.manager.close()
@staticmethod
def read_browser_cookie(value: str | int) -> str:
return BrowserCookie.get(
value, domain="xiaohongshu.com") if value else ""

View File

@ -46,7 +46,7 @@ class Explore:
container["作品描述"] = data.safe_extract("desc")
container["作品类型"] = self.explore_type.get(
data.safe_extract("type"), "未知")
container["IP归属地"] = data.safe_extract("ipLocation")
# container["IP归属地"] = data.safe_extract("ipLocation")
def __extract_time(self, container: dict, data: Namespace):
container["发布时间"] = datetime.fromtimestamp(

View File

@ -27,17 +27,6 @@ class BrowserCookie:
"firefox": firefox,
"librewolf": librewolf,
"safari": safari,
"Chrome": chrome,
"Chromium": chromium,
"Opera": opera,
"Opera_gx": opera_gx,
"Brave": brave,
"Edge": edge,
"Vivaldi": vivaldi,
"Firefox": firefox,
"Librewolf": librewolf,
"Safari": safari,
}
@classmethod
@ -56,7 +45,7 @@ class BrowserCookie:
@classmethod
def __browser_object(cls, browser: str | int):
if isinstance(browser, str):
return cls.SUPPORT_BROWSER[browser]
return cls.SUPPORT_BROWSER[browser.lower()]
elif isinstance(browser, int):
return list(cls.SUPPORT_BROWSER.values())[browser - 1]
raise TypeError

View File

@ -15,13 +15,30 @@ __all__ = ["Manager"]
class Manager:
NAME = compile(r"[^\u4e00-\u9fffa-zA-Z0-9“”《》]")
NAME = compile(r"[^\u4e00-\u9fffa-zA-Z0-9-_“”《》]")
NAME_KEYS = (
'收藏数量',
'评论数量',
'分享数量',
'点赞数量',
'作品标签',
'作品ID',
'作品标题',
'作品描述',
'作品类型',
'发布时间',
'最后更新时间',
'作者昵称',
'作者ID',
)
SEPARATE = "_"
def __init__(
self,
root: Path,
path: str,
folder: str,
name_format: str,
user_agent: str,
chunk: int,
cookie: str,
@ -44,6 +61,7 @@ class Manager:
self.headers = self.blank_headers | {"Cookie": cookie}
self.retry = retry
self.chunk = chunk
self.name_format = self.__check_name_format(name_format)
self.record_data = self.check_bool(record_data, False)
self.image_format = self.__check_image_format(image_format)
self.folder_mode = self.check_bool(folder_mode, False)
@ -118,3 +136,14 @@ class Manager:
await self.request_session.close()
await self.download_session.close()
self.__clean()
def __check_name_format(self, format_: str) -> str:
    """Validate a file name format, falling back to the default if invalid.

    A format is valid when it is a string holding at least one field and
    every whitespace-separated field is listed in ``self.NAME_KEYS``.

    :param format_: Whitespace-separated field names supplied by the user.
    :return: ``format_`` unchanged when valid, otherwise the default format.
    """
    default = "发布时间 作者昵称 作品标题"
    # Settings files and CLI options can hand over None or other non-string
    # values; treat those as invalid instead of failing on ``.split()``.
    if not isinstance(format_, str):
        return default
    keys = format_.split()
    # An empty format would otherwise pass the membership check vacuously
    # and lead to empty file names.
    if not keys or any(key not in self.NAME_KEYS for key in keys):
        return default
    return format_

View File

@ -67,7 +67,7 @@ class DataRecorder(IDRecorder):
("点赞数量", "TEXT"),
("作者昵称", "TEXT"),
("作者ID", "TEXT"),
("IP归属地", "TEXT"),
# ("IP归属地", "TEXT"),
("作者链接", "TEXT"),
("作品链接", "TEXT"),
("下载地址", "TEXT"),

View File

@ -10,6 +10,7 @@ class Settings:
default = {
"work_path": "",
"folder_name": "Download",
"name_format": "发布时间 作者昵称 作品标题",
"user_agent": "",
"cookie": "",
"proxy": None,
@ -38,7 +39,7 @@ class Settings:
def create(self) -> dict:
    """Write the default settings to the settings file and return them.

    The defaults are serialised exactly once; writing them a second time
    into the same handle would leave two concatenated JSON documents that
    cannot be parsed back.

    :return: The default settings dictionary (``self.default``).
    """
    with self.file.open("w", encoding=self.encode) as f:
        # ensure_ascii=False keeps non-ASCII defaults readable in the file
        # instead of escaping them as \uXXXX sequences.
        dump(self.default, f, indent=4, ensure_ascii=False)
    return self.default
def update(self, data: dict):