mirror of
https://github.com/JoeanAmier/XHS-Downloader.git
synced 2026-03-22 06:57:16 +08:00
fix: 修复非法文件名称报错的问题
1. 替换文件名称包含的非法字符 2. 替换文件名称包含的 Emoji 表情
This commit is contained in:
@@ -426,3 +426,4 @@ async def example():
|
|||||||
* https://aiosqlite.omnilib.dev/en/stable/
|
* https://aiosqlite.omnilib.dev/en/stable/
|
||||||
* https://click.palletsprojects.com/en/8.1.x/
|
* https://click.palletsprojects.com/en/8.1.x/
|
||||||
* https://github.com/thewh1teagle/rookie
|
* https://github.com/thewh1teagle/rookie
|
||||||
|
* https://github.com/carpedm20/emoji/
|
||||||
|
|||||||
@@ -427,3 +427,4 @@ async def example():
|
|||||||
* https://aiosqlite.omnilib.dev/en/stable/
|
* https://aiosqlite.omnilib.dev/en/stable/
|
||||||
* https://click.palletsprojects.com/en/8.1.x/
|
* https://click.palletsprojects.com/en/8.1.x/
|
||||||
* https://github.com/thewh1teagle/rookie
|
* https://github.com/thewh1teagle/rookie
|
||||||
|
* https://github.com/carpedm20/emoji/
|
||||||
|
|||||||
@@ -9,3 +9,4 @@ httpx>=0.27.0
|
|||||||
fastapi>=0.111.0
|
fastapi>=0.111.0
|
||||||
uvicorn>=0.30.1
|
uvicorn>=0.30.1
|
||||||
aiofiles>=24.1.0
|
aiofiles>=24.1.0
|
||||||
|
emoji>=2.12.1
|
||||||
|
|||||||
@@ -17,8 +17,10 @@ from uvicorn import Config
|
|||||||
from uvicorn import Server
|
from uvicorn import Server
|
||||||
|
|
||||||
from source.expansion import BrowserCookie
|
from source.expansion import BrowserCookie
|
||||||
|
from source.expansion import Cleaner
|
||||||
from source.expansion import Converter
|
from source.expansion import Converter
|
||||||
from source.expansion import Namespace
|
from source.expansion import Namespace
|
||||||
|
from source.expansion import beautify_string
|
||||||
from source.module import DataRecorder
|
from source.module import DataRecorder
|
||||||
from source.module import ExtractData
|
from source.module import ExtractData
|
||||||
from source.module import ExtractParams
|
from source.module import ExtractParams
|
||||||
@@ -65,6 +67,7 @@ class XHS:
|
|||||||
SHARE = compile(r"https?://www\.xiaohongshu\.com/discovery/item/\S+")
|
SHARE = compile(r"https?://www\.xiaohongshu\.com/discovery/item/\S+")
|
||||||
SHORT = compile(r"https?://xhslink\.com/\S+")
|
SHORT = compile(r"https?://xhslink\.com/\S+")
|
||||||
__INSTANCE = None
|
__INSTANCE = None
|
||||||
|
CLEANER = Cleaner()
|
||||||
|
|
||||||
def __new__(cls, *args, **kwargs):
|
def __new__(cls, *args, **kwargs):
|
||||||
if not cls.__INSTANCE:
|
if not cls.__INSTANCE:
|
||||||
@@ -270,7 +273,13 @@ class XHS:
|
|||||||
values.append(self.__get_name_title(data))
|
values.append(self.__get_name_title(data))
|
||||||
case _:
|
case _:
|
||||||
values.append(data[key])
|
values.append(data[key])
|
||||||
return self.manager.SEPARATE.join(values)
|
return self.CLEANER.filter_name(
|
||||||
|
self.manager.SEPARATE.join(values),
|
||||||
|
default=self.manager.SEPARATE.join((
|
||||||
|
data["作者ID"],
|
||||||
|
data["作品ID"],
|
||||||
|
)),
|
||||||
|
)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def __get_name_time(data: dict) -> str:
|
def __get_name_time(data: dict) -> str:
|
||||||
@@ -280,7 +289,10 @@ class XHS:
|
|||||||
return self.manager.filter_name(data["作者昵称"]) or data["作者ID"]
|
return self.manager.filter_name(data["作者昵称"]) or data["作者ID"]
|
||||||
|
|
||||||
def __get_name_title(self, data: dict) -> str:
|
def __get_name_title(self, data: dict) -> str:
|
||||||
return self.manager.filter_name(data["作品标题"])[:64] or data["作品ID"]
|
return beautify_string(
|
||||||
|
self.manager.filter_name(data["作品标题"]),
|
||||||
|
64,
|
||||||
|
) or data["作品ID"]
|
||||||
|
|
||||||
async def monitor(self, delay=1, download=False, log=None, bar=None, data=True, ) -> None:
|
async def monitor(self, delay=1, download=False, log=None, bar=None, data=True, ) -> None:
|
||||||
logging(
|
logging(
|
||||||
|
|||||||
@@ -6,3 +6,4 @@ from .truncate import trim_string
|
|||||||
from .truncate import truncate_string
|
from .truncate import truncate_string
|
||||||
from .file_folder import file_switch
|
from .file_folder import file_switch
|
||||||
from .file_folder import remove_empty_directories
|
from .file_folder import remove_empty_directories
|
||||||
|
from .cleaner import Cleaner
|
||||||
|
|||||||
89
source/expansion/cleaner.py
Normal file
89
source/expansion/cleaner.py
Normal file
@@ -0,0 +1,89 @@
|
|||||||
|
from platform import system
|
||||||
|
from string import whitespace
|
||||||
|
from emoji import replace_emoji
|
||||||
|
from warnings import warn
|
||||||
|
|
||||||
|
|
||||||
|
class Cleaner:
    """Replace characters that are illegal in file and folder names.

    A default replacement table is generated from the current operating
    system; a custom table can be installed with :meth:`set_rule`.
    """

    # Characters removed from both ends of a cleaned name: whitespace, dots
    # and underscores, stripped together so that mixed runs such as " ._"
    # are removed completely.
    _EDGE_CHARS = whitespace + "._"

    def __init__(self):
        """Initialise the cleaner with the default rule table for this OS."""
        self.rule = self.default_rule()  # default illegal-character table

    @staticmethod
    def default_rule() -> dict[str, str]:
        """Build the default illegal-character table for the current OS.

        :return: mapping of illegal character -> replacement text (every
            default replacement is the empty string)
        """
        if (s := system()) in ("Windows", "Darwin"):
            # Windows and macOS
            rule = dict.fromkeys('/\\|<>"?:*\x00', "")
        elif s == "Linux":
            # Linux
            rule = dict.fromkeys("/\x00", "")
        else:
            warn("不受支持的操作系统类型,可能无法正常去除非法字符!")
            rule = {}
        # Also drop tabs, line breaks and similar characters; the leading
        # character of ``string.whitespace`` (the plain space) is skipped so
        # that ordinary spaces survive.
        cache = {i: "" for i in whitespace[1:]}
        return rule | cache

    def set_rule(self, rule: dict[str, str], update=True):
        """Install a custom illegal-character table.

        :param rule: replacement rules; each key is the text to replace and
            each value is what it is replaced with
        :param update: if True, merge ``rule`` into the existing table
            (entries in ``rule`` win); otherwise replace the table entirely
        """
        # Always store a new dict so that later changes to the caller's
        # mapping cannot silently alter this cleaner's rules.
        self.rule = {**self.rule, **rule} if update else dict(rule)

    def filter(self, text: str) -> str:
        """Apply every replacement rule to ``text``.

        Rules are applied one after another in the table's insertion order.

        :param text: the string to process
        :return: the string after all replacements; it may be empty
        """
        for illegal, substitute in self.rule.items():
            text = text.replace(illegal, substitute)
        return text

    def filter_name(
            self,
            text: str,
            replace: str = "",
            default: str = "",
    ) -> str:
        """Turn ``text`` into a string usable as a file or folder name.

        :param text: the raw name
        :param replace: text substituted for each emoji found in ``text``
        :param default: value returned when nothing is left after cleaning
        :return: the cleaned name, or ``default`` if the result is empty
        """
        # Colons become dots before the rule table runs, so they are kept
        # as "." instead of being removed by a ":" rule.
        text = text.replace(":", ".")

        text = self.filter(text)

        text = replace_emoji(text, replace)

        text = self.clear_spaces(text)

        text = self._strip_edges(text)

        return text or default

    @classmethod
    def _strip_edges(cls, text: str) -> str:
        """Remove whitespace, dots and underscores from both ends of ``text``.

        All three kinds of character are stripped in a single pass, so a
        trailing space hidden behind a dot (``"name ."``) or a dot hidden
        behind an underscore (``"name._"``) does not survive.
        """
        return text.strip(cls._EDGE_CHARS)

    @staticmethod
    def clear_spaces(string: str) -> str:
        """Collapse each run of whitespace into one space and trim the ends."""
        return " ".join(string.split())
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Manual smoke check: print the active rule table, then the result of
    # cleaning an empty name.
    cleaner = Cleaner()
    print(cleaner.rule)
    print(cleaner.filter_name(""))
|
||||||
Reference in New Issue
Block a user