mirror of
https://github.com/JoeanAmier/XHS-Downloader.git
synced 2025-12-26 04:48:05 +08:00
fix: 修复非法文件名称报错的问题
1. 替换文件名称包含的非法字符 2. 替换文件名称包含的 Emoji 表情
This commit is contained in:
parent
ce01a7b2d7
commit
e2eee03feb
@ -426,3 +426,4 @@ async def example():
|
||||
* https://aiosqlite.omnilib.dev/en/stable/
|
||||
* https://click.palletsprojects.com/en/8.1.x/
|
||||
* https://github.com/thewh1teagle/rookie
|
||||
* https://github.com/carpedm20/emoji/
|
||||
|
||||
@ -427,3 +427,4 @@ async def example():
|
||||
* https://aiosqlite.omnilib.dev/en/stable/
|
||||
* https://click.palletsprojects.com/en/8.1.x/
|
||||
* https://github.com/thewh1teagle/rookie
|
||||
* https://github.com/carpedm20/emoji/
|
||||
|
||||
@ -9,3 +9,4 @@ httpx>=0.27.0
|
||||
fastapi>=0.111.0
|
||||
uvicorn>=0.30.1
|
||||
aiofiles>=24.1.0
|
||||
emoji>=2.12.1
|
||||
|
||||
@ -17,8 +17,10 @@ from uvicorn import Config
|
||||
from uvicorn import Server
|
||||
|
||||
from source.expansion import BrowserCookie
|
||||
from source.expansion import Cleaner
|
||||
from source.expansion import Converter
|
||||
from source.expansion import Namespace
|
||||
from source.expansion import beautify_string
|
||||
from source.module import DataRecorder
|
||||
from source.module import ExtractData
|
||||
from source.module import ExtractParams
|
||||
@ -65,6 +67,7 @@ class XHS:
|
||||
SHARE = compile(r"https?://www\.xiaohongshu\.com/discovery/item/\S+")
|
||||
SHORT = compile(r"https?://xhslink\.com/\S+")
|
||||
__INSTANCE = None
|
||||
CLEANER = Cleaner()
|
||||
|
||||
def __new__(cls, *args, **kwargs):
|
||||
if not cls.__INSTANCE:
|
||||
@ -270,7 +273,13 @@ class XHS:
|
||||
values.append(self.__get_name_title(data))
|
||||
case _:
|
||||
values.append(data[key])
|
||||
return self.manager.SEPARATE.join(values)
|
||||
return self.CLEANER.filter_name(
|
||||
self.manager.SEPARATE.join(values),
|
||||
default=self.manager.SEPARATE.join((
|
||||
data["作者ID"],
|
||||
data["作品ID"],
|
||||
)),
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def __get_name_time(data: dict) -> str:
|
||||
@ -280,7 +289,10 @@ class XHS:
|
||||
return self.manager.filter_name(data["作者昵称"]) or data["作者ID"]
|
||||
|
||||
def __get_name_title(self, data: dict) -> str:
|
||||
return self.manager.filter_name(data["作品标题"])[:64] or data["作品ID"]
|
||||
return beautify_string(
|
||||
self.manager.filter_name(data["作品标题"]),
|
||||
64,
|
||||
) or data["作品ID"]
|
||||
|
||||
async def monitor(self, delay=1, download=False, log=None, bar=None, data=True, ) -> None:
|
||||
logging(
|
||||
|
||||
@ -6,3 +6,4 @@ from .truncate import trim_string
|
||||
from .truncate import truncate_string
|
||||
from .file_folder import file_switch
|
||||
from .file_folder import remove_empty_directories
|
||||
from .cleaner import Cleaner
|
||||
|
||||
89
source/expansion/cleaner.py
Normal file
89
source/expansion/cleaner.py
Normal file
@ -0,0 +1,89 @@
|
||||
from platform import system
|
||||
from string import whitespace
|
||||
from emoji import replace_emoji
|
||||
from warnings import warn
|
||||
|
||||
|
||||
class Cleaner:
    """Remove characters that are illegal in file and folder names.

    The replacement table ``rule`` maps each illegal substring to the text
    that replaces it. It starts out as the table returned by
    :meth:`default_rule` for the current operating system and can be extended
    or swapped out with :meth:`set_rule`.
    """

    # Characters that must not appear at either end of a finished name.
    _EDGE_CHARS = " ._"

    def __init__(self):
        """Create a cleaner that uses the platform's default replacement table."""
        self.rule = self.default_rule()  # default illegal-character table

    @staticmethod
    def default_rule():
        """Build the default illegal-character table for the current OS.

        :return: dict mapping each illegal character to its replacement
            (always the empty string). An unsupported operating system
            triggers a warning and contributes no OS-specific characters.
        """
        if (s := system()) in ("Windows", "Darwin"):
            rule = {
                "/": "",
                "\\": "",
                "|": "",
                "<": "",
                ">": "",
                "\"": "",
                "?": "",
                ":": "",
                "*": "",
                "\x00": "",
            }  # Windows and macOS
        elif s == "Linux":
            rule = {
                "/": "",
                "\x00": "",
            }  # Linux
        else:
            warn("不受支持的操作系统类型,可能无法正常去除非法字符!")
            rule = {}
        # string.whitespace starts with the plain space character; skipping it
        # keeps ordinary spaces while tabs, newlines, etc. are removed.
        cache = {i: "" for i in whitespace[1:]}
        return rule | cache

    def set_rule(self, rule: dict[str, str], update=True):
        """Set the illegal-character table.

        :param rule: replacement table; keys are the illegal substrings and
            values are the text they are replaced with
        :param update: if True, merge ``rule`` into the current table (entries
            in ``rule`` win); otherwise replace the current table entirely
        """
        self.rule = {**self.rule, **rule} if update else rule

    def filter(self, text: str) -> str:
        """Apply every replacement in ``self.rule`` to ``text``.

        :param text: the string to process
        :return: the string after all replacements; this may be an empty
            string, it is never ``None``
        """
        # Sequential replace (rather than str.translate) because keys set via
        # set_rule are not restricted to single characters.
        for illegal, replacement in self.rule.items():
            text = text.replace(illegal, replacement)
        return text

    def filter_name(
            self,
            text: str,
            replace: str = "",
            default: str = "",
    ) -> str:
        """Turn ``text`` into a string that is safe to use as a file/folder name.

        :param text: the raw name
        :param replace: text substituted for each emoji
        :param default: value returned when nothing is left after cleaning
        :return: the cleaned name, or ``default`` if the cleaned name is empty
        """
        # Keep a visible separator for colons (e.g. in timestamps) before the
        # rule table gets a chance to drop them.
        text = text.replace(":", ".")

        text = self.filter(text)

        text = replace_emoji(text, replace, )

        text = self.clear_spaces(text)

        text = self._trim_edges(text)

        return text or default

    @classmethod
    def _trim_edges(cls, text: str) -> str:
        """Strip spaces, dots and underscores from both ends of ``text``.

        All three characters are stripped as one set so that mixed runs such
        as ``"name ."`` or ``"_.name"`` are removed completely; stripping them
        one after another would leave a trailing space or a leading dot.
        """
        return text.strip(cls._EDGE_CHARS)

    @staticmethod
    def clear_spaces(string: str):
        """Collapse every run of whitespace into a single space.

        Leading and trailing whitespace is removed as a side effect of
        splitting and re-joining.
        """
        return " ".join(string.split())
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Manual smoke check: show the active replacement table, then the result
    # of cleaning an empty name.
    cleaner = Cleaner()
    print(cleaner.rule)
    print(cleaner.filter_name(""))
|
||||
Loading…
x
Reference in New Issue
Block a user