mirror of
https://github.com/JoeanAmier/XHS-Downloader.git
synced 2025-12-26 04:48:05 +08:00
117 lines
3.1 KiB
Python
117 lines
3.1 KiB
Python
from platform import system
|
||
from re import compile
|
||
from string import whitespace
|
||
from warnings import warn
|
||
|
||
from emoji import replace_emoji
|
||
|
||
try:
|
||
from source.translation import _
|
||
except ImportError:
|
||
_ = lambda s: s
|
||
|
||
|
||
class Cleaner:
|
||
CONTROL_CHARACTERS = compile(r"[\x00-\x1F\x7F]")
|
||
|
||
def __init__(self):
|
||
"""
|
||
替换字符串中包含的非法字符,默认根据系统类型生成对应的非法字符字典,也可以自行设置非法字符字典
|
||
"""
|
||
self.rule = self.default_rule() # 默认非法字符字典
|
||
|
||
@staticmethod
|
||
def default_rule():
|
||
"""根据系统类型生成默认非法字符字典"""
|
||
if (s := system()) in ("Windows", "Darwin"):
|
||
rule = {
|
||
"/": "",
|
||
"\\": "",
|
||
"|": "",
|
||
"<": "",
|
||
">": "",
|
||
'"': "",
|
||
"?": "",
|
||
":": "",
|
||
"*": "",
|
||
"\x00": "",
|
||
} # Windows 系统和 Mac 系统
|
||
elif s == "Linux":
|
||
rule = {
|
||
"/": "",
|
||
"\x00": "",
|
||
} # Linux 系统
|
||
else:
|
||
warn(_("不受支持的操作系统类型,可能无法正常去除非法字符!"))
|
||
rule = {}
|
||
cache = {i: "" for i in whitespace[1:]} # 补充换行符等非法字符
|
||
return rule | cache
|
||
|
||
def set_rule(self, rule: dict[str, str], update=True):
|
||
"""
|
||
设置非法字符字典
|
||
|
||
:param rule: 替换规则,字典格式,键为非法字符,值为替换后的内容
|
||
:param update: 如果是 True,则与原有规则字典合并,否则替换原有规则字典
|
||
"""
|
||
self.rule = {**self.rule, **rule} if update else rule
|
||
|
||
def filter(self, text: str) -> str:
|
||
"""
|
||
去除非法字符
|
||
|
||
:param text: 待处理的字符串
|
||
:return: 替换后的字符串,如果替换后字符串为空,则返回 None
|
||
"""
|
||
for i in self.rule:
|
||
text = text.replace(i, self.rule[i])
|
||
return text
|
||
|
||
def filter_name(
|
||
self,
|
||
text: str,
|
||
replace: str = "",
|
||
default: str = "",
|
||
) -> str:
|
||
"""过滤文件夹名称中的非法字符"""
|
||
text = text.replace(":", ".")
|
||
|
||
text = self.remove_control_characters(text)
|
||
|
||
text = self.filter(text)
|
||
|
||
text = replace_emoji(
|
||
text,
|
||
replace,
|
||
)
|
||
|
||
text = self.clear_spaces(text)
|
||
|
||
text = text.strip().strip(".").strip("_")
|
||
|
||
return text or default
|
||
|
||
@staticmethod
|
||
def clear_spaces(string: str):
|
||
"""将连续的空格转换为单个空格"""
|
||
return " ".join(string.split())
|
||
|
||
@classmethod
|
||
def remove_control_characters(
|
||
cls,
|
||
text,
|
||
replace="",
|
||
):
|
||
# 使用正则表达式匹配所有控制字符
|
||
return cls.CONTROL_CHARACTERS.sub(
|
||
replace,
|
||
text,
|
||
)
|
||
|
||
|
||
if __name__ == "__main__":
|
||
demo = Cleaner()
|
||
print(demo.rule)
|
||
print(demo.filter_name(""))
|
||
print(demo.remove_control_characters("hello \x08world"))
|