mirror of
https://github.com/JoeanAmier/XHS-Downloader.git
synced 2025-12-26 04:48:05 +08:00
117 lines
3.5 KiB
Python
117 lines
3.5 KiB
Python
from copy import deepcopy
|
|
from types import SimpleNamespace
|
|
|
|
from lxml.etree import HTML
|
|
from yaml import safe_load
|
|
|
|
# Names exported by a star-import of this module.
__all__ = ["Converter", "Namespace"]
|
|
|
|
|
|
class Converter:
|
|
INITIAL_STATE = "(//script)[last()]/text()"
|
|
KEYS_LINK = (
|
|
"note",
|
|
"noteDetailMap",
|
|
"[-1]",
|
|
"note",
|
|
)
|
|
|
|
def run(self, content: str) -> dict:
|
|
return self.__filter_object(
|
|
self.__convert_object(
|
|
self.__extract_object(content)))
|
|
|
|
def __extract_object(self, html: str) -> str:
|
|
html_tree = HTML(html)
|
|
return d[0] if (d := html_tree.xpath(self.INITIAL_STATE)) else ""
|
|
|
|
@staticmethod
|
|
def __convert_object(text: str) -> dict:
|
|
return safe_load(text.lstrip("window.__INITIAL_STATE__="))
|
|
|
|
@classmethod
|
|
def __filter_object(cls, data: dict) -> dict:
|
|
return cls.deep_get(data, cls.KEYS_LINK) or {}
|
|
|
|
@classmethod
|
|
def deep_get(cls, data: dict, keys: list | tuple, default=None):
|
|
try:
|
|
for key in keys:
|
|
if key.startswith("[") and key.endswith("]"):
|
|
data = cls.safe_get(data, int(key[1:-1]))
|
|
else:
|
|
data = data[key]
|
|
return data
|
|
except (KeyError, IndexError, ValueError):
|
|
return default
|
|
|
|
@staticmethod
|
|
def safe_get(data: dict | list | tuple | set, index: int):
|
|
if isinstance(data, dict):
|
|
return list(data.values())[index]
|
|
elif isinstance(data, list | tuple | set):
|
|
return data[index]
|
|
raise TypeError
|
|
|
|
|
|
class Namespace:
|
|
def __init__(self, data: dict):
|
|
self.data = self.generate_data_object(data)
|
|
|
|
@staticmethod
|
|
def generate_data_object(data: dict) -> SimpleNamespace:
|
|
def depth_conversion(element):
|
|
if isinstance(element, dict):
|
|
return SimpleNamespace(
|
|
**{k: depth_conversion(v) for k, v in element.items()})
|
|
elif isinstance(element, list):
|
|
return [depth_conversion(item) for item in element]
|
|
else:
|
|
return element
|
|
|
|
return depth_conversion(data)
|
|
|
|
def safe_extract(
|
|
self,
|
|
attribute_chain: str,
|
|
default: str | int | list | dict | SimpleNamespace = ""):
|
|
return self.__safe_extract(self.data, attribute_chain, default)
|
|
|
|
@staticmethod
|
|
def __safe_extract(
|
|
data_object,
|
|
attribute_chain: str,
|
|
default: str | int | list | dict | SimpleNamespace = "", ):
|
|
data = deepcopy(data_object)
|
|
attributes = attribute_chain.split(".")
|
|
for attribute in attributes:
|
|
if "[" in attribute:
|
|
parts = attribute.split("[", 1)
|
|
attribute = parts[0]
|
|
index = parts[1].split("]", 1)[0]
|
|
try:
|
|
index = int(index)
|
|
data = getattr(data, attribute, None)[index]
|
|
except (IndexError, TypeError, ValueError):
|
|
return default
|
|
else:
|
|
data = getattr(data, attribute, None)
|
|
if not data:
|
|
return default
|
|
return data or default
|
|
|
|
@classmethod
|
|
def object_extract(
|
|
cls,
|
|
data_object: SimpleNamespace,
|
|
attribute_chain: str,
|
|
default: str | int | list | dict | SimpleNamespace = "",
|
|
):
|
|
return cls.__safe_extract(
|
|
data_object,
|
|
attribute_chain,
|
|
default, )
|
|
|
|
def __dict__(self):
|
|
return vars(self.data)
|