mirror of
https://github.com/JoeanAmier/XHS-Downloader.git
synced 2025-12-26 04:48:05 +08:00
39 lines
1.3 KiB
Python
39 lines
1.3 KiB
Python
from json import loads
|
|
from re import compile
|
|
__all__ = ['Image']
|
|
|
|
|
|
class Image:
|
|
IMAGE_INFO = compile(r'("infoList":\[\{.*?}])')
|
|
IMAGE_TOKEN = compile(
|
|
r"http://sns-webpic-qc.xhscdn.com/\d+/\w+/(\w+)!")
|
|
|
|
def get_image_link(self, html: str) -> list:
|
|
data = self.__extract_image_data(html)
|
|
data = self.__format_image_data(data)
|
|
return self.__extract_image_urls(data)
|
|
|
|
def __extract_image_data(self, html: str) -> list[str]:
|
|
return self.IMAGE_INFO.findall(html)
|
|
|
|
@staticmethod
|
|
def __format_image_data(data: list[str]) -> list[dict]:
|
|
return [loads(f"{{{i}}}") for i in data]
|
|
|
|
@staticmethod
|
|
def __generate_image_link(token: str) -> str:
|
|
return f"https://ci.xiaohongshu.com/{token}?imageView2/2/w/format/png"
|
|
|
|
def __extract_image_token(self, url: str) -> str:
|
|
return self.__generate_image_link(token[0]) if len(
|
|
token := self.IMAGE_TOKEN.findall(url)) == 1 else ""
|
|
|
|
def __extract_image_urls(self, data: list[dict]) -> list[str]:
|
|
urls = []
|
|
for i in data:
|
|
for j in i.get("infoList", []):
|
|
if j.get("imageScene", "").startswith("CRD_WM_"):
|
|
urls.append(self.__extract_image_token(j.get("url", "")))
|
|
break
|
|
return [i for i in urls if i]
|