mirror of
https://github.com/JoeanAmier/XHS-Downloader.git
synced 2026-03-22 06:57:16 +08:00
优化无水印图片链接提取逻辑
This commit is contained in:
@@ -1,3 +1,4 @@
|
||||
from json import loads
|
||||
from re import compile
|
||||
|
||||
from .Html import Html
|
||||
@@ -6,7 +7,26 @@ __all__ = ['Image']
|
||||
|
||||
|
||||
class Image:
    """Extract image links from page HTML via its embedded ``infoList`` JSON."""

    # Legacy pattern that captured the URL directly from the raw text.
    # No longer used by get_image_link; kept so external references to
    # ``Image.IMAGE_URL`` keep working.
    IMAGE_URL = compile(r'"CRD_WM_[A-Z]{3,4}","url":"(.*?_wm_1)"')
    # Captures every '"infoList":[{...}]' fragment embedded in the page.
    IMAGE_INFO = compile(r'("infoList":\[\{.*?}])')

    def get_image_link(self, html: str) -> list:
        """Return the formatted image URLs found in *html*.

        The page is scanned for ``"infoList"`` fragments, each fragment is
        parsed as JSON, and one ``CRD_WM_*`` URL is taken per fragment.

        Args:
            html: Raw page source to scan.

        Returns:
            A list of URLs passed through ``Html.format_url``; empty when
            nothing usable is found.
        """
        data = self.__extract_image_data(html)
        data = self.__format_image_data(data)
        return self.__extract_image_urls(data)

    def __extract_image_data(self, html: str) -> list[str]:
        """Return every raw ``"infoList":[...]`` fragment matched in *html*."""
        return self.IMAGE_INFO.findall(html)

    @staticmethod
    def __format_image_data(data: list[str]) -> list[dict]:
        """Parse raw fragments into dicts, skipping any that are not valid JSON.

        Each fragment is a bare ``"key": value`` pair, so it is wrapped in
        braces to form a JSON object before parsing.
        """
        parsed = []
        for fragment in data:
            try:
                parsed.append(loads(f"{{{fragment}}}"))
            except ValueError:
                # json.JSONDecodeError subclasses ValueError. The lazy regex
                # can cut a fragment short; one bad fragment must not abort
                # extraction of the remaining images.
                continue
        return parsed

    @staticmethod
    def __extract_image_urls(data: list[dict]) -> list[str]:
        """Pick the first ``CRD_WM_*`` URL from each parsed ``infoList``.

        Entries whose ``imageScene`` is missing or not a string are ignored,
        and empty URLs are dropped before formatting.
        """
        urls = []
        for item in data:
            for info in item.get("infoList", []):
                scene = info.get("imageScene", "")
                if isinstance(scene, str) and scene.startswith("CRD_WM_"):
                    urls.append(info.get("url", ""))
                    # Only the first matching scene per image is wanted.
                    break
        return [Html.format_url(url) for url in urls if url]
|
||||
|
||||
Reference in New Issue
Block a user