From 38c93a551b158204cfc8f351616dc9c4fb2a2134 Mon Sep 17 00:00:00 2001
From: JoeamAmier
Date: Sun, 17 Sep 2023 15:55:25 +0800
Subject: [PATCH] =?UTF-8?q?=E4=BC=98=E5=8C=96=E6=97=A0=E6=B0=B4=E5=8D=B0?=
 =?UTF-8?q?=E5=9B=BE=E7=89=87=E9=93=BE=E6=8E=A5=E6=8F=90=E5=8F=96=E9=80=BB?=
 =?UTF-8?q?=E8=BE=91?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 source/Image.py | 24 ++++++++++++++++++++++--
 1 file changed, 22 insertions(+), 2 deletions(-)

diff --git a/source/Image.py b/source/Image.py
index 3b051fe..9462401 100644
--- a/source/Image.py
+++ b/source/Image.py
@@ -1,3 +1,4 @@
+from json import loads
 from re import compile
 
 from .Html import Html
@@ -6,7 +7,26 @@ __all__ = ['Image']
 
 
 class Image:
-    IMAGE_URL = compile(r'"CRD_WM_[A-Z]{3,4}","url":"(.*?_wm_1)"')
+    IMAGE_INFO = compile(r'("infoList":\[\{.*?}])')
 
     def get_image_link(self, html: str) -> list:
-        return [Html.format_url(i) for i in self.IMAGE_URL.findall(html)]
+        data = self.__extract_image_data(html)
+        data = self.__format_image_data(data)
+        return self.__extract_image_urls(data)
+
+    def __extract_image_data(self, html: str) -> list[str]:
+        return self.IMAGE_INFO.findall(html)
+
+    @staticmethod
+    def __format_image_data(data: list[str]) -> list[dict]:
+        return [loads(f"{{{i}}}") for i in data]
+
+    @staticmethod
+    def __extract_image_urls(data: list[dict]) -> list[str]:
+        urls = []
+        for i in data:
+            for j in i.get("infoList", []):
+                if j.get("imageScene", "").startswith("CRD_WM_"):
+                    urls.append(j.get("url", ""))
+                    break
+        return [Html.format_url(i) for i in urls if i]