From 3571129ed66daec622b0e6f55e5415b14eeb19d9 Mon Sep 17 00:00:00 2001 From: JoeamAmier Date: Sun, 20 Aug 2023 18:30:46 +0800 Subject: [PATCH] =?UTF-8?q?=E6=9B=B4=E6=96=B0=E4=BB=A3=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- main.py | 7 ++++--- source/Image.py | 25 +++++++++++++++++-------- source/Video.py | 20 ++++++++++++++++++++ source/__init__.py | 32 +++++++++++++++++--------------- 4 files changed, 58 insertions(+), 26 deletions(-) create mode 100644 source/Video.py diff --git a/main.py b/main.py index ee11758..2205da7 100644 --- a/main.py +++ b/main.py @@ -2,10 +2,11 @@ from source import XHS def example(): - test_cookie = "abRequestId=25c57ab7-8cbf-5383-b020-08852c1704e1; webBuild=3.4.1; xsecappid=xhs-pc-web; a1=18a033d274338lwsfacj9x5bpf4fznhhc8xrakemj50000250510; webId=93c0636350d85103d93bca88da2959cd; websectiga=2a3d3ea002e7d92b5c9743590ebd24010cf3710ff3af8029153751e41a6af4a3; sec_poison_id=ae1f0190-4d0c-45f5-a75f-68c2d87c5573; web_session=030037a3ed570060b3a43845a6234a2f100ef4; gid=yY08qqfYy0SSyY08qqfJWYTd4qqY1EMi0SVjC6VC2DUi4F28iuIxx0888J282y880JfWY0Di; cache_feeds=[]" - demo = "https://www.xiaohongshu.com/explore/64a3a5170000000031008914" + image_demo = "https://www.xiaohongshu.com/explore/64d1b406000000000103ee8d" + video_demo = "https://www.xiaohongshu.com/explore/64c05652000000000c0378e7" xhs = XHS() - print(xhs.get_image(demo, cookie=test_cookie)) + print(xhs.get_image(image_demo)) + print(xhs.get_video(video_demo)) if __name__ == '__main__': diff --git a/source/Image.py b/source/Image.py index 24207c4..1c91032 100644 --- a/source/Image.py +++ b/source/Image.py @@ -1,14 +1,23 @@ -from .Params import ID -from .Params import IMAGE_API +from re import compile -def get_id(html: str) -> list: - return ID.findall(html) +class Image: + IMAGE_API = "https://sns-img-qc.xhscdn.com/" + IMAGE_ID = compile(r'"traceId":"(.*?)"') + def __init__(self, html, params): + self.html = html + self.params = params -def generate_url(ids: list) -> list: - return [IMAGE_API + i for i in ids] + def get_image_link(self, url: str, download: bool): + html = self.html.get_html(url) + return self.__get_image_links(html) + def __get_id(self, html: str) -> list: + return self.IMAGE_ID.findall(html) -def get_url(html: str) -> list: - return generate_url(get_id(html)) + def __generate_url(self, ids: list) -> list: + return [self.IMAGE_API + i for i in ids] + + def __get_image_links(self, html: str) -> list: + return self.__generate_url(self.__get_id(html)) diff --git a/source/Video.py b/source/Video.py new file mode 100644 index 0000000..a9f7afa --- /dev/null +++ b/source/Video.py @@ -0,0 +1,20 @@ +from re import compile + + +class Video: + VIDEO_ID = compile(r'"masterUrl":"(.*?)"') + + def __init__(self, html, params): + self.html = html + self.params = params + + def get_video_link(self, url: str, download: bool): + html = self.html.get_html(url) + return self.__get_video_link(html) + + def __get_video_link(self, html: str) -> list: + return [self.clean_url(u) for u in self.VIDEO_ID.findall(html)] + + @staticmethod + def clean_url(url: str) -> str: + return bytes(url, "utf-8").decode("unicode_escape") diff --git a/source/__init__.py b/source/__init__.py index 901fb0c..1ccb904 100644 --- a/source/__init__.py +++ b/source/__init__.py @@ -1,22 +1,24 @@ -from .Html import get_html -from .Image import get_url +from .Html import Html +from .Image import Image from .Params import Params +from .Video import Video + +HEADERS = { + "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36 Edg/115.0.1901.203", + "Cookie": "abRequestId=fd245483-beed-57b0-abfc-440b6a6be2aa; webBuild=3.4.1; xsecappid=xhs-pc-web; a1=189fe37918ezx1jqcbe9fin95cnxqj2ewcbc250yp50000234538; webId=9fff21309cfd3e4f380a6c75ed463803; websectiga=f47eda31ec99545da40c2f731f0630efd2b0959e1dd10d5fedac3dce0bd1e04d; sec_poison_id=003395d3-6520-4a02-851a-17d093203251; web_session=030037a3efee2e602d5d16fca4234a8a44466c; gid=yYjidqWi2KE4yYjidqWjyS28YduCyVASDdjiDvU3Ij2SIS28CAVJdJ888Jq42qY88J44DyjS", +} class XHS: - def __init__(self, path="./"): + def __init__(self, path="./", headers=None): self.params = Params(path) - self.image = Image(self.params) + self.html = Html(headers or HEADERS) + self.image = Image(self.html, self.params) + self.video = Video(self.html, self.params) + self._cookie = "" - def get_image(self, url: str, cookie=None, ): - return self.image.get_image_link(url, cookie) + def get_image(self, url: str, download=False): + return self.image.get_image_link(url, download) - -class Image: - def __init__(self, params): - self.params = params - - @staticmethod - def get_image_link(url: str, cookie=None, ): - html = get_html(url, cookie=cookie, ) - return get_url(html) + def get_video(self, url: str, download=False): + return self.video.get_video_link(url, download)