更新代码

This commit is contained in:
JoeamAmier 2023-08-20 18:30:46 +08:00
parent fd810cb58a
commit 3571129ed6
4 changed files with 58 additions and 26 deletions

View File

@ -2,10 +2,11 @@ from source import XHS
def example(): def example():
test_cookie = "abRequestId=25c57ab7-8cbf-5383-b020-08852c1704e1; webBuild=3.4.1; xsecappid=xhs-pc-web; a1=18a033d274338lwsfacj9x5bpf4fznhhc8xrakemj50000250510; webId=93c0636350d85103d93bca88da2959cd; websectiga=2a3d3ea002e7d92b5c9743590ebd24010cf3710ff3af8029153751e41a6af4a3; sec_poison_id=ae1f0190-4d0c-45f5-a75f-68c2d87c5573; web_session=030037a3ed570060b3a43845a6234a2f100ef4; gid=yY08qqfYy0SSyY08qqfJWYTd4qqY1EMi0SVjC6VC2DUi4F28iuIxx0888J282y880JfWY0Di; cache_feeds=[]" image_demo = "https://www.xiaohongshu.com/explore/64d1b406000000000103ee8d"
demo = "https://www.xiaohongshu.com/explore/64a3a5170000000031008914" video_demo = "https://www.xiaohongshu.com/explore/64c05652000000000c0378e7"
xhs = XHS() xhs = XHS()
print(xhs.get_image(demo, cookie=test_cookie)) print(xhs.get_image(image_demo))
print(xhs.get_video(video_demo))
if __name__ == '__main__': if __name__ == '__main__':

View File

@ -1,14 +1,23 @@
from .Params import ID from re import compile
from .Params import IMAGE_API
def get_id(html: str) -> list: class Image:
return ID.findall(html) IMAGE_API = "https://sns-img-qc.xhscdn.com/"
IMAGE_ID = compile(r'"traceId":"(.*?)"')
def __init__(self, html, params):
self.html = html
self.params = params
def generate_url(ids: list) -> list: def get_image_link(self, url: str, download: bool):
return [IMAGE_API + i for i in ids] html = self.html.get_html(url)
return self.__get_image_links(html)
def __get_id(self, html: str) -> list:
return self.IMAGE_ID.findall(html)
def get_url(html: str) -> list: def __generate_url(self, ids: list) -> list:
return generate_url(get_id(html)) return [self.IMAGE_API + i for i in ids]
def __get_image_links(self, html: str) -> list:
return self.__generate_url(self.__get_id(html))

20
source/Video.py Normal file
View File

@ -0,0 +1,20 @@
from re import compile
class Video:
    """Extract video links from the HTML of a page.

    The page is fetched through the ``html`` collaborator given to the
    constructor, which must provide a ``get_html(url)`` method.
    """

    # Captures the raw value of every "masterUrl" field found in the page.
    VIDEO_ID = compile(r'"masterUrl":"(.*?)"')

    def __init__(self, html, params):
        """Store the page fetcher (``html``) and the shared ``params``."""
        self.html = html
        self.params = params

    def get_video_link(self, url: str, download: bool) -> list:
        """Return the cleaned video links found on the page at *url*.

        ``download`` is accepted for interface compatibility; it is not
        used by this class.
        """
        page = self.html.get_html(url)
        return self.__get_video_link(page)

    def __get_video_link(self, html: str) -> list:
        """Return every "masterUrl" value in *html*, with escapes decoded."""
        # NOTE(review): assumes a failed fetch yields an empty/None page;
        # in that case report "no links" instead of raising in findall().
        if not html:
            return []
        return [self.clean_url(u) for u in self.VIDEO_ID.findall(html)]

    @staticmethod
    def clean_url(url: str) -> str:
        """Decode backslash escape sequences embedded in *url*.

        Raises:
            UnicodeDecodeError: if *url* contains a malformed escape.
        """
        # The "unicode_escape" codec interprets non-escape bytes as Latin-1,
        # so encoding with UTF-8 first would turn any non-ASCII character
        # into mojibake. Encoding with Latin-1 + "backslashreplace" keeps
        # Latin-1 characters as single bytes and rewrites everything else as
        # \uXXXX / \UXXXXXXXX escapes, which the decoder then restores intact
        # alongside the escapes already present (e.g. \u002F -> "/").
        return url.encode("latin-1", "backslashreplace").decode("unicode_escape")

View File

@ -1,22 +1,24 @@
from .Html import get_html from .Html import Html
from .Image import get_url from .Image import Image
from .Params import Params from .Params import Params
from .Video import Video
HEADERS = {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36 Edg/115.0.1901.203",
"Cookie": "abRequestId=fd245483-beed-57b0-abfc-440b6a6be2aa; webBuild=3.4.1; xsecappid=xhs-pc-web; a1=189fe37918ezx1jqcbe9fin95cnxqj2ewcbc250yp50000234538; webId=9fff21309cfd3e4f380a6c75ed463803; websectiga=f47eda31ec99545da40c2f731f0630efd2b0959e1dd10d5fedac3dce0bd1e04d; sec_poison_id=003395d3-6520-4a02-851a-17d093203251; web_session=030037a3efee2e602d5d16fca4234a8a44466c; gid=yYjidqWi2KE4yYjidqWjyS28YduCyVASDdjiDvU3Ij2SIS28CAVJdJ888Jq42qY88J44DyjS",
}
class XHS: class XHS:
def __init__(self, path="./"): def __init__(self, path="./", headers=None):
self.params = Params(path) self.params = Params(path)
self.image = Image(self.params) self.html = Html(headers or HEADERS)
self.image = Image(self.html, self.params)
self.video = Video(self.html, self.params)
self._cookie = ""
def get_image(self, url: str, cookie=None, ): def get_image(self, url: str, download=False):
return self.image.get_image_link(url, cookie) return self.image.get_image_link(url, download)
def get_video(self, url: str, download=False):
class Image: return self.video.get_video_link(url, download)
def __init__(self, params):
self.params = params
@staticmethod
def get_image_link(url: str, cookie=None, ):
html = get_html(url, cookie=cookie, )
return get_url(html)