更新代码

This commit is contained in:
JoeamAmier
2023-08-26 23:33:56 +08:00
parent 4b6014d73d
commit bba6940cc8
4 changed files with 26 additions and 10 deletions

View File

@@ -6,8 +6,9 @@ def example():
image_demo = "https://www.xiaohongshu.com/explore/64d1b406000000000103ee8d" image_demo = "https://www.xiaohongshu.com/explore/64d1b406000000000103ee8d"
video_demo = "https://www.xiaohongshu.com/explore/64c05652000000000c0378e7" video_demo = "https://www.xiaohongshu.com/explore/64c05652000000000c0378e7"
xhs = XHS() xhs = XHS()
print(xhs.get_image(image_demo)) # print(xhs.get_image(image_demo))
print(xhs.get_video(video_demo)) # print(xhs.get_video(video_demo))
print(xhs.extract(video_demo))
if __name__ == '__main__': if __name__ == '__main__':

View File

@@ -5,9 +5,10 @@ class Download:
def __init__( def __init__(
self, self,
path, path,
folder,
headers: dict, headers: dict,
proxies=None, ): proxies=None, ):
self.root = Path(path) self.root = Path(path).joinpath(folder)
self.headers = headers self.headers = headers
self.proxies = { self.proxies = {
"http": proxies, "http": proxies,

View File

@@ -1,9 +1,14 @@
from json import loads
from re import compile from re import compile
class Explore: class Explore:
explore_data = compile(r'"noteDetailMap": (\{.*?})') explore_data = compile(
r'"currentTime":\d{13},"note":(.*?)}},"serverRequestInfo"')
def __init__(self, html, url: str): def run(self, html: str):
self.html = html data = self.get_json_data(html)
self.url = url
def get_json_data(self, html: str) -> dict:
data = self.explore_data.findall(html)
return {} if len(data) != 1 else loads(data[0])

View File

@@ -1,4 +1,5 @@
from .Download import Download from .Download import Download
from .Explore import Explore
from .Html import Html from .Html import Html
from .Image import Image from .Image import Image
from .Video import Video from .Video import Video
@@ -13,15 +14,17 @@ class XHS:
def __init__( def __init__(
self, self,
path="./", path="./",
folder="Download",
headers=None, headers=None,
proxies=None, proxies=None,
timeout=10, timeout=10,
cookie=None): cookie=None):
self.set_cookie(cookie) self.set_cookie(cookie)
self.html = Html(headers or self.headers, proxies, timeout) self.html = Html(headers or self.headers, proxies, timeout)
self.image = Image(self.html) self.image = Image()
self.video = Video(self.html) self.video = Video()
self.download = Download(path, self.html.headers, proxies) self.explore = Explore()
self.download = Download(path, folder, self.html.headers, proxies)
def set_cookie(self, cookie: str): def set_cookie(self, cookie: str):
if cookie: if cookie:
@@ -38,3 +41,9 @@ class XHS:
if download: if download:
self.download.run([url]) self.download.run([url])
return url return url
def extract(self, url: str):
html = self.html.get_html(url)
if not html:
return None
self.explore.run(html)