更新代码

This commit is contained in:
JoeamAmier 2023-08-26 23:33:56 +08:00
parent 4b6014d73d
commit bba6940cc8
4 changed files with 26 additions and 10 deletions

View File

@@ -6,8 +6,9 @@ def example():
image_demo = "https://www.xiaohongshu.com/explore/64d1b406000000000103ee8d"
video_demo = "https://www.xiaohongshu.com/explore/64c05652000000000c0378e7"
xhs = XHS()
print(xhs.get_image(image_demo))
print(xhs.get_video(video_demo))
# print(xhs.get_image(image_demo))
# print(xhs.get_video(video_demo))
print(xhs.extract(video_demo))
if __name__ == '__main__':

View File

@@ -5,9 +5,10 @@ class Download:
def __init__(
self,
path,
folder,
headers: dict,
proxies=None, ):
self.root = Path(path)
self.root = Path(path).joinpath(folder)
self.headers = headers
self.proxies = {
"http": proxies,

View File

@@ -1,9 +1,14 @@
from json import loads
from re import compile
class Explore:
explore_data = compile(r'"noteDetailMap": (\{.*?})')
explore_data = compile(
r'"currentTime":\d{13},"note":(.*?)}},"serverRequestInfo"')
def __init__(self, html, url: str):
self.html = html
self.url = url
def run(self, html: str):
data = self.get_json_data(html)
def get_json_data(self, html: str) -> dict:
data = self.explore_data.findall(html)
return {} if len(data) != 1 else loads(data[0])

View File

@@ -1,4 +1,5 @@
from .Download import Download
from .Explore import Explore
from .Html import Html
from .Image import Image
from .Video import Video
@@ -13,15 +14,17 @@ class XHS:
def __init__(
self,
path="./",
folder="Download",
headers=None,
proxies=None,
timeout=10,
cookie=None):
self.set_cookie(cookie)
self.html = Html(headers or self.headers, proxies, timeout)
self.image = Image(self.html)
self.video = Video(self.html)
self.download = Download(path, self.html.headers, proxies)
self.image = Image()
self.video = Video()
self.explore = Explore()
self.download = Download(path, folder, self.html.headers, proxies)
def set_cookie(self, cookie: str):
if cookie:
@@ -38,3 +41,9 @@ class XHS:
if download:
self.download.run([url])
return url
def extract(self, url: str):
html = self.html.get_html(url)
if not html:
return None
self.explore.run(html)