mirror of
https://github.com/JoeanAmier/XHS-Downloader.git
synced 2025-12-26 04:48:05 +08:00
优化项目代码
This commit is contained in:
parent
039f9f9785
commit
0d22a8f4c0
@ -143,7 +143,7 @@ async with XHS(work_path=work_path,
|
||||
<tr>
|
||||
<td align="center">cookie</td>
|
||||
<td align="center">str</td>
|
||||
<td align="center">小红书网页版 Cookie,<b>无需登录</b></td>
|
||||
<td align="center">小红书网页版 Cookie,<b>无需登录,建议修改</b></td>
|
||||
<td align="center">默认 Cookie</td>
|
||||
</tr>
|
||||
<tr>
|
||||
|
||||
@ -36,5 +36,5 @@ class XHSDownloader(App):
|
||||
async def action_settings(self):
|
||||
await self.push_screen("setting")
|
||||
|
||||
async def action_back(self):
|
||||
async def action_index(self):
|
||||
await self.push_screen("index")
|
||||
|
||||
@ -15,7 +15,7 @@ class Setting(Screen):
|
||||
"static/css/setting.tcss")
|
||||
BINDINGS = [
|
||||
Binding(key="q", action="quit", description="退出程序"),
|
||||
Binding(key="b", action="back", description="返回首页"),
|
||||
Binding(key="b", action="index", description="返回首页"),
|
||||
]
|
||||
|
||||
def compose(self) -> ComposeResult:
|
||||
|
||||
@ -114,11 +114,10 @@ class XHS:
|
||||
async def __deal_extract(self, url: str, download: bool, log, bar):
|
||||
logging(log, self.prompt.start_processing(url))
|
||||
html = await self.html.request_url(url, log=log)
|
||||
# logging(log, html) # 调试代码
|
||||
if not html:
|
||||
namespace = self.__generate_data_object(html)
|
||||
if not namespace:
|
||||
logging(log, self.prompt.get_data_failure(url), ERROR)
|
||||
return {}
|
||||
namespace = self.__generate_data_object(html)
|
||||
data = self.explore.run(namespace)
|
||||
# logging(log, data) # 调试代码
|
||||
if not data:
|
||||
|
||||
@ -1,3 +1,5 @@
|
||||
from typing import Union
|
||||
|
||||
from lxml.etree import HTML
|
||||
from yaml import safe_load
|
||||
|
||||
@ -19,6 +21,8 @@ class Converter:
|
||||
self.__extract_object(content)))
|
||||
|
||||
def __extract_object(self, html: str) -> str:
    """Return the first node matched by ``self.INITIAL_STATE`` in *html*.

    Args:
        html: Raw HTML text of the page; may be empty.

    Returns:
        The first XPath match, or ``""`` when *html* is empty, does not
        parse into a tree, or contains no match.
    """
    if not html:
        return ""
    html_tree = HTML(html)
    # NOTE(review): lxml appears to hand back None instead of an element for
    # some degenerate inputs (e.g. whitespace-only text) -- confirm. The guard
    # keeps the "" fallback instead of raising AttributeError on .xpath().
    if html_tree is None:
        return ""
    return d[0] if (d := html_tree.xpath(self.INITIAL_STATE)) else ""
|
||||
|
||||
@ -32,6 +36,8 @@ class Converter:
|
||||
|
||||
@classmethod
|
||||
def deep_get(cls, data: dict, keys: list | tuple, default=None):
|
||||
if not data:
|
||||
return default
|
||||
try:
|
||||
for key in keys:
|
||||
if key.startswith("[") and key.endswith("]"):
|
||||
@ -39,11 +45,11 @@ class Converter:
|
||||
else:
|
||||
data = data[key]
|
||||
return data
|
||||
except (KeyError, IndexError, ValueError):
|
||||
except (KeyError, IndexError, ValueError, TypeError):
|
||||
return default
|
||||
|
||||
@staticmethod
|
||||
def safe_get(data: dict | list | tuple | set, index: int):
|
||||
def safe_get(data: Union[dict, list, tuple, set], index: int):
|
||||
if isinstance(data, dict):
|
||||
return list(data.values())[index]
|
||||
elif isinstance(data, list | tuple | set):
|
||||
|
||||
@ -1,12 +1,13 @@
|
||||
from copy import deepcopy
|
||||
from types import SimpleNamespace
|
||||
from typing import Union
|
||||
|
||||
__all__ = ["Namespace"]
|
||||
|
||||
|
||||
class Namespace:
|
||||
def __init__(self, data: dict):
|
||||
self.data = self.generate_data_object(data)
|
||||
def __init__(self, data: dict) -> None:
|
||||
self.data: SimpleNamespace = self.generate_data_object(data)
|
||||
|
||||
@staticmethod
|
||||
def generate_data_object(data: dict) -> SimpleNamespace:
|
||||
@ -24,21 +25,21 @@ class Namespace:
|
||||
def safe_extract(
|
||||
self,
|
||||
attribute_chain: str,
|
||||
default: str | int | list | dict | SimpleNamespace = ""):
|
||||
default: Union[str, int, list, dict, SimpleNamespace] = ""):
|
||||
return self.__safe_extract(self.data, attribute_chain, default)
|
||||
|
||||
@staticmethod
|
||||
def __safe_extract(
|
||||
data_object,
|
||||
data_object: SimpleNamespace,
|
||||
attribute_chain: str,
|
||||
default: str | int | list | dict | SimpleNamespace = "", ):
|
||||
default: Union[str, int, list, dict, SimpleNamespace] = "", ):
|
||||
data = deepcopy(data_object)
|
||||
attributes = attribute_chain.split(".")
|
||||
for attribute in attributes:
|
||||
if "[" in attribute:
|
||||
parts = attribute.split("[", 1)
|
||||
attribute = parts[0]
|
||||
index = parts[1].split("]", 1)[0]
|
||||
index = parts[1][:-1]
|
||||
try:
|
||||
index = int(index)
|
||||
data = getattr(data, attribute, None)[index]
|
||||
@ -55,12 +56,24 @@ class Namespace:
|
||||
cls,
|
||||
data_object: SimpleNamespace,
|
||||
attribute_chain: str,
|
||||
default: str | int | list | dict | SimpleNamespace = "",
|
||||
default: Union[str, int, list, dict, SimpleNamespace] = "",
|
||||
):
|
||||
return cls.__safe_extract(
|
||||
data_object,
|
||||
attribute_chain,
|
||||
default, )
|
||||
|
||||
@property
|
||||
def __dict__(self):
|
||||
return vars(self.data)
|
||||
return self.convert_to_dict(self.data)
|
||||
|
||||
@classmethod
def convert_to_dict(cls, data) -> dict:
    """Recursively convert a ``SimpleNamespace`` tree into plain dicts.

    Args:
        data: Object whose attributes are read with ``vars()``; nested
            ``SimpleNamespace`` values are converted recursively.

    Returns:
        A dict keyed by attribute name. Nested ``SimpleNamespace`` values
        become dicts, and lists are rebuilt with their items converted the
        same way; every other value is passed through unchanged.
    """

    def convert(value):
        if isinstance(value, SimpleNamespace):
            return cls.convert_to_dict(value)
        if isinstance(value, list):
            # Namespaces stored inside lists would otherwise stay
            # unconverted in the resulting dict.
            return [convert(item) for item in value]
        return value

    return {key: convert(value) for key, value in vars(data).items()}
|
||||
|
||||
def __bool__(self):
|
||||
return bool(vars(self.data))
|
||||
|
||||
@ -20,6 +20,7 @@ from .static import (
|
||||
USERSCRIPT,
|
||||
USERAGENT,
|
||||
COOKIE,
|
||||
HEADERS,
|
||||
)
|
||||
from .tools import (
|
||||
retry,
|
||||
@ -49,6 +50,7 @@ __all__ = [
|
||||
"USERSCRIPT",
|
||||
"USERAGENT",
|
||||
"COOKIE",
|
||||
"HEADERS",
|
||||
"retry",
|
||||
"logging",
|
||||
"wait",
|
||||
|
||||
@ -12,6 +12,7 @@ from aiohttp import ClientTimeout
|
||||
from source.translator import Chinese
|
||||
from source.translator import English
|
||||
from .static import COOKIE
|
||||
from .static import HEADERS
|
||||
from .static import USERAGENT
|
||||
|
||||
__all__ = ["Manager"]
|
||||
@ -40,9 +41,9 @@ class Manager:
|
||||
self.temp = root.joinpath("./temp")
|
||||
self.path = self.__check_path(path)
|
||||
self.folder = self.__check_folder(folder)
|
||||
self.headers = {
|
||||
"User-Agent": user_agent or USERAGENT,
|
||||
"Cookie": cookie or COOKIE}
|
||||
self.blank_headers = HEADERS | {
|
||||
"User-Agent": user_agent or USERAGENT, }
|
||||
self.headers = self.blank_headers | {"Cookie": cookie or COOKIE}
|
||||
self.retry = retry
|
||||
self.chunk = chunk
|
||||
self.record_data = record_data
|
||||
@ -51,11 +52,11 @@ class Manager:
|
||||
self.proxy = proxy
|
||||
self.request_session = ClientSession(
|
||||
headers=self.headers | {
|
||||
"Referer": "https://www.xiaohongshu.com/", },
|
||||
"Referer": "https://www.xiaohongshu.com/explore", },
|
||||
timeout=ClientTimeout(connect=timeout),
|
||||
)
|
||||
self.download_session = ClientSession(
|
||||
headers={"User-Agent": self.headers["User-Agent"]},
|
||||
headers=self.blank_headers,
|
||||
timeout=ClientTimeout(connect=timeout))
|
||||
self.prompt = language
|
||||
|
||||
|
||||
@ -18,6 +18,7 @@ __all__ = [
|
||||
"USERSCRIPT",
|
||||
"USERAGENT",
|
||||
"COOKIE",
|
||||
"HEADERS",
|
||||
]
|
||||
|
||||
VERSION_MAJOR = 1
|
||||
@ -31,17 +32,31 @@ RELEASES = "https://github.com/JoeanAmier/XHS-Downloader/releases/latest"
|
||||
|
||||
USERSCRIPT = "https://raw.githubusercontent.com/JoeanAmier/XHS-Downloader/master/static/XHS-Downloader.js"
|
||||
|
||||
# Browser-like default request headers (values mimic Microsoft Edge 120 on
# Windows). They carry no User-Agent or Cookie entry.
# NOTE(review): "br" is advertised in Accept-Encoding; aiohttp can only decode
# Brotli response bodies when a Brotli package is installed -- confirm it is a
# declared dependency, otherwise drop "br" from the value.
HEADERS = {
    "Accept": (
        "text/html,application/xhtml+xml,application/xml;q=0.9,"
        "image/webp,image/apng,*/*;q=0.8,"
        "application/signed-exchange;v=b3;q=0.7"
    ),
    "Accept-Encoding": "gzip, deflate, br",
    "Accept-Language": "zh-CN,zh;q=0.9",
    "Cache-Control": "max-age=0",
    "Dnt": "1",
    "Sec-Ch-Ua": (
        '"Not_A Brand";v="8", "Chromium";v="120", "Microsoft Edge";v="120"'
    ),
    "Sec-Ch-Ua-Mobile": "?0",
    "Sec-Ch-Ua-Platform": '"Windows"',
    "Sec-Fetch-Dest": "document",
    "Sec-Fetch-Mode": "navigate",
    "Sec-Fetch-Site": "none",
    "Sec-Fetch-User": "?1",
    "Upgrade-Insecure-Requests": "1",
}
|
||||
USERAGENT = (
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 "
|
||||
"Safari/537.36")
|
||||
"Safari/537.36 Edg/120.0.0.0")
|
||||
COOKIE = (
|
||||
"abRequestId=54c534bb-a2c6-558f-8e03-5b4c5c45635c; xsecappid=xhs-pc-web; a1=18c286a400"
|
||||
"4jy56qvzejvp631col0hd3032h4zjez50000106381; webId=779c977da3a15b5623015be94bdcc9e9; g"
|
||||
"id=yYSJYK0qDW8KyYSJYK048quV84Vv2KAhudVhJduUKqySlx2818xfq4888y8KqYy8y2y2f8Jy; web_sess"
|
||||
"ion=030037a259ce5f15c8d560dc12224a9fdc2ed1; webBuild=3.19.4; websectiga=984412fef754c"
|
||||
"018e472127b8effd174be8a5d51061c991aadd200c69a2801d6; sec_poison_id=3dd48845-d604-4535"
|
||||
"-bcc2-a859e97518bf; unread={%22ub%22:%22655eb3d60000000032033955%22%2C%22ue%22:%22656"
|
||||
"e9ef2000000003801ff3d%22%2C%22uc%22:29}; cache_feeds=[]")
|
||||
"abRequestId=a1c55c3d-edcd-5753-938b-15d22a78cb8a; webBuild=3.23.2; "
|
||||
"a1=18ceecc41c5d2gkprctahn1jayh458m5eoos9grxb50000267832; webId=79879aaf1b46fa2120dfba20d6155928; "
|
||||
"websectiga=3fff3a6f9f07284b62c0f2ebf91a3b10193175c06e4f71492b60e056edcdebb2; "
|
||||
"sec_poison_id=52bff38d-96eb-40b6-a46b-5e7cc86014e4; web_session=030037a2ae3713ec49882425e5224a3cbb4eef; "
|
||||
"gid=yYSddSS2DKdyyYSddSS4ylkFS2fJkTUFS90xlCDIyV0vxM2842Y62j888JKWYqJ8iDD4KY2d; xsecappid=xhs-pc-web")
|
||||
|
||||
MASTER = "b #fff200"
|
||||
PROMPT = "b turquoise2"
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user