优化项目代码

This commit is contained in:
yongquan 2024-01-09 19:18:49 +08:00
parent 039f9f9785
commit 0d22a8f4c0
9 changed files with 65 additions and 29 deletions

View File

@ -143,7 +143,7 @@ async with XHS(work_path=work_path,
<tr>
<td align="center">cookie</td>
<td align="center">str</td>
<td align="center">小红书网页版 Cookie<b>无需登录</b></td>
<td align="center">小红书网页版 Cookie<b>无需登录,建议修改</b></td>
<td align="center">默认 Cookie</td>
</tr>
<tr>

View File

@ -36,5 +36,5 @@ class XHSDownloader(App):
async def action_settings(self):
    """Push the screen named "setting" via ``push_screen``."""
    await self.push_screen("setting")
async def action_back(self):
async def action_index(self):
    """Push the screen named "index" via ``push_screen``.

    The method name follows the ``action_<name>`` pattern, so it pairs with
    bindings that declare ``action="index"``.
    """
    await self.push_screen("index")

View File

@ -15,7 +15,7 @@ class Setting(Screen):
"static/css/setting.tcss")
BINDINGS = [
Binding(key="q", action="quit", description="退出程序"),
Binding(key="b", action="back", description="返回首页"),
Binding(key="b", action="index", description="返回首页"),
]
def compose(self) -> ComposeResult:

View File

@ -114,11 +114,10 @@ class XHS:
async def __deal_extract(self, url: str, download: bool, log, bar):
logging(log, self.prompt.start_processing(url))
html = await self.html.request_url(url, log=log)
# logging(log, html) # 调试代码
if not html:
namespace = self.__generate_data_object(html)
if not namespace:
logging(log, self.prompt.get_data_failure(url), ERROR)
return {}
namespace = self.__generate_data_object(html)
data = self.explore.run(namespace)
# logging(log, data) # 调试代码
if not data:

View File

@ -1,3 +1,5 @@
from typing import Union
from lxml.etree import HTML
from yaml import safe_load
@ -19,6 +21,8 @@ class Converter:
self.__extract_object(content)))
def __extract_object(self, html: str) -> str:
    """Return the first XPath match for ``self.INITIAL_STATE`` in *html*.

    A falsy *html* (empty string or ``None``) is answered with ``""``
    without attempting to parse it. ``""`` is also returned when the
    query yields nothing.
    """
    if not html:
        return ""
    matches = HTML(html).xpath(self.INITIAL_STATE)
    if matches:
        return matches[0]
    return ""
@ -32,6 +36,8 @@ class Converter:
@classmethod
def deep_get(cls, data: dict, keys: list | tuple, default=None):
if not data:
return default
try:
for key in keys:
if key.startswith("[") and key.endswith("]"):
@ -39,11 +45,11 @@ class Converter:
else:
data = data[key]
return data
except (KeyError, IndexError, ValueError):
except (KeyError, IndexError, ValueError, TypeError):
return default
@staticmethod
def safe_get(data: dict | list | tuple | set, index: int):
def safe_get(data: Union[dict, list, tuple, set], index: int):
if isinstance(data, dict):
return list(data.values())[index]
elif isinstance(data, list | tuple | set):

View File

@ -1,12 +1,13 @@
from copy import deepcopy
from types import SimpleNamespace
from typing import Union
__all__ = ["Namespace"]
class Namespace:
def __init__(self, data: dict):
self.data = self.generate_data_object(data)
def __init__(self, data: dict) -> None:
self.data: SimpleNamespace = self.generate_data_object(data)
@staticmethod
def generate_data_object(data: dict) -> SimpleNamespace:
@ -24,21 +25,21 @@ class Namespace:
def safe_extract(
self,
attribute_chain: str,
default: str | int | list | dict | SimpleNamespace = ""):
default: Union[str, int, list, dict, SimpleNamespace] = ""):
return self.__safe_extract(self.data, attribute_chain, default)
@staticmethod
def __safe_extract(
data_object,
data_object: SimpleNamespace,
attribute_chain: str,
default: str | int | list | dict | SimpleNamespace = "", ):
default: Union[str, int, list, dict, SimpleNamespace] = "", ):
data = deepcopy(data_object)
attributes = attribute_chain.split(".")
for attribute in attributes:
if "[" in attribute:
parts = attribute.split("[", 1)
attribute = parts[0]
index = parts[1].split("]", 1)[0]
index = parts[1][:-1]
try:
index = int(index)
data = getattr(data, attribute, None)[index]
@ -55,12 +56,24 @@ class Namespace:
cls,
data_object: SimpleNamespace,
attribute_chain: str,
default: str | int | list | dict | SimpleNamespace = "",
default: Union[str, int, list, dict, SimpleNamespace] = "",
):
return cls.__safe_extract(
data_object,
attribute_chain,
default, )
@property
def __dict__(self):
return vars(self.data)
return self.convert_to_dict(self.data)
@classmethod
def convert_to_dict(cls, data) -> dict:
return {
key: cls.convert_to_dict(value) if isinstance(
value,
SimpleNamespace) else value for key,
value in vars(data).items()}
def __bool__(self):
return bool(vars(self.data))

View File

@ -20,6 +20,7 @@ from .static import (
USERSCRIPT,
USERAGENT,
COOKIE,
HEADERS,
)
from .tools import (
retry,
@ -49,6 +50,7 @@ __all__ = [
"USERSCRIPT",
"USERAGENT",
"COOKIE",
"HEADERS",
"retry",
"logging",
"wait",

View File

@ -12,6 +12,7 @@ from aiohttp import ClientTimeout
from source.translator import Chinese
from source.translator import English
from .static import COOKIE
from .static import HEADERS
from .static import USERAGENT
__all__ = ["Manager"]
@ -40,9 +41,9 @@ class Manager:
self.temp = root.joinpath("./temp")
self.path = self.__check_path(path)
self.folder = self.__check_folder(folder)
self.headers = {
"User-Agent": user_agent or USERAGENT,
"Cookie": cookie or COOKIE}
self.blank_headers = HEADERS | {
"User-Agent": user_agent or USERAGENT, }
self.headers = self.blank_headers | {"Cookie": cookie or COOKIE}
self.retry = retry
self.chunk = chunk
self.record_data = record_data
@ -51,11 +52,11 @@ class Manager:
self.proxy = proxy
self.request_session = ClientSession(
headers=self.headers | {
"Referer": "https://www.xiaohongshu.com/", },
"Referer": "https://www.xiaohongshu.com/explore", },
timeout=ClientTimeout(connect=timeout),
)
self.download_session = ClientSession(
headers={"User-Agent": self.headers["User-Agent"]},
headers=self.blank_headers,
timeout=ClientTimeout(connect=timeout))
self.prompt = language

View File

@ -18,6 +18,7 @@ __all__ = [
"USERSCRIPT",
"USERAGENT",
"COOKIE",
"HEADERS",
]
VERSION_MAJOR = 1
@ -31,17 +32,31 @@ RELEASES = "https://github.com/JoeanAmier/XHS-Downloader/releases/latest"
USERSCRIPT = "https://raw.githubusercontent.com/JoeanAmier/XHS-Downloader/master/static/XHS-Downloader.js"
# Baseline HTTP request headers. The values describe a top-level document
# navigation from a desktop browser (the Sec-Ch-Ua* entries name Chromium /
# Microsoft Edge 120 on Windows). User-Agent and Cookie are not part of this
# mapping.
HEADERS = {
    # The two adjacent string literals below are concatenated into a single
    # Accept value.
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,"
    "application/signed-exchange;v=b3;q=0.7",
    # NOTE(review): advertising "br" assumes the HTTP client in use can decode
    # Brotli responses — confirm the required decoder package is installed.
    "Accept-Encoding": "gzip, deflate, br",
    "Accept-Language": "zh-CN,zh;q=0.9",
    "Cache-Control": "max-age=0",
    "Dnt": "1",
    "Sec-Ch-Ua": "\"Not_A Brand\";v=\"8\", \"Chromium\";v=\"120\", \"Microsoft Edge\";v=\"120\"",
    "Sec-Ch-Ua-Mobile": "?0",
    "Sec-Ch-Ua-Platform": "\"Windows\"",
    "Sec-Fetch-Dest": "document",
    "Sec-Fetch-Mode": "navigate",
    "Sec-Fetch-Site": "none",
    "Sec-Fetch-User": "?1",
    "Upgrade-Insecure-Requests": "1",
}
USERAGENT = (
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 "
"Safari/537.36")
"Safari/537.36 Edg/120.0.0.0")
COOKIE = (
"abRequestId=54c534bb-a2c6-558f-8e03-5b4c5c45635c; xsecappid=xhs-pc-web; a1=18c286a400"
"4jy56qvzejvp631col0hd3032h4zjez50000106381; webId=779c977da3a15b5623015be94bdcc9e9; g"
"id=yYSJYK0qDW8KyYSJYK048quV84Vv2KAhudVhJduUKqySlx2818xfq4888y8KqYy8y2y2f8Jy; web_sess"
"ion=030037a259ce5f15c8d560dc12224a9fdc2ed1; webBuild=3.19.4; websectiga=984412fef754c"
"018e472127b8effd174be8a5d51061c991aadd200c69a2801d6; sec_poison_id=3dd48845-d604-4535"
"-bcc2-a859e97518bf; unread={%22ub%22:%22655eb3d60000000032033955%22%2C%22ue%22:%22656"
"e9ef2000000003801ff3d%22%2C%22uc%22:29}; cache_feeds=[]")
"abRequestId=a1c55c3d-edcd-5753-938b-15d22a78cb8a; webBuild=3.23.2; "
"a1=18ceecc41c5d2gkprctahn1jayh458m5eoos9grxb50000267832; webId=79879aaf1b46fa2120dfba20d6155928; "
"websectiga=3fff3a6f9f07284b62c0f2ebf91a3b10193175c06e4f71492b60e056edcdebb2; "
"sec_poison_id=52bff38d-96eb-40b6-a46b-5e7cc86014e4; web_session=030037a2ae3713ec49882425e5224a3cbb4eef; "
"gid=yYSddSS2DKdyyYSddSS4ylkFS2fJkTUFS90xlCDIyV0vxM2842Y62j888JKWYqJ8iDD4KY2d; xsecappid=xhs-pc-web")
MASTER = "b #fff200"
PROMPT = "b turquoise2"