优化项目代码

This commit is contained in:
yongquan
2024-01-09 19:18:49 +08:00
parent 039f9f9785
commit 0d22a8f4c0
9 changed files with 65 additions and 29 deletions

View File

@@ -143,7 +143,7 @@ async with XHS(work_path=work_path,
<tr> <tr>
<td align="center">cookie</td> <td align="center">cookie</td>
<td align="center">str</td> <td align="center">str</td>
<td align="center">小红书网页版 Cookie<b>无需登录</b></td> <td align="center">小红书网页版 Cookie<b>无需登录,建议修改</b></td>
<td align="center">默认 Cookie</td> <td align="center">默认 Cookie</td>
</tr> </tr>
<tr> <tr>

View File

@@ -36,5 +36,5 @@ class XHSDownloader(App):
async def action_settings(self): async def action_settings(self):
await self.push_screen("setting") await self.push_screen("setting")
async def action_back(self): async def action_index(self):
await self.push_screen("index") await self.push_screen("index")

View File

@@ -15,7 +15,7 @@ class Setting(Screen):
"static/css/setting.tcss") "static/css/setting.tcss")
BINDINGS = [ BINDINGS = [
Binding(key="q", action="quit", description="退出程序"), Binding(key="q", action="quit", description="退出程序"),
Binding(key="b", action="back", description="返回首页"), Binding(key="b", action="index", description="返回首页"),
] ]
def compose(self) -> ComposeResult: def compose(self) -> ComposeResult:

View File

@@ -114,11 +114,10 @@ class XHS:
async def __deal_extract(self, url: str, download: bool, log, bar): async def __deal_extract(self, url: str, download: bool, log, bar):
logging(log, self.prompt.start_processing(url)) logging(log, self.prompt.start_processing(url))
html = await self.html.request_url(url, log=log) html = await self.html.request_url(url, log=log)
# logging(log, html) # 调试代码 namespace = self.__generate_data_object(html)
if not html: if not namespace:
logging(log, self.prompt.get_data_failure(url), ERROR) logging(log, self.prompt.get_data_failure(url), ERROR)
return {} return {}
namespace = self.__generate_data_object(html)
data = self.explore.run(namespace) data = self.explore.run(namespace)
# logging(log, data) # 调试代码 # logging(log, data) # 调试代码
if not data: if not data:

View File

@@ -1,3 +1,5 @@
from typing import Union
from lxml.etree import HTML from lxml.etree import HTML
from yaml import safe_load from yaml import safe_load
@@ -19,6 +21,8 @@ class Converter:
self.__extract_object(content))) self.__extract_object(content)))
def __extract_object(self, html: str) -> str: def __extract_object(self, html: str) -> str:
if not html:
return ""
html_tree = HTML(html) html_tree = HTML(html)
return d[0] if (d := html_tree.xpath(self.INITIAL_STATE)) else "" return d[0] if (d := html_tree.xpath(self.INITIAL_STATE)) else ""
@@ -32,6 +36,8 @@ class Converter:
@classmethod @classmethod
def deep_get(cls, data: dict, keys: list | tuple, default=None): def deep_get(cls, data: dict, keys: list | tuple, default=None):
if not data:
return default
try: try:
for key in keys: for key in keys:
if key.startswith("[") and key.endswith("]"): if key.startswith("[") and key.endswith("]"):
@@ -39,11 +45,11 @@ class Converter:
else: else:
data = data[key] data = data[key]
return data return data
except (KeyError, IndexError, ValueError): except (KeyError, IndexError, ValueError, TypeError):
return default return default
@staticmethod @staticmethod
def safe_get(data: dict | list | tuple | set, index: int): def safe_get(data: Union[dict, list, tuple, set], index: int):
if isinstance(data, dict): if isinstance(data, dict):
return list(data.values())[index] return list(data.values())[index]
elif isinstance(data, list | tuple | set): elif isinstance(data, list | tuple | set):

View File

@@ -1,12 +1,13 @@
from copy import deepcopy from copy import deepcopy
from types import SimpleNamespace from types import SimpleNamespace
from typing import Union
__all__ = ["Namespace"] __all__ = ["Namespace"]
class Namespace: class Namespace:
def __init__(self, data: dict): def __init__(self, data: dict) -> None:
self.data = self.generate_data_object(data) self.data: SimpleNamespace = self.generate_data_object(data)
@staticmethod @staticmethod
def generate_data_object(data: dict) -> SimpleNamespace: def generate_data_object(data: dict) -> SimpleNamespace:
@@ -24,21 +25,21 @@ class Namespace:
def safe_extract( def safe_extract(
self, self,
attribute_chain: str, attribute_chain: str,
default: str | int | list | dict | SimpleNamespace = ""): default: Union[str, int, list, dict, SimpleNamespace] = ""):
return self.__safe_extract(self.data, attribute_chain, default) return self.__safe_extract(self.data, attribute_chain, default)
@staticmethod @staticmethod
def __safe_extract( def __safe_extract(
data_object, data_object: SimpleNamespace,
attribute_chain: str, attribute_chain: str,
default: str | int | list | dict | SimpleNamespace = "", ): default: Union[str, int, list, dict, SimpleNamespace] = "", ):
data = deepcopy(data_object) data = deepcopy(data_object)
attributes = attribute_chain.split(".") attributes = attribute_chain.split(".")
for attribute in attributes: for attribute in attributes:
if "[" in attribute: if "[" in attribute:
parts = attribute.split("[", 1) parts = attribute.split("[", 1)
attribute = parts[0] attribute = parts[0]
index = parts[1].split("]", 1)[0] index = parts[1][:-1]
try: try:
index = int(index) index = int(index)
data = getattr(data, attribute, None)[index] data = getattr(data, attribute, None)[index]
@@ -55,12 +56,24 @@ class Namespace:
cls, cls,
data_object: SimpleNamespace, data_object: SimpleNamespace,
attribute_chain: str, attribute_chain: str,
default: str | int | list | dict | SimpleNamespace = "", default: Union[str, int, list, dict, SimpleNamespace] = "",
): ):
return cls.__safe_extract( return cls.__safe_extract(
data_object, data_object,
attribute_chain, attribute_chain,
default, ) default, )
@property
def __dict__(self): def __dict__(self):
return vars(self.data) return self.convert_to_dict(self.data)
@classmethod
def convert_to_dict(cls, data) -> dict:
return {
key: cls.convert_to_dict(value) if isinstance(
value,
SimpleNamespace) else value for key,
value in vars(data).items()}
def __bool__(self):
return bool(vars(self.data))

View File

@@ -20,6 +20,7 @@ from .static import (
USERSCRIPT, USERSCRIPT,
USERAGENT, USERAGENT,
COOKIE, COOKIE,
HEADERS,
) )
from .tools import ( from .tools import (
retry, retry,
@@ -49,6 +50,7 @@ __all__ = [
"USERSCRIPT", "USERSCRIPT",
"USERAGENT", "USERAGENT",
"COOKIE", "COOKIE",
"HEADERS",
"retry", "retry",
"logging", "logging",
"wait", "wait",

View File

@@ -12,6 +12,7 @@ from aiohttp import ClientTimeout
from source.translator import Chinese from source.translator import Chinese
from source.translator import English from source.translator import English
from .static import COOKIE from .static import COOKIE
from .static import HEADERS
from .static import USERAGENT from .static import USERAGENT
__all__ = ["Manager"] __all__ = ["Manager"]
@@ -40,9 +41,9 @@ class Manager:
self.temp = root.joinpath("./temp") self.temp = root.joinpath("./temp")
self.path = self.__check_path(path) self.path = self.__check_path(path)
self.folder = self.__check_folder(folder) self.folder = self.__check_folder(folder)
self.headers = { self.blank_headers = HEADERS | {
"User-Agent": user_agent or USERAGENT, "User-Agent": user_agent or USERAGENT, }
"Cookie": cookie or COOKIE} self.headers = self.blank_headers | {"Cookie": cookie or COOKIE}
self.retry = retry self.retry = retry
self.chunk = chunk self.chunk = chunk
self.record_data = record_data self.record_data = record_data
@@ -51,11 +52,11 @@ class Manager:
self.proxy = proxy self.proxy = proxy
self.request_session = ClientSession( self.request_session = ClientSession(
headers=self.headers | { headers=self.headers | {
"Referer": "https://www.xiaohongshu.com/", }, "Referer": "https://www.xiaohongshu.com/explore", },
timeout=ClientTimeout(connect=timeout), timeout=ClientTimeout(connect=timeout),
) )
self.download_session = ClientSession( self.download_session = ClientSession(
headers={"User-Agent": self.headers["User-Agent"]}, headers=self.blank_headers,
timeout=ClientTimeout(connect=timeout)) timeout=ClientTimeout(connect=timeout))
self.prompt = language self.prompt = language

View File

@@ -18,6 +18,7 @@ __all__ = [
"USERSCRIPT", "USERSCRIPT",
"USERAGENT", "USERAGENT",
"COOKIE", "COOKIE",
"HEADERS",
] ]
VERSION_MAJOR = 1 VERSION_MAJOR = 1
@@ -31,17 +32,31 @@ RELEASES = "https://github.com/JoeanAmier/XHS-Downloader/releases/latest"
USERSCRIPT = "https://raw.githubusercontent.com/JoeanAmier/XHS-Downloader/master/static/XHS-Downloader.js" USERSCRIPT = "https://raw.githubusercontent.com/JoeanAmier/XHS-Downloader/master/static/XHS-Downloader.js"
HEADERS = {
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,"
"application/signed-exchange;v=b3;q=0.7",
"Accept-Encoding": "gzip, deflate, br",
"Accept-Language": "zh-CN,zh;q=0.9",
"Cache-Control": "max-age=0",
"Dnt": "1",
"Sec-Ch-Ua": "\"Not_A Brand\";v=\"8\", \"Chromium\";v=\"120\", \"Microsoft Edge\";v=\"120\"",
"Sec-Ch-Ua-Mobile": "?0",
"Sec-Ch-Ua-Platform": "\"Windows\"",
"Sec-Fetch-Dest": "document",
"Sec-Fetch-Mode": "navigate",
"Sec-Fetch-Site": "none",
"Sec-Fetch-User": "?1",
"Upgrade-Insecure-Requests": "1",
}
USERAGENT = ( USERAGENT = (
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 " "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 "
"Safari/537.36") "Safari/537.36 Edg/120.0.0.0")
COOKIE = ( COOKIE = (
"abRequestId=54c534bb-a2c6-558f-8e03-5b4c5c45635c; xsecappid=xhs-pc-web; a1=18c286a400" "abRequestId=a1c55c3d-edcd-5753-938b-15d22a78cb8a; webBuild=3.23.2; "
"4jy56qvzejvp631col0hd3032h4zjez50000106381; webId=779c977da3a15b5623015be94bdcc9e9; g" "a1=18ceecc41c5d2gkprctahn1jayh458m5eoos9grxb50000267832; webId=79879aaf1b46fa2120dfba20d6155928; "
"id=yYSJYK0qDW8KyYSJYK048quV84Vv2KAhudVhJduUKqySlx2818xfq4888y8KqYy8y2y2f8Jy; web_sess" "websectiga=3fff3a6f9f07284b62c0f2ebf91a3b10193175c06e4f71492b60e056edcdebb2; "
"ion=030037a259ce5f15c8d560dc12224a9fdc2ed1; webBuild=3.19.4; websectiga=984412fef754c" "sec_poison_id=52bff38d-96eb-40b6-a46b-5e7cc86014e4; web_session=030037a2ae3713ec49882425e5224a3cbb4eef; "
"018e472127b8effd174be8a5d51061c991aadd200c69a2801d6; sec_poison_id=3dd48845-d604-4535" "gid=yYSddSS2DKdyyYSddSS4ylkFS2fJkTUFS90xlCDIyV0vxM2842Y62j888JKWYqJ8iDD4KY2d; xsecappid=xhs-pc-web")
"-bcc2-a859e97518bf; unread={%22ub%22:%22655eb3d60000000032033955%22%2C%22ue%22:%22656"
"e9ef2000000003801ff3d%22%2C%22uc%22:29}; cache_feeds=[]")
MASTER = "b #fff200" MASTER = "b #fff200"
PROMPT = "b turquoise2" PROMPT = "b turquoise2"