mirror of
https://github.com/JoeanAmier/XHS-Downloader.git
synced 2025-12-26 04:48:05 +08:00
更新无水印图片采集逻辑
This commit is contained in:
parent
dd2cd73604
commit
72421f6d2b
@ -20,6 +20,8 @@
|
||||
<h1>📸 程序截图</h1>
|
||||
<br>
|
||||
<img src="static/程序截图.png" alt="">
|
||||
<h1>🪟 关于终端</h1>
|
||||
<p>⭐ 推荐使用 <a href="https://learn.microsoft.com/zh-cn/windows/terminal/install">Windows 终端</a>(Windows 11 自带默认终端)运行程序以便获得最佳显示效果!</p>
|
||||
<h1>🥣 使用方法</h1>
|
||||
<p>如果仅需下载作品文件,选择 <b>直接运行</b> 或者 <b>源码运行</b> 均可,如果需要获取作品信息,则需要进行二次开发进行调用。</p>
|
||||
<h2>🖱 直接运行</h2>
|
||||
|
||||
3
main.py
3
main.py
@ -44,6 +44,7 @@ def example():
|
||||
|
||||
|
||||
class XHSDownloader(App):
|
||||
VERSION = 1.5
|
||||
CSS_PATH = "static/XHS_Downloader.tcss"
|
||||
BINDINGS = [
|
||||
Binding(key="q", action="quit", description="退出程序"),
|
||||
@ -63,7 +64,7 @@ class XHSDownloader(App):
|
||||
yield Footer()
|
||||
|
||||
def on_mount(self) -> None:
|
||||
self.title = "小红书作品采集工具"
|
||||
self.title = f"小红书作品采集工具 V{self.VERSION}"
|
||||
|
||||
def on_button_pressed(self, event: Button.Pressed) -> None:
|
||||
if event.button.id == "solo":
|
||||
|
||||
@ -1,13 +1,12 @@
|
||||
from json import loads
|
||||
from re import compile
|
||||
|
||||
from .Html import Html
|
||||
|
||||
__all__ = ['Image']
|
||||
|
||||
|
||||
class Image:
|
||||
IMAGE_INFO = compile(r'("infoList":\[\{.*?}])')
|
||||
IMAGE_TOKEN = compile(
|
||||
r"http://sns-webpic-qc.xhscdn.com/\d+/\w+/(\w+)!")
|
||||
|
||||
def get_image_link(self, html: str) -> list:
|
||||
data = self.__extract_image_data(html)
|
||||
@ -22,11 +21,18 @@ class Image:
|
||||
return [loads(f"{{{i}}}") for i in data]
|
||||
|
||||
@staticmethod
|
||||
def __extract_image_urls(data: list[dict]) -> list[str]:
|
||||
def __generate_image_link(token: str) -> str:
|
||||
return f"https://ci.xiaohongshu.com/{token}?imageView2/2/w/format/png"
|
||||
|
||||
def __extract_image_token(self, url: str) -> str:
|
||||
return self.__generate_image_link(token[0]) if len(
|
||||
token := self.IMAGE_TOKEN.findall(url)) == 1 else ""
|
||||
|
||||
def __extract_image_urls(self, data: list[dict]) -> list[str]:
|
||||
urls = []
|
||||
for i in data:
|
||||
for j in i.get("infoList", []):
|
||||
if j.get("imageScene", "").startswith("CRD_WM_"):
|
||||
urls.append(j.get("url", ""))
|
||||
urls.append(self.__extract_image_token(j.get("url", "")))
|
||||
break
|
||||
return [Html.format_url(i) for i in urls if i]
|
||||
return [i for i in urls if i]
|
||||
|
||||
@ -17,13 +17,11 @@ class XHS:
|
||||
headers = {
|
||||
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome"
|
||||
"/116.0.0.0 Safari/537.36",
|
||||
"Cookie": "abRequestId=8bf8c305-4e5b-52dd-a723-e9a5343ab42d; webBuild=3.11.3; xsecappid=xhs-pc-web"
|
||||
"; a1=18b5003660f88kld0gicpx2on2oksepl5y3r9htmn50000247235; webId=a2b049d2fa82434385976a4"
|
||||
"9814085b0; websectiga=16f444b9ff5e3d7e258b5f7674489196303a0b160e16647c6c2b4dcb609f4134; s"
|
||||
"ec_poison_id=3d97668d-1e50-42a7-89c0-7b46aa0183a5; web_session=030037a2006067cf0a2b5f724b"
|
||||
"224a1819eb36; gid=yYD288qW4824yYD288qKKvx98iYYT1f8k3SUq6ICJlIJlF28TMdU1j888J4WJq28fqDJiqq"
|
||||
"0; unread={%22ub%22:%22650533d4000000001302862b%22%2C%22ue%22:%2265015857000000001e021a5e"
|
||||
"%22%2C%22uc%22:42}",
|
||||
"Cookie": "abRequestId=c76828f5-4f37-5b3b-8cc3-036eb91b2edb; webBuild=3.14.1; xsecappid=xhs-pc-web; "
|
||||
"a1=18ba9b2b23co9uwihz4adkebwsw05g8upycgsldyj50000141248; webId=23ee7745020025247828cf8d6d0decff; "
|
||||
"websectiga=6169c1e84f393779a5f7de7303038f3b47a78e47be716e7bec57ccce17d45f99; "
|
||||
"sec_poison_id=ae001863-a9db-4463-ad78-ede3aac4e5b1; gid=yYD0jDJDWyU4yYD0jDJDJv1fqSlj7E3xu40fSvVTd"
|
||||
"DEMEk2882kY7M888y4yJ4Y8D8SK0iiK; web_session=030037a2797dde5008c3e66f32224a8af75429; ",
|
||||
}
|
||||
links = compile(r"https://www.xiaohongshu.com/explore/[0-9a-z]+")
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user