fix: 修复项目功能异常

1. 更新 Cookie 参数处理
2. 优化作品数据返回格式
3. 更新用户脚本链接提取功能
4. 优化项目运行信息提示
5. 修复 record_data 参数无效的问题
6. 新增作品下载记录开关
7. 适配小红书平台规则
8. 默认开启局域网访问
9. 格式化项目代码
10. 更新英语翻译

Closes #127
Closes #128
Closes #130
Closes #132
This commit is contained in:
JoeanAmier 2024-08-02 21:22:00 +08:00
parent dd5c09389a
commit 3e8b69f8e1
16 changed files with 171 additions and 69 deletions

Binary file not shown.

View File

@ -85,7 +85,7 @@ msgstr "Xiaohongshu Web Cookie"
msgid "网络代理"
msgstr "Network proxy"
msgid "记录作品数据"
msgid "记录作品详细数据"
msgstr "Record works data"
msgid "图片下载格式"
@ -291,3 +291,6 @@ msgstr "Agent {0} test failed: {1}"
msgid "浏览器名称或序号输入错误!"
msgstr "Browser name or serial number input error!"
msgid "作品下载记录开关"
msgstr "Works download record switch"

View File

@ -85,7 +85,7 @@ msgstr ""
msgid "网络代理"
msgstr ""
msgid "记录作品数据"
msgid "记录作品详细数据"
msgstr ""
msgid "图片下载格式"
@ -291,3 +291,6 @@ msgstr ""
msgid "浏览器名称或序号输入错误!"
msgstr ""
msgid "作品下载记录开关"
msgstr ""

View File

@ -61,7 +61,7 @@ class Setting(Screen):
Input(str(self.data["max_retry"]), placeholder="5", type="integer", id="max_retry", ),
Label(),
Container(
Checkbox(self.message("记录作品数据"), id="record_data", value=self.data["record_data"], ),
Checkbox(self.message("记录作品详细数据"), id="record_data", value=self.data["record_data"], ),
Checkbox(self.message("作品文件夹归档模式"), id="folder_mode", value=self.data["folder_mode"], ),
Checkbox(self.message("视频作品下载开关"), id="video_download", value=self.data["video_download"], ),
Checkbox(self.message("图文作品下载开关"), id="image_download", value=self.data["image_download"], ),
@ -69,6 +69,7 @@ class Setting(Screen):
Label(),
Container(
Checkbox(self.message("动图文件下载开关"), id="live_download", value=self.data["live_download"], ),
Checkbox(self.message("作品下载记录开关"), id="download_record", value=self.data["download_record"], ),
classes="horizontal-layout"),
Container(
Label(self.message("图片下载格式"), classes="params", ),
@ -123,6 +124,7 @@ class Setting(Screen):
"image_download": self.query_one("#image_download").value,
"video_download": self.query_one("#video_download").value,
"live_download": self.query_one("#live_download").value,
"download_record": self.query_one("#download_record").value,
# "server": False,
})

View File

@ -10,9 +10,6 @@ from textual.widgets import LoadingIndicator
from source.application import XHS
from source.module import (
VERSION_MAJOR,
VERSION_MINOR,
VERSION_BETA,
ERROR,
WARNING,
INFO,
@ -40,10 +37,10 @@ class Update(ModalScreen):
try:
url = await self.xhs.html.request_url(RELEASES, False, None, timeout=5, )
version = url.split("/")[-1]
match self.compare_versions(f"{VERSION_MAJOR}.{VERSION_MINOR}", version, VERSION_BETA):
match self.compare_versions(f"{XHS.VERSION_MAJOR}.{XHS.VERSION_MINOR}", version, XHS.VERSION_BETA):
case 4:
tip = Text(f"{self.message("检测到新版本:{0}.{1}").format(
VERSION_MAJOR, VERSION_MINOR)}\n{RELEASES}", style=WARNING)
XHS.VERSION_MAJOR, XHS.VERSION_MINOR)}\n{RELEASES}", style=WARNING)
case 3:
tip = Text(
f"{self.message("当前版本为开发版, 可更新至正式版")}\n{RELEASES}",

View File

@ -32,6 +32,7 @@ from source.module import (
REPOSITORY,
VERSION_MAJOR,
VERSION_MINOR,
VERSION_BETA,
)
from source.module import Translate
from source.module import logging
@ -44,10 +45,25 @@ from .video import Video
__all__ = ["XHS"]
def _data_cache(function):
    # Decorator for XHS.save_data. When detailed data recording is enabled
    # (manager.record_data), it snapshots the list-valued download fields,
    # runs the wrapped save step, then restores the lists — the save step
    # serializes "下载地址" into a joined string for storage, while later
    # download logic still needs the original list form.
    # When record_data is disabled, the wrapped save is skipped entirely.
    # NOTE(review): reconstructed indentation from a flattened diff —
    # confirm the restore statements sit inside the record_data branch.
    async def inner(self, data: dict, ):
        if self.manager.record_data:
            # keep the original list values before they are serialized
            download = data["下载地址"]
            lives = data["动图地址"]
            await function(self, data, )
            # restore the lists so downstream consumers see them unchanged
            data["下载地址"] = download
            data["动图地址"] = lives
    return inner
class XHS:
LINK = compile(r"https?://www\.xiaohongshu\.com/explore/[a-z0-9]+")
SHARE = compile(r"https?://www\.xiaohongshu\.com/discovery/item/[a-z0-9]+")
SHORT = compile(r"https?://xhslink\.com/[A-Za-z0-9]+")
VERSION_MAJOR = VERSION_MAJOR
VERSION_MINOR = VERSION_MINOR
VERSION_BETA = VERSION_BETA
LINK = compile(r"https?://www\.xiaohongshu\.com/explore/\S+")
SHARE = compile(r"https?://www\.xiaohongshu\.com/discovery/item/\S+")
SHORT = compile(r"https?://xhslink\.com/\S+")
__INSTANCE = None
def __new__(cls, *args, **kwargs):
@ -74,6 +90,7 @@ class XHS:
video_download=True,
live_download=False,
folder_mode=False,
download_record=True,
language="zh_CN",
# server=False,
transition: Callable[[str], str] = None,
@ -101,6 +118,7 @@ class XHS:
image_download,
video_download,
live_download,
download_record,
folder_mode,
# server,
self.message,
@ -127,7 +145,7 @@ class XHS:
def __extract_video(self, container: dict, data: Namespace):
container["下载地址"] = self.video.get_video_link(data)
container["动图地址"] = ""
container["动图地址"] = [None, ]
async def __download_files(self, container: dict, download: bool, index, log, bar):
name = self.__naming_rules(container)
@ -136,13 +154,21 @@ class XHS:
logging(
log, self.message("作品 {0} 存在下载记录,跳过下载").format(i))
else:
path, result = await self.download.run(u, container["动图地址"], index, name, container["作品类型"],
log, bar)
path, result = await self.download.run(
u,
container["动图地址"],
index,
name,
container["作品类型"],
log,
bar,
)
await self.__add_record(i, result)
elif not u:
logging(log, self.message("提取作品文件下载地址失败"), ERROR)
await self.save_data(container)
@_data_cache
async def save_data(self, data: dict, ):
data["采集时间"] = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
data["下载地址"] = " ".join(data["下载地址"])
@ -196,20 +222,20 @@ class XHS:
return urls
async def __deal_extract(self, url: str, download: bool, index: list | tuple | None, log, bar, data: bool, ):
if not data and await self.skip_download(i := self.__extract_link_id(url)):
if await self.skip_download(i := self.__extract_link_id(url)) and not data:
msg = self.message("作品 {0} 存在下载记录,跳过处理").format(i)
logging(log, msg)
return {"message": msg}
logging(log, self.message("开始处理作品:{0}").format(url))
logging(log, self.message("开始处理作品:{0}").format(i))
html = await self.html.request_url(url, log=log)
namespace = self.__generate_data_object(html)
if not namespace:
logging(log, self.message("{0} 获取数据失败").format(url), ERROR)
logging(log, self.message("{0} 获取数据失败").format(i), ERROR)
return {}
data = self.explore.run(namespace)
# logging(log, data) # 调试代码
if not data:
logging(log, self.message("{0} 提取数据失败").format(url), ERROR)
logging(log, self.message("{0} 提取数据失败").format(i), ERROR)
return {}
match data["作品类型"]:
case "视频":
@ -219,7 +245,7 @@ class XHS:
case _:
data["下载地址"] = []
await self.__download_files(data, download, index, log, bar)
logging(log, self.message("作品处理完成:{0}").format(url))
logging(log, self.message("作品处理完成:{0}").format(i))
return data
@staticmethod
@ -352,7 +378,7 @@ class XHS:
# await self.runner.cleanup()
# logging(log, self.message("Web API 服务器已关闭!"))
async def run_server(self, host="127.0.0.1", port=8000, log_level="info", ):
async def run_server(self, host="0.0.0.0", port=8000, log_level="info", ):
self.server = FastAPI(
title="XHS-Downloader",
version=f"{VERSION_MAJOR}.{VERSION_MINOR}")

View File

@ -37,8 +37,16 @@ class Download:
self.video_download = manager.video_download
self.live_download = manager.live_download
async def run(self, urls: list, lives: list, index: list | tuple | None, name: str, type_: str, log, bar) -> tuple[
Path, tuple]:
async def run(
self,
urls: list,
lives: list,
index: list | tuple | None,
name: str,
type_: str,
log,
bar,
) -> tuple[Path, tuple]:
path = self.__generate_path(name)
match type_:
case "视频":

View File

@ -40,8 +40,7 @@ class Explore:
def __extract_info(self, container: dict, data: Namespace):
container["作品ID"] = data.safe_extract("noteId")
container["作品链接"] = f"https://www.xiaohongshu.com/explore/{
container["作品ID"]}"
container["作品链接"] = f"https://www.xiaohongshu.com/explore/{container["作品ID"]}"
container["作品标题"] = data.safe_extract("title")
container["作品描述"] = data.safe_extract("desc")
container["作品类型"] = self.explore_type.get(

View File

@ -37,9 +37,12 @@ class Image:
@staticmethod
def __get_live_link(items: list) -> list:
links = []
for item in items:
links.append(
Html.format_url(Namespace.object_extract(
item, "stream.h264[0].masterUrl")))
return links
return [
(
Html.format_url(
Namespace.object_extract(item, "stream.h264[0].masterUrl")
)
or None
)
for item in items
]

View File

@ -13,6 +13,8 @@ class Html:
self.retry = manager.retry
self.message = manager.message
self.client = manager.request_client
self.headers = manager.headers
self.blank_headers = manager.blank_headers
@retry
async def request_url(
@ -25,6 +27,7 @@ class Html:
try:
response = await self.client.get(
url,
headers=self.select_headers(url, ),
**kwargs,
)
response.raise_for_status()
@ -38,3 +41,6 @@ class Html:
@staticmethod
def format_url(url: str) -> str:
    """Decode backslash escape sequences (e.g. ``\\uXXXX``) embedded in *url*.

    Round-trips the text through UTF-8 bytes so the ``unicode_escape``
    codec can expand literal escape sequences into real characters.
    """
    encoded = url.encode("utf-8")
    return encoded.decode("unicode_escape")
def select_headers(self, url: str) -> dict:
    """Choose request headers for *url*.

    Share links (path containing ``discovery/item``) are requested with
    the cookie-free ``blank_headers``; every other URL gets the default
    cookie-bearing ``headers``.
    """
    if "discovery/item" in url:
        return self.blank_headers
    return self.headers

View File

@ -1,5 +1,6 @@
from .browser import BrowserCookie
from .converter import Converter
from .namespace import Namespace
__all__ = ["Converter", "Namespace", "BrowserCookie", ]
from .truncate import beautify_string
from .truncate import trim_string
from .truncate import truncate_string

View File

@ -0,0 +1,35 @@
from unicodedata import name


def is_chinese_char(char: str) -> bool:
    """Return True if *char* is a CJK character.

    Detection is based on the Unicode character name containing "CJK";
    unnamed code points fall back to "" and are treated as non-CJK.
    """
    return 'CJK' in name(char, "")


def truncate_string(s: str, length: int = 64) -> str:
    """Truncate *s* to a display width of *length* columns.

    CJK characters count as 2 columns, all others as 1; the result never
    exceeds the requested width.
    """
    count = 0
    result = ""
    for char in s:
        count += 2 if is_chinese_char(char) else 1
        if count > length:
            break
        result += char
    return result


def trim_string(s: str, length: int = 64) -> str:
    """Shorten *s* to roughly *length* characters, keeping head and tail.

    Strings no longer than *length* are returned unchanged. Longer strings
    become ``head...tail`` where each part is ``length // 2 - 2`` chars.

    Bug fix: the original compared ``len(s)`` against the already-halved
    value, so strings of length 31–64 (default) were wrongly expanded to
    ~63 characters with duplicated content. It also allowed a zero-length
    keep (``s[-0:]`` is the whole string) for very small *length*.
    """
    if len(s) <= length:
        return s
    # keep at least one character per side so s[-keep:] never degenerates
    keep = max(length // 2 - 2, 1)
    return f"{s[:keep]}...{s[-keep:]}"


def beautify_string(s: str, length: int = 64) -> str:
    """Return *s* unchanged if it fits *length* display columns, else
    condense it to ``start...end`` using width-aware truncation.

    CJK characters count as 2 columns. Each kept side is truncated to
    ``length // 2`` columns (the tail via a reverse-truncate), so the
    result may slightly exceed *length* — matches the original behavior.
    """
    count = 0
    for char in s:
        count += 2 if is_chinese_char(char) else 1
        if count > length:
            break
    else:
        # loop finished without exceeding the width budget: keep as-is
        return s
    length //= 2
    start = truncate_string(s, length)
    end = truncate_string(s[::-1], length)[::-1]
    return f"{start}...{end}"

View File

@ -6,6 +6,7 @@ from shutil import rmtree
from typing import Callable
from httpx import AsyncClient
from httpx import HTTPStatusError
from httpx import RequestError
from httpx import TimeoutException
from httpx import get
@ -64,6 +65,7 @@ class Manager:
image_download: bool,
video_download: bool,
live_download: bool,
download_record: bool,
folder_mode: bool,
# server: bool,
transition: Callable[[str], str],
@ -80,7 +82,7 @@ class Manager:
"Sec-Ch-Ua-Platform": sec_ch_ua_platform or SEC_CH_UA_PLATFORM,
}
self.headers = self.blank_headers | {
"Cookie": self.clean_cookie(cookie),
"Cookie": cookie,
}
self.retry = retry
self.chunk = chunk
@ -88,6 +90,7 @@ class Manager:
self.record_data = self.check_bool(record_data, False)
self.image_format = self.__check_image_format(image_format)
self.folder_mode = self.check_bool(folder_mode, False)
self.download_record = self.check_bool(download_record, True)
self.proxy_tip = None
self.proxy = self.__check_proxy(proxy)
self.print_proxy_tip(_print, )
@ -200,13 +203,16 @@ class Manager:
response = get(
url,
**kwarg, )
if response.status_code < 400:
self.proxy_tip = (self.message("代理 {0} 测试成功").format(proxy),)
return kwarg
response.raise_for_status()
self.proxy_tip = (self.message("代理 {0} 测试成功").format(proxy),)
return kwarg
except TimeoutException:
self.proxy_tip = (
self.message("代理 {0} 测试超时").format(proxy), WARNING,)
except RequestError as e:
except (
RequestError,
HTTPStatusError,
) as e:
self.proxy_tip = (
self.message("代理 {0} 测试失败:{1}").format(
proxy, e), WARNING,)
@ -218,17 +224,19 @@ class Manager:
@classmethod
def clean_cookie(cls, cookie_string: str) -> str:
for i in (
return cls.delete_cookie(
cookie_string,
(
cls.WEB_ID,
cls.WEB_SESSION,
):
cookie_string = cls.delete_cookie(cookie_string, i)
return cookie_string
),
)
@classmethod
def delete_cookie(cls, cookie_string: str, pattern) -> str:
# 使用空字符串替换匹配到的部分
cookie_string = sub(pattern, "", cookie_string)
def delete_cookie(cls, cookie_string: str, patterns: list | tuple) -> str:
for pattern in patterns:
# 使用空字符串替换匹配到的部分
cookie_string = sub(pattern, "", cookie_string)
# 去除多余的分号和空格
cookie_string = sub(r';\s*$', "", cookie_string) # 删除末尾的分号和空格
cookie_string = sub(r';\s*;', ";", cookie_string) # 删除中间多余分号后的空格

View File

@ -1,3 +1,5 @@
from asyncio import CancelledError
from contextlib import suppress
from re import compile
from aiosqlite import connect
@ -12,6 +14,7 @@ class IDRecorder:
def __init__(self, manager: Manager):
self.file = manager.root.joinpath("ExploreID.db")
self.switch = manager.download_record
self.database = None
self.cursor = None
@ -22,12 +25,14 @@ class IDRecorder:
await self.database.commit()
async def select(self, id_: str):
await self.cursor.execute("SELECT ID FROM explore_id WHERE ID=?", (id_,))
return await self.cursor.fetchone()
if self.switch:
await self.cursor.execute("SELECT ID FROM explore_id WHERE ID=?", (id_,))
return await self.cursor.fetchone()
async def add(self, id_: str) -> None:
await self.database.execute("REPLACE INTO explore_id VALUES (?);", (id_,))
await self.database.commit()
if self.switch:
await self.database.execute("REPLACE INTO explore_id VALUES (?);", (id_,))
await self.database.commit()
async def __delete(self, id_: str) -> None:
if id_:
@ -35,19 +40,22 @@ class IDRecorder:
await self.database.commit()
async def delete(self, ids: str):
ids = [i.group(1) for i in self.URL.finditer(ids)]
[await self.__delete(i) for i in ids]
if self.switch:
ids = [i.group(1) for i in self.URL.finditer(ids)]
[await self.__delete(i) for i in ids]
async def all(self):
await self.cursor.execute("SELECT ID FROM explore_id")
return [i[0] for i in await self.cursor.fetchmany()]
if self.switch:
await self.cursor.execute("SELECT ID FROM explore_id")
return [i[0] for i in await self.cursor.fetchmany()]
async def __aenter__(self):
await self._connect_database()
return self
async def __aexit__(self, exc_type, exc_value, traceback):
await self.cursor.close()
with suppress(CancelledError):
await self.cursor.close()
await self.database.close()
@ -76,6 +84,7 @@ class DataRecorder(IDRecorder):
def __init__(self, manager: Manager):
super().__init__(manager)
self.file = manager.folder.joinpath("ExploreData.db")
self.switch = manager.record_data
async def _connect_database(self):
self.database = await connect(self.file)
@ -89,12 +98,13 @@ class DataRecorder(IDRecorder):
pass
async def add(self, **kwargs) -> None:
await self.database.execute(f"""REPLACE INTO explore_data (
if self.switch:
await self.database.execute(f"""REPLACE INTO explore_data (
{", ".join(i[0] for i in self.DATA_TABLE)}
) VALUES (
{", ".join("?" for _ in kwargs)}
);""", self.__generate_values(kwargs))
await self.database.commit()
await self.database.commit()
async def __delete(self, id_: str) -> None:
pass

View File

@ -30,6 +30,7 @@ class Settings:
"video_download": True,
"live_download": False,
"folder_mode": False,
"download_record": True,
"language": "zh_CN",
# "server": False,
}

View File

@ -1,7 +1,7 @@
// ==UserScript==
// @name XHS-Downloader
// @namespace https://github.com/JoeanAmier/XHS-Downloader
// @version 1.5.2
// @version 1.6.0
// @description 提取小红书作品/用户链接,下载小红书无水印图文/视频作品文件
// @author JoeanAmier
// @match http*://xhslink.com/*
@ -304,43 +304,43 @@
const extractNotesInfo = order => {
const notesRawValue = unsafeWindow.__INITIAL_STATE__.user.notes._rawValue[order];
return new Set(notesRawValue.map(({id}) => id));
return notesRawValue.map(item => [item.id, item.xsecToken]);
};
const extractFeedInfo = () => {
const notesRawValue = unsafeWindow.__INITIAL_STATE__.feed.feeds._rawValue;
return new Set(notesRawValue.map(({id}) => id));
return notesRawValue.map(item => [item.id, item.xsecToken]);
};
const extractSearchNotes = () => {
const notesRawValue = unsafeWindow.__INITIAL_STATE__.search.feeds._rawValue;
return new Set(notesRawValue.map(({id}) => id));
return notesRawValue.map(item => [item.id, item.xsecToken]);
}
const extractSearchUsers = () => {
const notesRawValue = unsafeWindow.__INITIAL_STATE__.search.userLists._rawValue;
return new Set(notesRawValue.map(({id}) => id));
return notesRawValue.map(item => item.id);
}
const generateNoteUrls = ids => [...ids].map(id => `https://www.xiaohongshu.com/explore/${id}`).join(" ");
const generateNoteUrls = data => data.map(([id, token]) => `https://www.xiaohongshu.com/explore/${id}?xsec_token=${token}&xsec_source=pc_feed`).join(" ");
const generateUserUrls = ids => [...ids].map(id => `https://www.xiaohongshu.com/user/profile/${id}`).join(" ");
const generateUserUrls = data => data.map(id => `https://www.xiaohongshu.com/user/profile/${id}`).join(" ");
const extractAllLinks = (callback, order) => {
scrollScreen(() => {
let ids;
let data;
if (order >= 0 && order <= 2) {
ids = extractNotesInfo(order);
data = extractNotesInfo(order);
} else if (order === 3) {
ids = extractSearchNotes();
data = extractSearchNotes();
} else if (order === 4) {
ids = extractSearchUsers();
data = extractSearchUsers();
} else if (order === -1) {
ids = extractFeedInfo()
data = extractFeedInfo()
} else {
ids = [];
data = [];
}
let urlsString = order !== 4 ? generateNoteUrls(ids) : generateUserUrls(ids);
let urlsString = order !== 4 ? generateNoteUrls(data) : generateUserUrls(data);
callback(urlsString);
}, order === -1, [3, 4].includes(order))
};