feat: add support for Douyin (dy) author page share links

2025-12-26 05:48:32 +08:00 · 2024-10-08 15:29:45 +08:00 · 2024-10-08 15:29:45 +08:00 · 44af6912d4
commit 44af6912d4
parent f0700e8655
4 changed files with 61 additions and 25 deletions
--- a/README.md
+++ b/README.md
@ -105,9 +105,11 @@

 ```
 抖音：
-https://live.douyin.com/745964462470
-https://v.douyin.com/iQFeBnt/
-https://live.douyin.com/yall1102
+https://live.douyin.com/745964462470 (PC网页端地址)
+https://v.douyin.com/iQFeBnt/ (app端分享地址)
+https://live.douyin.com/yall1102 (链接+抖音号)
+https://v.douyin.com/CeiU5cbX/ (作者主页分享地址)
+

 TikTok：
 https://www.tiktok.com/@pearlgaga88/live
--- a/demo.py
+++ b/demo.py
@ -6,7 +6,7 @@ from douyinliverecorder import spider
 LIVE_STREAM_CONFIG = {
    "douyin": {
        "url": "https://live.douyin.com/745964462470",
-        "func": spider.get_douyin_stream_data,
+        "func": spider.get_douyin_app_stream_data,
    },
    "tiktok": {
        "url": "https://www.tiktok.com/@pearlgaga88/live",
@ -157,4 +157,4 @@ def test_live_stream(platform_name: str) -> None:

 if __name__ == "__main__":
    platform = "douyin"
-    test_live_stream(platform)
+    test_live_stream(platform)
--- a/douyinliverecorder/web_rid.py
+++ b/douyinliverecorder/web_rid.py
@ -4,7 +4,7 @@
 Author: Hmily
 Github:https://github.com/ihmily
 Date: 2023-07-17 23:52:05
-Update: 2024-03-06 23:35:00
+Update: 2024-10-08 23:35:00
 Copyright (c) 2023 by Hmily, All Rights Reserved.
 """
 import json
@ -25,20 +25,29 @@ HEADERS = {
    'Cookie': 's_v_web_id=verify_lk07kv74_QZYCUApD_xhiB_405x_Ax51_GYO9bUIyZQVf'
 }

+# Desktop-browser request headers. get_unique_id falls back to these when the
+# caller supplies no User-Agent/Cookie header of its own.
+# NOTE(review): the Cookie value embeds a hard-coded sessionid and signature --
+# presumably captured from a real browser session; confirm it is safe to commit
+# and how it is meant to be refreshed when it expires.
+HEADERS_PC = {
+    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.5845.97 '
+                  'Safari/537.36 Core/1.116.438.400 QQBrowser/13.0.6070.400',
+    'Cookie': 'sessionid=7494ae59ae06784454373ce25761e864; __ac_nonce=0670497840077ee4c9eb2; '
+              '__ac_signature=_02B4Z6wo00f012DZczQAAIDCJJBb3EjnINdg-XeAAL8-db;  '
+              's_v_web_id=verify_m1ztgtjj_vuHnMLZD_iwZ9_4YO4_BdN1_7wLP3pyqXsf2; ',
+    }
+

 # X-bogus算法
 def get_xbogus(url: str, headers: Union[dict, None] = None) -> str:
+    """Compute the X-Bogus signature for the query string of ``url``.
+
+    The query string and the request's User-Agent are passed to the ``sign``
+    function of ``./x-bogus.js`` through execjs.
+
+    :param url: URL whose query string is signed.
+    :param headers: request headers; when empty or lacking a User-Agent entry
+        (in any letter case) the module-level ``HEADERS`` is used instead.
+    :return: the value returned by the JavaScript ``sign`` function.
+    """
-    if not headers or "User-Agent" not in headers and "user-agent" not in headers:
+    if not headers or 'user-agent' not in (k.lower() for k in headers):
        headers = HEADERS
    query = urllib.parse.urlparse(url).query
-    xbogus = execjs.compile(open('./x-bogus.js').read()).call('sign', query, headers["User-Agent"])
+    # The guard above accepts any casing of the header name, so look the value
+    # up case-insensitively too; a plain .get("User-Agent", ...) would miss a
+    # lower-case key and sign with the wrong string.
+    user_agent = next(v for k, v in headers.items() if k.lower() == 'user-agent')
+    # Close the script file deterministically instead of leaking the handle.
+    with open('./x-bogus.js') as js_file:
+        xbogus = execjs.compile(js_file.read()).call('sign', query, user_agent)
    # print(xbogus)
    return xbogus


 # 获取房间ID和用户secID
-def get_sec_user_id(url: str, proxy_addr: Union[str, None] = None, headers: Union[dict, None] = None):
-    if not headers or "User-Agent" not in headers and "user-agent" not in headers:
+def get_sec_user_id(url: str, proxy_addr: Union[str, None] = None,
+                    headers: Union[dict, None] = None) -> Union[tuple, None]:
+    if not headers or all(k.lower() not in ['user-agent', 'cookie'] for k in headers):
        headers = HEADERS

    if proxy_addr:
@ -50,15 +59,37 @@ def get_sec_user_id(url: str, proxy_addr: Union[str, None] = None, headers: Unio
    else:
        response = opener.open(url, timeout=15)
    redirect_url = response.url
-    sec_user_id = re.search(r'sec_user_id=([\w_\-]+)&', redirect_url).group(1)
-    room_id = redirect_url.split('?')[0].rsplit('/', maxsplit=1)[1]
-    return room_id, sec_user_id
+    if 'reflow/' in redirect_url:
+        sec_user_id = re.search(r'sec_user_id=([\w_\-]+)&', redirect_url).group(1)
+        room_id = redirect_url.split('?')[0].rsplit('/', maxsplit=1)[1]
+        return room_id, sec_user_id
+
+
+# Resolve the Douyin ID (unique_id) from an author-page share link
+def get_unique_id(url: str, proxy_addr: Union[str, None] = None,
+                  headers: Union[dict, None] = None) -> Union[str, None]:
+    """Return the author's ``uniqueId`` for an author-page share URL.
+
+    The share URL is opened to obtain its final (redirected) URL; the last
+    path segment of that URL is used as the ``sec_user_id``. The matching
+    ``https://www.douyin.com/user/<sec_user_id>`` page is then fetched and the
+    last ``uniqueId`` value found by the regex is returned.
+
+    :param url: author-page share link.
+    :param proxy_addr: optional proxy address used for both HTTP and HTTPS.
+    :param headers: request headers; when empty or containing neither a
+        User-Agent nor a Cookie entry, ``HEADERS_PC`` is used.
+    :raises IndexError: when the profile page contains no ``uniqueId`` match.
+    """
+    if not headers or all(k.lower() not in ['user-agent', 'cookie'] for k in headers):
+        headers = HEADERS_PC
+
+    proxies = {'http': proxy_addr, 'https': proxy_addr} if proxy_addr else None
+    if proxies:
+        response = requests.get(url, headers=headers, proxies=proxies, timeout=15)
+    else:
+        response = opener.open(url, timeout=15)
+    redirect_url = response.url
+    sec_user_id = redirect_url.split('?')[0].rsplit('/', maxsplit=1)[1]
+    # Send the profile request through the same proxy as the first request and
+    # bound it with the same timeout, so it can neither bypass the configured
+    # proxy nor block forever on a stalled connection.
+    resp = requests.get(f'https://www.douyin.com/user/{sec_user_id}', headers=headers,
+                        proxies=proxies, timeout=15)
+    unique_id = re.findall(r'undefined\\"},\\"uniqueId\\":\\"(.*?)\\",\\"customVerify', resp.text)[-1]
+    return unique_id


 # 获取直播间webID
 def get_live_room_id(room_id: str, sec_user_id: str, proxy_addr: Union[str, None] = None,
                     params: Union[dict, None] = None, headers: Union[dict, None] = None) -> str:
-    if not headers or "User-Agent" not in headers and "user-agent" not in headers:
+    if not headers or all(k.lower() not in ['user-agent', 'cookie'] for k in headers):
        headers = HEADERS

    if not params:
@ -93,8 +124,6 @@ def get_live_room_id(room_id: str, sec_user_id: str, proxy_addr: Union[str, None

 if __name__ == '__main__':
+    # Manual check: resolve a share link to (room_id, sec_user_id), then look
+    # up the web room id for that pair and print it.
+    # NOTE(review): the unpacking below assumes get_sec_user_id returns a
+    # tuple; it returns None when the redirect URL has no 'reflow/' segment.
    room_url = "https://v.douyin.com/iQLgKSj/"
-    # url="https://v.douyin.com/iQFeBnt/"
-    # url="https://v.douyin.com/iehvKttp/"
    _room_id, sec_uid = get_sec_user_id(room_url)
    web_rid = get_live_room_id(_room_id, sec_uid)
-    print("return web_rid:", web_rid)
+    print("return web_rid:", web_rid)
--- a/douyinliverecorder/spider.py
+++ b/douyinliverecorder/spider.py
@ -29,7 +29,7 @@ from .utils import (
    trace_error_decorator, dict_to_cookie_str
 )
 from .logger import script_path
-from .web_rid import get_sec_user_id
+from .room import get_sec_user_id, get_unique_id

 no_proxy_handler = urllib.request.ProxyHandler({})
 opener = urllib.request.build_opener(no_proxy_handler)
@ -173,8 +173,7 @@ def get_douyin_app_stream_data(url: str, proxy_addr: Union[str, None] = None, co
    if cookies:
        headers['Cookie'] = cookies

-    def get_app_data():
-        room_id, sec_uid = get_sec_user_id(url=url, proxy_addr=proxy_addr)
+    def get_app_data(room_id, sec_uid):
        api2 = f'https://webcast.amemv.com/webcast/room/reflow/info/?verifyFp=verify_lxj5zv70_7szNlAB7_pxNY_48Vh_ALKF_GA1Uf3yteoOY&type_id=0&live_id=1&room_id={room_id}&sec_user_id={sec_uid}&version_code=99.99.99&app_id=1128'
        json_str2 = get_req(url=api2, proxy_addr=proxy_addr, headers=headers)
        json_data2 = json.loads(json_str2)['data']
@ -192,12 +191,18 @@ def get_douyin_app_stream_data(url: str, proxy_addr: Union[str, None] = None, co
            room_data = json_data['data'][0]
            room_data['anchor_name'] = json_data['user']['nickname']
        else:
-            room_data = get_app_data()
+            data = get_sec_user_id(url, proxy_addr=proxy_addr)

-        if 'stream_url' not in room_data:
-            raise RuntimeError('该直播类型或玩法电脑端暂未支持，请使用app端分享链接进行录制')
+            if data:
+                _room_id, _sec_uid = data
+                room_data = get_app_data(_room_id, _sec_uid)
+            else:
+                unique_id = get_unique_id(url, proxy_addr=proxy_addr)
+                return get_douyin_stream_data(f'https://live.douyin.com/{unique_id}')

        if room_data['status'] == 2:
+            if 'stream_url' not in room_data:
+                raise RuntimeError('该直播类型或玩法电脑端暂未支持，请使用app端分享链接进行录制')
            live_core_sdk_data = room_data['stream_url']['live_core_sdk_data']
            pull_datas = room_data['stream_url']['pull_datas']
            if live_core_sdk_data:
@ -321,7 +326,7 @@ def get_kuaishou_stream_data(url: str, proxy_addr: Union[str, None] = None, cook

    try:
        json_str = re.search('<script>window.__INITIAL_STATE__=(.*?);\(function\(\)\{var s;', html_str).group(1)
-        play_list = re.findall('(\{"liveStream".*?),"gameInfo', json_str)[0] + "}"
+        play_list = re.findall('(\\{"liveStream".*?),"gameInfo', json_str)[0] + "}"
        play_list = json.loads(play_list)
    except (AttributeError, IndexError, json.JSONDecodeError) as e:
        print(f"Failed to parse JSON data from {url}. Error: {e}")
@ -2515,4 +2520,4 @@ def get_chzzk_stream_data(url: str, proxy_addr: Union[str, None] = None, cookies
        prefix = m3u8_url.split('?')[0].rsplit('/', maxsplit=1)[0]
        m3u8_url_list = [prefix + '/' + i for i in m3u8_url_list]
        result["play_url_list"] = m3u8_url_list
-    return result
+    return result