From 2a970697ef22546f5ebbf34b10d848d8d252f431 Mon Sep 17 00:00:00 2001
From: ihmily <961532186@qq.com>
Date: Thu, 9 May 2024 13:08:46 +0800
Subject: [PATCH] fix: update douyin live parse

---
 main.py   | 12 ++----------
 spider.py | 42 ++++++++++++++++++++++++++++++++++++++----
 2 files changed, 40 insertions(+), 14 deletions(-)
diff --git a/main.py b/main.py
index 0a72545..b5a6967 100644
--- a/main.py
+++ b/main.py
@@ -4,7 +4,7 @@
 Author: Hmily
 GitHub: https://github.com/ihmily
 Date: 2023-07-17 23:52:05
-Update: 2024-05-06 22:45:21
+Update: 2024-05-09 12:47:29
 Copyright (c) 2023-2024 by Hmily, All Rights Reserved.
 Function: Record live stream video.
 """
@@ -293,14 +293,6 @@ def get_douyin_stream_url(json_data: dict, video_quality: str) -> Dict[str, Any]
         m3u8_url_dict = stream_url['hls_pull_url_map']
         m3u8_url_list = list(m3u8_url_dict.values())
 
-        top_qn = stream_url['live_core_sdk_data']['pull_data']['options']['qualities'][-1]['name']
-
-        if top_qn == '原画':
-            flv_url_head, flv_url_tail = flv_url_list[-1].split('.flv')
-            flv_url_list = [flv_url_head.rsplit('_', maxsplit=1)[0] + '.flv' + flv_url_tail] + flv_url_list
-            m3u8_url_head, m3u8_url_tail = m3u8_url_list[-1].split('.m3u8')
-            m3u8_url_list = [m3u8_url_head.rsplit('_', maxsplit=1)[0] + '/index.m3u8' + m3u8_url_tail] + m3u8_url_list
-
         while len(flv_url_list) < 5:
             flv_url_list.append(flv_url_list[-1])
             m3u8_url_list.append(m3u8_url_list[-1])
@@ -1939,4 +1931,4 @@ while True:
 
         first_run = False
 
-    time.sleep(3)
+    time.sleep(3)
\ No newline at end of file
diff --git a/spider.py b/spider.py
index 83ffbf8..8033265 100644
--- a/spider.py
+++ b/spider.py
@@ -2,9 +2,9 @@
 
 """
 Author: Hmily
-GitHub:https://github.com/ihmily
+GitHub: https://github.com/ihmily
 Date: 2023-07-15 23:15:00
-Update: 2024-05-08 12:40:18
+Update: 2024-05-09 13:03:17
 Copyright (c) 2023 by Hmily, All Rights Reserved.
 Function: Get live stream data.
 """
@@ -151,6 +151,7 @@ def get_douyin_stream_data(url: str, proxy_addr: Union[str, None] = None, cookie
         headers['Cookie'] = cookies
 
     try:
+        origin_url_list = None
         html_str = get_req(url=url, proxy_addr=proxy_addr, headers=headers)
         match_json_str = re.search(r'(\{\\"state\\":.*?)]\\n"]\)', html_str)
         if not match_json_str:
@@ -162,17 +163,50 @@ def get_douyin_stream_data(url: str, proxy_addr: Union[str, None] = None, cookie
         room_store = room_store.split(',"has_commerce_goods"')[0] + '}}}'
         json_data = json.loads(room_store)['roomInfo']['room']
         json_data['anchor_name'] = anchor_name
+        if 'status' in json_data and json_data['status'] == 4:
+            return json_data
+
+        match_json_str2 = re.search(r'"(\{\\"common\\":.*?)"]\)</script><script nonce=', html_str)
+        if match_json_str2:
+            json_str = match_json_str2.group(1).replace('\\', '').replace('"{', '{').replace('}"', '}').replace('u0026', '&')
+            json_data2 = json.loads(json_str)
+            if 'origin' in json_data2['data']:
+                origin_url_list = json_data2['data']['origin']['main']
+
+        else:
+            match_json_str3 = re.search('"origin":\{"main":(.*?),"dash"',html_str.replace('\\', '').replace('u0026', '&'), re.S)
+            if match_json_str3:
+                origin_url_list = json.loads(match_json_str3.group(1) + '}')
+
+        if origin_url_list:
+            origin_m3u8 = {'ORIGIN': origin_url_list["hls"]}
+            origin_flv = {'ORIGIN': origin_url_list["flv"]}
+            hls_pull_url_map = json_data['stream_url']['hls_pull_url_map']
+            flv_pull_url = json_data['stream_url']['flv_pull_url']
+            json_data['stream_url']['hls_pull_url_map'] = {**origin_m3u8, **hls_pull_url_map}
+            json_data['stream_url']['flv_pull_url'] = {**origin_flv, **flv_pull_url}
         return json_data
 
     except Exception as e:
         print(f'失败地址：{url} 准备切换解析方法{e}')
         web_rid = re.match('https://live.douyin.com/(\d+)', url).group(1)
-        headers['Cookie'] = 'sessionid=b03763e09810c59948fbd9c6ab5a667a'
-        url2 = f'https://live.douyin.com/webcast/room/web/enter/?aid=6383&app_name=douyin_web&live_id=1&web_rid={web_rid}'
+        url2 = f'https://live.douyin.com/webcast/room/web/enter/?aid=6383&app_name=douyin_web&live_id=1&device_platform=web&language=zh-CN&browser_language=zh-CN&browser_platform=Win32&browser_name=Chrome&browser_version=116.0.0.0&web_rid={web_rid}'
         json_str = get_req(url=url2, proxy_addr=proxy_addr, headers=headers)
         json_data = json.loads(json_str)['data']
         room_data = json_data['data'][0]
         room_data['anchor_name'] = json_data['user']['nickname']
+        live_core_sdk_data = room_data['stream_url']['live_core_sdk_data']
+        if live_core_sdk_data:
+            json_str = live_core_sdk_data['pull_data']['stream_data']
+            json_data = json.loads(json_str)
+            if 'origin' in json_data['data']:
+                origin_url_list = json_data['data']['origin']['main']
+                origin_m3u8 = {'ORIGIN': origin_url_list["hls"]}
+                origin_flv = {'ORIGIN': origin_url_list["flv"]}
+                hls_pull_url_map = room_data['stream_url']['hls_pull_url_map']
+                flv_pull_url = room_data['stream_url']['flv_pull_url']
+                room_data['stream_url']['hls_pull_url_map'] = {**origin_m3u8, **hls_pull_url_map}
+                room_data['stream_url']['flv_pull_url'] = {**origin_flv, **flv_pull_url}
         return room_data