From 2a970697ef22546f5ebbf34b10d848d8d252f431 Mon Sep 17 00:00:00 2001 From: ihmily <961532186@qq.com> Date: Thu, 9 May 2024 13:08:46 +0800 Subject: [PATCH] fix: update douyin live parse --- main.py | 12 ++---------- spider.py | 42 ++++++++++++++++++++++++++++++++++++++---- 2 files changed, 40 insertions(+), 14 deletions(-) diff --git a/main.py b/main.py index 0a72545..b5a6967 100644 --- a/main.py +++ b/main.py @@ -4,7 +4,7 @@ Author: Hmily GitHub: https://github.com/ihmily Date: 2023-07-17 23:52:05 -Update: 2024-05-06 22:45:21 +Update: 2024-05-09 12:47:29 Copyright (c) 2023-2024 by Hmily, All Rights Reserved. Function: Record live stream video. """ @@ -293,14 +293,6 @@ def get_douyin_stream_url(json_data: dict, video_quality: str) -> Dict[str, Any] m3u8_url_dict = stream_url['hls_pull_url_map'] m3u8_url_list = list(m3u8_url_dict.values()) - top_qn = stream_url['live_core_sdk_data']['pull_data']['options']['qualities'][-1]['name'] - - if top_qn == '原画': - flv_url_head, flv_url_tail = flv_url_list[-1].split('.flv') - flv_url_list = [flv_url_head.rsplit('_', maxsplit=1)[0] + '.flv' + flv_url_tail] + flv_url_list - m3u8_url_head, m3u8_url_tail = m3u8_url_list[-1].split('.m3u8') - m3u8_url_list = [m3u8_url_head.rsplit('_', maxsplit=1)[0] + '/index.m3u8' + m3u8_url_tail] + m3u8_url_list - while len(flv_url_list) < 5: flv_url_list.append(flv_url_list[-1]) m3u8_url_list.append(m3u8_url_list[-1]) @@ -1939,4 +1931,4 @@ while True: first_run = False - time.sleep(3) + time.sleep(3) \ No newline at end of file diff --git a/spider.py b/spider.py index 83ffbf8..8033265 100644 --- a/spider.py +++ b/spider.py @@ -2,9 +2,9 @@ """ Author: Hmily -GitHub:https://github.com/ihmily +GitHub: https://github.com/ihmily Date: 2023-07-15 23:15:00 -Update: 2024-05-08 12:40:18 +Update: 2024-05-09 13:03:17 Copyright (c) 2023 by Hmily, All Rights Reserved. Function: Get live stream data. """ @@ -151,6 +151,7 @@ def get_douyin_stream_data(url: str, proxy_addr: Union[str, None] = None, cookie headers['Cookie'] = cookies try: + origin_url_list = None html_str = get_req(url=url, proxy_addr=proxy_addr, headers=headers) match_json_str = re.search(r'(\{\\"state\\":.*?)]\\n"]\)', html_str) if not match_json_str: @@ -162,17 +163,50 @@ def get_douyin_stream_data(url: str, proxy_addr: Union[str, None] = None, cookie room_store = room_store.split(',"has_commerce_goods"')[0] + '}}}' json_data = json.loads(room_store)['roomInfo']['room'] json_data['anchor_name'] = anchor_name + if 'status' in json_data and json_data['status'] == 4: + return json_data + + match_json_str2 = re.search(r'"(\{\\"common\\":.*?)"]\)