From 0333cb4a01d532de2743e10e80c5429c5956e9ba Mon Sep 17 00:00:00 2001 From: ihmily <114978440+ihmily@users.noreply.github.com> Date: Sat, 25 Oct 2025 15:17:50 +0800 Subject: [PATCH] optimize douyin live error catch --- src/spider.py | 155 +++++++++++++++++++++++++------------------------- 1 file changed, 76 insertions(+), 79 deletions(-) diff --git a/src/spider.py b/src/spider.py index 0a7aab7..b0a9304 100644 --- a/src/spider.py +++ b/src/spider.py @@ -65,66 +65,47 @@ async def get_play_url_list(m3u8: str, proxy: OptionalStr = None, header: Option return play_url_list -@trace_error_decorator -async def get_douyin_app_stream_data(url: str, proxy_addr: OptionalStr = None, cookies: OptionalStr = None) -> dict: +async def get_douyin_web_stream_data(url: str, proxy_addr: OptionalStr = None, cookies: OptionalStr = None): headers = { - 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/115.0', - 'Accept-Language': 'zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2', - 'Referer': 'https://live.douyin.com/', - 'Cookie': 'ttwid=1%7CB1qls3GdnZhUov9o2NxOMxxYS2ff6OSvEWbv0ytbES4%7C1680522049%7C280d802d6d478e3e78d0c807f7c487e7ffec0ae4e5fdd6a0fe74c3c6af149511; my_rd=1; passport_csrf_token=3ab34460fa656183fccfb904b16ff742; passport_csrf_token_default=3ab34460fa656183fccfb904b16ff742; d_ticket=9f562383ac0547d0b561904513229d76c9c21; n_mh=hvnJEQ4Q5eiH74-84kTFUyv4VK8xtSrpRZG1AhCeFNI; store-region=cn-fj; store-region-src=uid; LOGIN_STATUS=1; __security_server_data_status=1; FORCE_LOGIN=%7B%22videoConsumedRemainSeconds%22%3A180%7D; pwa2=%223%7C0%7C3%7C0%22; download_guide=%223%2F20230729%2F0%22; volume_info=%7B%22isUserMute%22%3Afalse%2C%22isMute%22%3Afalse%2C%22volume%22%3A0.6%7D; strategyABtestKey=%221690824679.923%22; stream_recommend_feed_params=%22%7B%5C%22cookie_enabled%5C%22%3Atrue%2C%5C%22screen_width%5C%22%3A1536%2C%5C%22screen_height%5C%22%3A864%2C%5C%22browser_online%5C%22%3Atrue%2C%5C%22cpu_core_num%5C%22%3A8%2C%5C%22device_memory%5C%22%3A8%2C%5C%22downlink%5C%22%3A10%2C%5C%22effective_type%5C%22%3A%5C%224g%5C%22%2C%5C%22round_trip_time%5C%22%3A150%7D%22; VIDEO_FILTER_MEMO_SELECT=%7B%22expireTime%22%3A1691443863751%2C%22type%22%3Anull%7D; home_can_add_dy_2_desktop=%221%22; __live_version__=%221.1.1.2169%22; device_web_cpu_core=8; device_web_memory_size=8; xgplayer_user_id=346045893336; csrf_session_id=2e00356b5cd8544d17a0e66484946f28; odin_tt=724eb4dd23bc6ffaed9a1571ac4c757ef597768a70c75fef695b95845b7ffcd8b1524278c2ac31c2587996d058e03414595f0a4e856c53bd0d5e5f56dc6d82e24004dc77773e6b83ced6f80f1bb70627; __ac_nonce=064caded4009deafd8b89; __ac_signature=_02B4Z6wo00f01HLUuwwAAIDBh6tRkVLvBQBy9L-AAHiHf7; ttcid=2e9619ebbb8449eaa3d5a42d8ce88ec835; webcast_leading_last_show_time=1691016922379; webcast_leading_total_show_times=1; webcast_local_quality=sd; live_can_add_dy_2_desktop=%221%22; msToken=1JDHnVPw_9yTvzIrwb7cQj8dCMNOoesXbA_IooV8cezcOdpe4pzusZE7NB7tZn9TBXPr0ylxmv-KMs5rqbNUBHP4P7VBFUu0ZAht_BEylqrLpzgt3y5ne_38hXDOX8o=; msToken=jV_yeN1IQKUd9PlNtpL7k5vthGKcHo0dEh_QPUQhr8G3cuYv-Jbb4NnIxGDmhVOkZOCSihNpA2kvYtHiTW25XNNX_yrsv5FN8O6zm3qmCIXcEe0LywLn7oBO2gITEeg=; tt_scid=mYfqpfbDjqXrIGJuQ7q-DlQJfUSG51qG.KUdzztuGP83OjuVLXnQHjsz-BRHRJu4e986' + 'cookie': 'ttwid=1%7C2iDIYVmjzMcpZ20fcaFde0VghXAA3NaNXE_SLR68IyE%7C1761045455' + '%7Cab35197d5cfb21df6cbb2fa7ef1c9262206b062c315b9d04da746d0b37dfbc7d', + 'referer': 'https://live.douyin.com/335354047186', + 'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) ' + 'Chrome/116.0.5845.97 Safari/537.36 Core/1.116.567.400 QQBrowser/19.7.6764.400', } if cookies: - headers['Cookie'] = cookies - - async def get_app_data(room_id: str, sec_uid: str) -> dict: - app_params = { - "verifyFp": "verify_lxj5zv70_7szNlAB7_pxNY_48Vh_ALKF_GA1Uf3yteoOY", - "type_id": "0", - "live_id": "1", - "room_id": room_id, - "sec_user_id": sec_uid, - "version_code": "99.99.99", - "app_id": "1128" - } - api2 = f'https://webcast.amemv.com/webcast/room/reflow/info/?{urllib.parse.urlencode(app_params)}' - json_str2 = await async_req(url=api2, proxy_addr=proxy_addr, headers=headers) - json_data2 = json.loads(json_str2)['data'] - room_data2 = json_data2['room'] - room_data2['anchor_name'] = room_data2['owner']['nickname'] - return room_data2 + headers['cookie'] = cookies try: - web_rid = url.split('?')[0].split('live.douyin.com/') - if len(web_rid) > 1: - web_rid = web_rid[1] - params = { - "aid": "6383", - "app_name": "douyin_web", - "live_id": "1", - "device_platform": "web", - "language": "zh-CN", - "browser_language": "zh-CN", - "browser_platform": "Win32", - "browser_name": "Chrome", - "browser_version": "116.0.0.0", - "web_rid": web_rid, - 'msToken': '', - 'a_bogus': '' + web_rid = url.split('?')[0].split('live.douyin.com/')[-1] + params = { + "aid": "6383", + "app_name": "douyin_web", + "live_id": "1", + "device_platform": "web", + "language": "zh-CN", + "browser_language": "zh-CN", + "browser_platform": "Win32", + "browser_name": "Chrome", + "browser_version": "116.0.0.0", + "web_rid": web_rid, + 'msToken': '', + } - } - api = f'https://live.douyin.com/webcast/room/web/enter/?{urllib.parse.urlencode(params)}' + api = f'https://live.douyin.com/webcast/room/web/enter/?{urllib.parse.urlencode(params)}' + a_bogus = ab_sign(urllib.parse.urlparse(api).query, headers['user-agent']) + api += "&a_bogus=" + a_bogus + try: json_str = await async_req(url=api, proxy_addr=proxy_addr, headers=headers) + if not json_str: + raise Exception("it triggered risk control") json_data = json.loads(json_str)['data'] + if not json_data['data']: + raise Exception(f"{url} VR live is not supported") room_data = json_data['data'][0] room_data['anchor_name'] = json_data['user']['nickname'] - else: - try: - data = await get_sec_user_id(url, proxy_addr=proxy_addr) - _room_id, _sec_uid = data - room_data = await get_app_data(_room_id, _sec_uid) - except UnsupportedUrlError: - unique_id = await get_unique_id(url, proxy_addr=proxy_addr) - return await get_douyin_stream_data(f'https://live.douyin.com/{unique_id}') + except Exception as e: + raise Exception(f"Douyin web data fetch error, because {e}.") if room_data['status'] == 2: if 'stream_url' not in room_data: @@ -160,40 +141,56 @@ async def get_douyin_app_stream_data(url: str, proxy_addr: OptionalStr = None, c return room_data -async def get_douyin_web_stream_data(url: str, proxy_addr: OptionalStr = None, cookies: OptionalStr = None): +@trace_error_decorator +async def get_douyin_app_stream_data(url: str, proxy_addr: OptionalStr = None, cookies: OptionalStr = None) -> dict: headers = { - 'cookie': 'ttwid=1%7C2iDIYVmjzMcpZ20fcaFde0VghXAA3NaNXE_SLR68IyE%7C1761045455' - '%7Cab35197d5cfb21df6cbb2fa7ef1c9262206b062c315b9d04da746d0b37dfbc7d', - 'referer': 'https://live.douyin.com/335354047186', - 'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) ' - 'Chrome/116.0.5845.97 Safari/537.36 Core/1.116.567.400 QQBrowser/19.7.6764.400', + 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) ' + 'Chrome/141.0.0.0 Safari/537.36 Edg/141.0.0.0', + 'Accept-Language': 'zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2', + 'Referer': 'https://live.douyin.com/', + 'Cookie': 'ttwid=1%7CB1qls3GdnZhUov9o2NxOMxxYS2ff6OSvEWbv0ytbES4%7C1680522049%7C280d802d6d478e3e78d0c807f7c487e7ffec0ae4e5fdd6a0fe74c3c6af149511; my_rd=1; passport_csrf_token=3ab34460fa656183fccfb904b16ff742; passport_csrf_token_default=3ab34460fa656183fccfb904b16ff742; d_ticket=9f562383ac0547d0b561904513229d76c9c21; n_mh=hvnJEQ4Q5eiH74-84kTFUyv4VK8xtSrpRZG1AhCeFNI; store-region=cn-fj; store-region-src=uid; LOGIN_STATUS=1; __security_server_data_status=1; FORCE_LOGIN=%7B%22videoConsumedRemainSeconds%22%3A180%7D; pwa2=%223%7C0%7C3%7C0%22; download_guide=%223%2F20230729%2F0%22; volume_info=%7B%22isUserMute%22%3Afalse%2C%22isMute%22%3Afalse%2C%22volume%22%3A0.6%7D; strategyABtestKey=%221690824679.923%22; stream_recommend_feed_params=%22%7B%5C%22cookie_enabled%5C%22%3Atrue%2C%5C%22screen_width%5C%22%3A1536%2C%5C%22screen_height%5C%22%3A864%2C%5C%22browser_online%5C%22%3Atrue%2C%5C%22cpu_core_num%5C%22%3A8%2C%5C%22device_memory%5C%22%3A8%2C%5C%22downlink%5C%22%3A10%2C%5C%22effective_type%5C%22%3A%5C%224g%5C%22%2C%5C%22round_trip_time%5C%22%3A150%7D%22; VIDEO_FILTER_MEMO_SELECT=%7B%22expireTime%22%3A1691443863751%2C%22type%22%3Anull%7D; home_can_add_dy_2_desktop=%221%22; __live_version__=%221.1.1.2169%22; device_web_cpu_core=8; device_web_memory_size=8; xgplayer_user_id=346045893336; csrf_session_id=2e00356b5cd8544d17a0e66484946f28; odin_tt=724eb4dd23bc6ffaed9a1571ac4c757ef597768a70c75fef695b95845b7ffcd8b1524278c2ac31c2587996d058e03414595f0a4e856c53bd0d5e5f56dc6d82e24004dc77773e6b83ced6f80f1bb70627; __ac_nonce=064caded4009deafd8b89; __ac_signature=_02B4Z6wo00f01HLUuwwAAIDBh6tRkVLvBQBy9L-AAHiHf7; ttcid=2e9619ebbb8449eaa3d5a42d8ce88ec835; webcast_leading_last_show_time=1691016922379; webcast_leading_total_show_times=1; webcast_local_quality=sd; live_can_add_dy_2_desktop=%221%22; msToken=1JDHnVPw_9yTvzIrwb7cQj8dCMNOoesXbA_IooV8cezcOdpe4pzusZE7NB7tZn9TBXPr0ylxmv-KMs5rqbNUBHP4P7VBFUu0ZAht_BEylqrLpzgt3y5ne_38hXDOX8o=; msToken=jV_yeN1IQKUd9PlNtpL7k5vthGKcHo0dEh_QPUQhr8G3cuYv-Jbb4NnIxGDmhVOkZOCSihNpA2kvYtHiTW25XNNX_yrsv5FN8O6zm3qmCIXcEe0LywLn7oBO2gITEeg=; tt_scid=mYfqpfbDjqXrIGJuQ7q-DlQJfUSG51qG.KUdzztuGP83OjuVLXnQHjsz-BRHRJu4e986' } if cookies: - headers['cookie'] = cookies + headers['Cookie'] = cookies + + async def get_app_data(room_id: str, sec_uid: str) -> dict: + app_params = { + "verifyFp": "verify_hwj52020_7szNlAB7_pxNY_48Vh_ALKF_GA1Uf3yteoOY", + "type_id": "0", + "live_id": "1", + "room_id": room_id, + "sec_user_id": sec_uid, + "version_code": "99.99.99", + "app_id": "1128" + } + api2 = f'https://webcast.amemv.com/webcast/room/reflow/info/?{urllib.parse.urlencode(app_params)}' + a_bogus = ab_sign(urllib.parse.urlparse(api2).query, headers['User-Agent']) + api2 += "&a_bogus=" + a_bogus + try: + json_str2 = await async_req(url=api2, proxy_addr=proxy_addr, headers=headers) + if not json_str2: + raise Exception("it triggered risk control") + json_data2 = json.loads(json_str2)['data'] + if not json_data2.get('room'): + raise Exception(f"{url} VR live is not supported") + room_data2 = json_data2['room'] + room_data2['anchor_name'] = room_data2['owner']['nickname'] + return room_data2 + except Exception as e: + raise Exception(f"Douyin app data fetch error, because {e}.") try: - web_rid = url.split('?')[0].split('live.douyin.com/')[-1] - params = { - "aid": "6383", - "app_name": "douyin_web", - "live_id": "1", - "device_platform": "web", - "language": "zh-CN", - "browser_language": "zh-CN", - "browser_platform": "Win32", - "browser_name": "Chrome", - "browser_version": "116.0.0.0", - "web_rid": web_rid, - 'msToken': '', - } - - api = f'https://live.douyin.com/webcast/room/web/enter/?{urllib.parse.urlencode(params)}' - a_bogus = ab_sign(urllib.parse.urlparse(api).query, headers['user-agent']) - api += "&a_bogus=" + a_bogus - json_str = await async_req(url=api, proxy_addr=proxy_addr, headers=headers) - json_data = json.loads(json_str)['data'] - room_data = json_data['data'][0] - room_data['anchor_name'] = json_data['user']['nickname'] + web_rid = url.split('?')[0].split('live.douyin.com/') + if len(web_rid) > 1: + return await get_douyin_web_stream_data(url, proxy_addr, cookies) + else: + try: + data = await get_sec_user_id(url, proxy_addr=proxy_addr) + _room_id, _sec_uid = data + room_data = await get_app_data(_room_id, _sec_uid) + except UnsupportedUrlError: + unique_id = await get_unique_id(url, proxy_addr=proxy_addr) + return await get_douyin_stream_data(f'https://live.douyin.com/{unique_id}') if room_data['status'] == 2: if 'stream_url' not in room_data: