diff --git a/demo.py b/demo.py index a1ffc1e..9bc2f82 100644 --- a/demo.py +++ b/demo.py @@ -1,4 +1,5 @@ # -*- coding: utf-8 -*- +import asyncio from douyinliverecorder.logger import logger from douyinliverecorder import spider @@ -193,7 +194,7 @@ def test_live_stream(platform_name: str, proxy_addr=None, cookies=None) -> None: if platform_name in LIVE_STREAM_CONFIG: config = LIVE_STREAM_CONFIG[platform_name] try: - stream_data = config['func'](config['url'], proxy_addr=proxy_addr, cookies=cookies) + stream_data = asyncio.run(config['func'](config['url'], proxy_addr=proxy_addr, cookies=cookies)) logger.debug(f"Stream data for {platform_name}: {stream_data}") except Exception as e: logger.error(f"Error fetching stream data for {platform_name}: {e}") diff --git a/douyinliverecorder/room.py b/douyinliverecorder/room.py index 3dd975f..b856fe6 100644 --- a/douyinliverecorder/room.py +++ b/douyinliverecorder/room.py @@ -11,6 +11,7 @@ import json import re import urllib.parse import execjs +import httpx import requests import urllib.request from . 
async def get_sec_user_id(url: str, proxy_addr: str | None = None, headers: dict | None = None) -> tuple | None:
    """
    Resolve a Douyin share URL to its (room_id, sec_user_id) pair.

    Follows redirects and expects the final URL to contain a 'reflow/' path
    segment carrying a 'sec_user_id' query parameter.

    :param url: share URL to resolve.
    :param proxy_addr: optional proxy address; a bare "host:port" is accepted
        and an "http://" scheme is prepended (consistent with async_req).
    :param headers: optional request headers; when missing a user-agent/cookie,
        the module default HEADERS is used instead.
    :return: (room_id, sec_user_id) on success, otherwise None.
    """
    # Fall back to the default headers when the caller supplied none, or a
    # dict that carries neither a user-agent nor a cookie.
    if not headers or all(k.lower() not in ['user-agent', 'cookie'] for k in headers):
        headers = HEADERS
    # Normalize the proxy address the same way spider.async_req does, so
    # bare "host:port" values work here too.
    if proxy_addr and not proxy_addr.startswith('http'):
        proxy_addr = 'http://' + proxy_addr
    try:
        async with httpx.AsyncClient(proxies=proxy_addr, timeout=15) as client:
            response = await client.get(url, headers=headers, follow_redirects=True)

        redirect_url = str(response.url)
        if 'reflow/' in redirect_url:
            match = re.search(r'sec_user_id=([\w_\-]+)&', redirect_url)
            if match:
                sec_user_id = match.group(1)
                # The room id is the last path segment before the query string.
                room_id = redirect_url.split('?')[0].rsplit('/', maxsplit=1)[1]
                return room_id, sec_user_id
            print("Could not find sec_user_id in the URL.")
        else:
            print("The redirect URL does not contain 'reflow/'.")
    except Exception as e:
        # Best-effort helper: callers treat None as "could not resolve".
        print(f"An error occurred: {e}")

    return None
async def get_unique_id(url: str, proxy_addr: str | None = None, headers: dict | None = None) -> str | None:
    """
    Resolve a Douyin share URL to the anchor's unique id (Douyin handle).

    Makes two requests: one to follow the share-link redirect and extract the
    sec_user_id from the final URL, then one to the user's profile page to
    scrape the unique id out of the embedded page state.

    :param url: share URL to resolve.
    :param proxy_addr: optional proxy address; a bare "host:port" is accepted
        and an "http://" scheme is prepended (consistent with async_req).
    :param headers: optional request headers; when missing a user-agent/cookie,
        the module default HEADERS_PC is used instead.
    :return: the unique id string, or None on any failure.
    """
    # Fall back to the default PC headers when the caller supplied none, or a
    # dict that carries neither a user-agent nor a cookie.
    if not headers or all(k.lower() not in ['user-agent', 'cookie'] for k in headers):
        headers = HEADERS_PC
    # Normalize the proxy address the same way spider.async_req does.
    if proxy_addr and not proxy_addr.startswith('http'):
        proxy_addr = 'http://' + proxy_addr
    try:
        async with httpx.AsyncClient(proxies=proxy_addr, timeout=15) as client:
            # First request: follow the redirect to obtain sec_user_id from
            # the final URL's last path segment.
            response = await client.get(url, headers=headers, follow_redirects=True)
            sec_user_id = str(response.url).split('?')[0].rsplit('/', maxsplit=1)[1]

            # Second request: fetch the profile page holding the unique id.
            user_page_response = await client.get(
                f'https://www.douyin.com/user/{sec_user_id}', headers=headers)

        # The unique id sits inside the serialized (escaped-JSON) page state.
        matches = re.findall(
            r'undefined\\"},\\"uniqueId\\":\\"(.*?)\\",\\"customVerify', user_page_response.text)
        if matches:
            return matches[-1]
        print("Could not find unique_id in the response.")
        return None
    except Exception as e:
        # Best-effort helper: callers treat None as "could not resolve".
        print(f"An error occurred: {e}")
        return None
"-me3Yudck2ailla5Q4osnYIHxd9dI4WtQ==", } + + # 构建API URL并添加X-Bogus签名 api = f'https://webcast.amemv.com/webcast/room/reflow/info/?{urllib.parse.urlencode(params)}' xbogus = get_xbogus(api) api = api + "&X-Bogus=" + xbogus - if proxy_addr: - proxies = { - 'http': proxy_addr, - 'https': proxy_addr - } - response = requests.get(api, headers=headers, proxies=proxies, timeout=15) - json_str = response.text - else: - req = urllib.request.Request(api, headers=headers) - response = opener.open(req, timeout=15) - json_str = response.read().decode('utf-8') - json_data = json.loads(json_str) - return json_data['data']['room']['owner']['web_rid'] + try: + async with httpx.AsyncClient(proxies={"http://": proxy_addr, "https://": proxy_addr} if proxy_addr else None, + timeout=15) as client: + response = await client.get(api, headers=headers) + response.raise_for_status() # 检查HTTP响应状态码是否表示成功 + + json_data = response.json() + web_rid = json_data['data']['room']['owner']['web_rid'] + return web_rid + except httpx.HTTPStatusError as e: + print(f"HTTP status error occurred: {e.response.status_code}") + raise + except Exception as e: + print(f"An exception occurred during get_live_room_id: {e}") + raise if __name__ == '__main__': diff --git a/douyinliverecorder/spider.py b/douyinliverecorder/spider.py index 2ab461a..ef3ec18 100644 --- a/douyinliverecorder/spider.py +++ b/douyinliverecorder/spider.py @@ -4,7 +4,7 @@ Author: Hmily GitHub: https://github.com/ihmily Date: 2023-07-15 23:15:00 -Update: 2025-01-23 15:48:16 +Update: 2025-01-23 18:57:16 Copyright (c) 2023-2024 by Hmily, All Rights Reserved. Function: Get live stream data. 
""" @@ -18,6 +18,8 @@ import urllib.parse import urllib.error from urllib.request import Request from typing import List + +import httpx import requests import ssl import re @@ -42,16 +44,51 @@ OptionalStr = str | None OptionalDict = dict | None -def get_req( +async def async_req( url: str, proxy_addr: OptionalStr = None, headers: OptionalDict = None, data: dict | bytes | None = None, json_data: dict | list | None = None, timeout: int = 20, + redirect_url: bool = False, abroad: bool = False, content_conding: str = 'utf-8', +) -> str: + if headers is None: + headers = {} + try: + if proxy_addr: + if not proxy_addr.startswith('http'): + proxy_addr = 'http://' + proxy_addr + print(f'使用代理: {proxy_addr}') + + if data or json_data: + async with httpx.AsyncClient(proxies=proxy_addr, timeout=timeout) as client: + response = await client.post(url, data=data, json=json_data, headers=headers) + else: + async with httpx.AsyncClient(proxies=proxy_addr, timeout=timeout) as client: + response = await client.get(url, headers=headers) + + if redirect_url: + return str(response.url) + resp_str = response.text + except Exception as e: + resp_str = str(e) + + return resp_str + + +def sync_req( + url: str, + proxy_addr: OptionalStr = None, + headers: OptionalDict = None, + data: dict | bytes | None = None, + json_data: dict | list | None = None, + timeout: int = 20, redirect_url: bool = False, + abroad: bool = False, + content_conding: str = 'utf-8', ) -> str: if headers is None: headers = {} @@ -113,45 +150,22 @@ def get_req( return resp_str -def get_response_status(url: str, proxy_addr: OptionalStr = None, headers: OptionalDict = None, timeout: int = 10, +async def get_response_status(url: str, proxy_addr: OptionalStr = None, headers: OptionalDict = None, timeout: int = 10, abroad: bool = False) -> bool: - if headers is None: - headers = {} - if proxy_addr: - try: - proxies = { - 'http': proxy_addr, - 'https': proxy_addr - } - response = requests.head(url, proxies=proxies, 
async def get_response_status(url: str, proxy_addr: OptionalStr = None, headers: OptionalDict = None,
                              timeout: int = 10, abroad: bool = False) -> bool:
    """
    Check whether a HEAD request to *url* answers with HTTP 200.

    :param url: URL to probe.
    :param proxy_addr: optional proxy address passed straight to httpx.
    :param headers: optional request headers (defaults to an empty dict).
    :param timeout: request timeout in seconds.
    :param abroad: kept for signature compatibility with the old sync
        version; unused here.
    :return: True only for a 200 response; False on any other status or error.
    """
    if headers is None:
        headers = {}
    try:
        async with httpx.AsyncClient(proxies=proxy_addr, timeout=timeout) as client:
            # HEAD request with redirects allowed, mirroring the old
            # requests.head(..., allow_redirects=True) behaviour.
            response = await client.head(url, headers=headers, follow_redirects=True)
            return response.status_code == 200
    # Catch httpx exceptions, not requests ones: this function no longer uses
    # requests, so requests.exceptions.* would never match and real network
    # errors would escape to the caller.
    except httpx.TimeoutException:
        print("Request timed out, the requested address may be inaccessible or the server is unresponsive.")
    except httpx.TooManyRedirects:
        print("Too many redirects, the requested address may be inaccessible.")
    except Exception as e:
        print(f"Request error occurred: {e}")
    # Explicit False instead of falling off the end and returning None.
    return False
= None, abroad: bool = False) -> List[str]: - resp = get_req(url=m3u8, proxy_addr=proxy, headers=header, abroad=abroad) + resp = await async_req(url=m3u8, proxy_addr=proxy, headers=header, abroad=abroad) play_url_list = [] for i in resp.split('\n'): if i.startswith('https://'): @@ -206,7 +220,7 @@ def get_play_url_list(m3u8: str, proxy: OptionalStr = None, header: OptionalDict @trace_error_decorator -def get_douyin_app_stream_data(url: str, proxy_addr: OptionalStr = None, cookies: OptionalStr = None) -> dict: +async def get_douyin_app_stream_data(url: str, proxy_addr: OptionalStr = None, cookies: OptionalStr = None) -> dict: headers = { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/115.0', 'Accept-Language': 'zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2', @@ -216,7 +230,7 @@ def get_douyin_app_stream_data(url: str, proxy_addr: OptionalStr = None, cookies if cookies: headers['Cookie'] = cookies - def get_app_data(room_id: str, sec_uid: str) -> dict: + async def get_app_data(room_id: str, sec_uid: str) -> dict: app_params = { "verifyFp": "verify_lxj5zv70_7szNlAB7_pxNY_48Vh_ALKF_GA1Uf3yteoOY", "type_id": "0", @@ -227,7 +241,7 @@ def get_douyin_app_stream_data(url: str, proxy_addr: OptionalStr = None, cookies "app_id": "1128" } api2 = f'https://webcast.amemv.com/webcast/room/reflow/info/?{urllib.parse.urlencode(app_params)}' - json_str2 = get_req(url=api2, proxy_addr=proxy_addr, headers=headers) + json_str2 = await async_req(url=api2, proxy_addr=proxy_addr, headers=headers) json_data2 = json.loads(json_str2)['data'] room_data2 = json_data2['room'] room_data2['anchor_name'] = room_data2['owner']['nickname'] @@ -251,19 +265,19 @@ def get_douyin_app_stream_data(url: str, proxy_addr: OptionalStr = None, cookies } api = f'https://live.douyin.com/webcast/room/web/enter/?{urllib.parse.urlencode(params)}' - json_str = get_req(url=api, proxy_addr=proxy_addr, headers=headers) + json_str = await async_req(url=api, 
proxy_addr=proxy_addr, headers=headers) json_data = json.loads(json_str)['data'] room_data = json_data['data'][0] room_data['anchor_name'] = json_data['user']['nickname'] else: - data = get_sec_user_id(url, proxy_addr=proxy_addr) + data = await get_sec_user_id(url, proxy_addr=proxy_addr) if data: _room_id, _sec_uid = data - room_data = get_app_data(_room_id, _sec_uid) + room_data = await get_app_data(_room_id, _sec_uid) else: - unique_id = get_unique_id(url, proxy_addr=proxy_addr) - return get_douyin_stream_data(f'https://live.douyin.com/{unique_id}') + unique_id = await get_unique_id(url, proxy_addr=proxy_addr) + return await get_douyin_stream_data(f'https://live.douyin.com/{unique_id}') if room_data['status'] == 2: if 'stream_url' not in room_data: @@ -295,7 +309,7 @@ def get_douyin_app_stream_data(url: str, proxy_addr: OptionalStr = None, cookies @trace_error_decorator -def get_douyin_stream_data(url: str, proxy_addr: OptionalStr = None, cookies: OptionalStr = None) -> dict: +async def get_douyin_stream_data(url: str, proxy_addr: OptionalStr = None, cookies: OptionalStr = None) -> dict: headers = { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/115.0', 'Accept-Language': 'zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2', @@ -307,7 +321,7 @@ def get_douyin_stream_data(url: str, proxy_addr: OptionalStr = None, cookies: Op try: origin_url_list = None - html_str = get_req(url=url, proxy_addr=proxy_addr, headers=headers) + html_str = await async_req(url=url, proxy_addr=proxy_addr, headers=headers) match_json_str = re.search(r'(\{\\"state\\":.*?)]\\n"]\)', html_str) if not match_json_str: match_json_str = re.search(r'(\{\\"common\\":.*?)]\\n"]\)
\n\\s+(We regret to inform you that we have discontinu.*?)\\.\n\\s+
', html_str) @@ -378,7 +392,7 @@ def get_tiktok_stream_data(url: str, proxy_addr: OptionalStr = None, cookies: Op @trace_error_decorator -def get_kuaishou_stream_data(url: str, proxy_addr: OptionalStr = None, cookies: OptionalStr = None) -> dict: +async def get_kuaishou_stream_data(url: str, proxy_addr: OptionalStr = None, cookies: OptionalStr = None) -> dict: headers = { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/115.0', 'Accept-Language': 'zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2', @@ -386,7 +400,7 @@ def get_kuaishou_stream_data(url: str, proxy_addr: OptionalStr = None, cookies: if cookies: headers['Cookie'] = cookies try: - html_str = get_req(url=url, proxy_addr=proxy_addr, headers=headers) + html_str = await async_req(url=url, proxy_addr=proxy_addr, headers=headers) except Exception as e: print(f"Failed to fetch data from {url}.{e}") return {"type": 1, "is_live": False} @@ -427,7 +441,7 @@ def get_kuaishou_stream_data(url: str, proxy_addr: OptionalStr = None, cookies: @trace_error_decorator -def get_kuaishou_stream_data2(url: str, proxy_addr: OptionalStr = None, cookies: OptionalStr = None) -> dict | None: +async def get_kuaishou_stream_data2(url: str, proxy_addr: OptionalStr = None, cookies: OptionalStr = None) -> dict | None: headers = { 'User-Agent': 'ios/7.830 (ios 17.0; ; iPhone 15 (A2846/A3089/A3090/A3092))', 'Accept-Language': 'zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2', @@ -441,7 +455,7 @@ def get_kuaishou_stream_data2(url: str, proxy_addr: OptionalStr = None, cookies: eid = url.split('/u/')[1].strip() data = {"source": 5, "eid": eid, "shareMethod": "card", "clientType": "WEB_OUTSIDE_SHARE_H5"} app_api = 'https://livev.m.chenzhongtech.com/rest/k/live/byUser?kpn=GAME_ZONE&captchaToken=' - json_str = get_req(url=app_api, proxy_addr=proxy_addr, headers=headers, data=data) + json_str = await async_req(url=app_api, proxy_addr=proxy_addr, headers=headers, data=data) json_data 
= json.loads(json_str) live_stream = json_data['liveStream'] anchor_name = live_stream['user']['user_name'] @@ -466,11 +480,11 @@ def get_kuaishou_stream_data2(url: str, proxy_addr: OptionalStr = None, cookies: return result except Exception as e: print(f"{e}, Failed URL: {url}, preparing to switch to a backup plan for re-parsing.") - return get_kuaishou_stream_data(url, cookies=cookies, proxy_addr=proxy_addr) + return await get_kuaishou_stream_data(url, cookies=cookies, proxy_addr=proxy_addr) @trace_error_decorator -def get_huya_stream_data(url: str, proxy_addr: OptionalStr = None, cookies: OptionalStr = None) -> dict: +async def get_huya_stream_data(url: str, proxy_addr: OptionalStr = None, cookies: OptionalStr = None) -> dict: headers = { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:123.0) Gecko/20100101 Firefox/123.0', 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8', @@ -480,14 +494,14 @@ def get_huya_stream_data(url: str, proxy_addr: OptionalStr = None, cookies: Opti if cookies: headers['Cookie'] = cookies - html_str = get_req(url=url, proxy_addr=proxy_addr, headers=headers) + html_str = await async_req(url=url, proxy_addr=proxy_addr, headers=headers) json_str = re.findall('stream: (\\{"data".*?),"iWebDefaultBitRate"', html_str)[0] json_data = json.loads(json_str + '}') return json_data @trace_error_decorator -def get_huya_app_stream_url(url: str, proxy_addr: OptionalStr = None, cookies: OptionalStr = None) -> dict: +async def get_huya_app_stream_url(url: str, proxy_addr: OptionalStr = None, cookies: OptionalStr = None) -> dict: headers = { 'User-Agent': 'ios/7.830 (ios 17.0; ; iPhone 15 (A2846/A3089/A3090/A3092))', 'xweb_xhr': '1', @@ -500,7 +514,7 @@ def get_huya_app_stream_url(url: str, proxy_addr: OptionalStr = None, cookies: O room_id = url.split('?')[0].rsplit('/', maxsplit=1)[-1] if any(char.isalpha() for char in room_id): - html_str = get_req(url, proxy_addr=proxy_addr, headers=headers) + 
html_str = await async_req(url, proxy_addr=proxy_addr, headers=headers) room_id = re.search('ProfileRoom":(.*?),"sPrivateHost', html_str) if room_id: room_id = room_id.group(1) @@ -514,7 +528,7 @@ def get_huya_app_stream_url(url: str, proxy_addr: OptionalStr = None, cookies: O 'showSecret': '1', } wx_app_api = f'https://mp.huya.com/cache.php?{urllib.parse.urlencode(params)}' - json_str = get_req(url=wx_app_api, proxy_addr=proxy_addr, headers=headers) + json_str = await async_req(url=wx_app_api, proxy_addr=proxy_addr, headers=headers) json_data = json.loads(json_str) anchor_name = json_data['data']['profileInfo']['nick'] live_status = json_data['data']['realLiveStatus'] @@ -542,7 +556,7 @@ def get_huya_app_stream_url(url: str, proxy_addr: OptionalStr = None, cookies: O ) flv_url = 'https://' + play_url_list[0]['flv_url'].split('://')[1] try: - record_url = get_req(flv_url, proxy_addr=proxy_addr, headers=headers, redirect_url=True, timeout=15) + record_url = await async_req(flv_url, proxy_addr=proxy_addr, headers=headers, redirect_url=True, timeout=15) except TimeoutError: record_url = flv_url @@ -560,10 +574,10 @@ def md5(data) -> str: return hashlib.md5(data.encode('utf-8')).hexdigest() -def get_token_js(rid: str, did: str, proxy_addr: OptionalStr = None) -> List[str]: +async def get_token_js(rid: str, did: str, proxy_addr: OptionalStr = None) -> List[str]: url = f'https://www.douyu.com/{rid}' - html_str = get_req(url=url, proxy_addr=proxy_addr) + html_str = await async_req(url=url, proxy_addr=proxy_addr) result = re.search(r'(vdwdae325w_64we[\s\S]*function ub98484234[\s\S]*?)function', html_str).group(1) func_ub9 = re.sub(r'eval.*?;}', 'strc;}', result) js = execjs.compile(func_ub9) @@ -584,7 +598,7 @@ def get_token_js(rid: str, did: str, proxy_addr: OptionalStr = None) -> List[str @trace_error_decorator -def get_douyu_info_data(url: str, proxy_addr: OptionalStr = None, cookies: OptionalStr = None) -> dict: +async def get_douyu_info_data(url: str, proxy_addr: 
OptionalStr = None, cookies: OptionalStr = None) -> dict: headers = { 'User-Agent': 'ios/7.830 (ios 17.0; ; iPhone 15 (A2846/A3089/A3090/A3092))', 'Referer': 'https://m.douyu.com/3125893?rid=3125893&dyshid=0-96003918aa5365bc6dcb4933000316p1&dyshci=181', @@ -598,14 +612,14 @@ def get_douyu_info_data(url: str, proxy_addr: OptionalStr = None, cookies: Optio rid = match_rid.group(1) else: rid = re.search('douyu.com/(.*?)(?=\\?|$)', url).group(1) - html_str = get_req(url=f'https://m.douyu.com/{rid}', proxy_addr=proxy_addr, headers=headers) + html_str = await async_req(url=f'https://m.douyu.com/{rid}', proxy_addr=proxy_addr, headers=headers) json_str = re.findall('', html_str)[0] json_data = json.loads(json_str) rid = json_data['pageProps']['room']['roomInfo']['roomInfo']['rid'] headers['User-Agent'] = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:124.0) Gecko/20100101 Firefox/124.0' url2 = f'https://www.douyu.com/betard/{rid}' - json_str = get_req(url=url2, proxy_addr=proxy_addr, headers=headers) + json_str = await async_req(url=url2, proxy_addr=proxy_addr, headers=headers) json_data = json.loads(json_str) result = { "anchor_name": json_data['room']['nickname'], @@ -619,10 +633,10 @@ def get_douyu_info_data(url: str, proxy_addr: OptionalStr = None, cookies: Optio @trace_error_decorator -def get_douyu_stream_data(rid: str, rate: str = '-1', proxy_addr: OptionalStr = None, +async def get_douyu_stream_data(rid: str, rate: str = '-1', proxy_addr: OptionalStr = None, cookies: OptionalStr = None) -> dict: did = '10000000000000000000000000003306' - params_list = get_token_js(rid, did, proxy_addr=proxy_addr) + params_list = await get_token_js(rid, did, proxy_addr=proxy_addr) headers = { 'User-Agent': 'ios/7.830 (ios 17.0; ; iPhone 15 (A2846/A3089/A3090/A3092))', 'Referer': 'https://m.douyu.com/3125893?rid=3125893&dyshid=0-96003918aa5365bc6dcb4933000316p1&dyshci=181', @@ -643,13 +657,13 @@ def get_douyu_stream_data(rid: str, rate: str = '-1', proxy_addr: OptionalStr = # app_api 
= 'https://m.douyu.com/hgapi/livenc/room/getStreamUrl' app_api = f'https://www.douyu.com/lapi/live/getH5Play/{rid}' - json_str = get_req(url=app_api, proxy_addr=proxy_addr, headers=headers, data=data) + json_str = await async_req(url=app_api, proxy_addr=proxy_addr, headers=headers, data=data) json_data = json.loads(json_str) return json_data @trace_error_decorator -def get_yy_stream_data(url: str, proxy_addr: OptionalStr = None, cookies: OptionalStr = None) -> dict: +async def get_yy_stream_data(url: str, proxy_addr: OptionalStr = None, cookies: OptionalStr = None) -> dict: headers = { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36 Edg/121.0.0.0', 'Accept-Language': 'zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2', @@ -659,7 +673,7 @@ def get_yy_stream_data(url: str, proxy_addr: OptionalStr = None, cookies: Option if cookies: headers['Cookie'] = cookies - html_str = get_req(url=url, proxy_addr=proxy_addr, headers=headers) + html_str = await async_req(url=url, proxy_addr=proxy_addr, headers=headers) anchor_name = re.search('nick: "(.*?)",\n\\s+logo', html_str).group(1) cid = re.search('sid : "(.*?)",\n\\s+ssid', html_str, re.DOTALL).group(1) @@ -674,7 +688,7 @@ def get_yy_stream_data(url: str, proxy_addr: OptionalStr = None, cookies: Option "encode": "json" } url2 = f'https://stream-manager.yy.com/v3/channel/streams?{urllib.parse.urlencode(params)}' - json_str = get_req(url=url2, data=data_bytes, proxy_addr=proxy_addr, headers=headers) + json_str = await async_req(url=url2, data=data_bytes, proxy_addr=proxy_addr, headers=headers) json_data = json.loads(json_str) json_data['anchor_name'] = anchor_name @@ -685,14 +699,14 @@ def get_yy_stream_data(url: str, proxy_addr: OptionalStr = None, cookies: Option '_': int(time.time() * 1000), } detail_api = f'https://www.yy.com/live/detail?{urllib.parse.urlencode(params)}' - json_str2 = get_req(detail_api, proxy_addr=proxy_addr, 
headers=headers) + json_str2 = await async_req(detail_api, proxy_addr=proxy_addr, headers=headers) json_data2 = json.loads(json_str2) json_data['title'] = json_data2['data']['roomName'] return json_data @trace_error_decorator -def get_bilibili_room_info_h5(url: str, proxy_addr: OptionalStr = None, cookies: OptionalStr = None) -> str: +async def get_bilibili_room_info_h5(url: str, proxy_addr: OptionalStr = None, cookies: OptionalStr = None) -> str: headers = { 'origin': 'https://live.bilibili.com', 'referer': 'https://live.bilibili.com/', @@ -703,14 +717,14 @@ def get_bilibili_room_info_h5(url: str, proxy_addr: OptionalStr = None, cookies: room_id = url.split('?')[0].rsplit('/', maxsplit=1)[1] api = f'https://api.live.bilibili.com/xlive/web-room/v1/index/getH5InfoByRoom?room_id={room_id}' - json_str = get_req(api, proxy_addr=proxy_addr, headers=headers) + json_str = await async_req(api, proxy_addr=proxy_addr, headers=headers) room_info = json.loads(json_str) title = room_info['data']['room_info']['title'] return title @trace_error_decorator -def get_bilibili_room_info(url: str, proxy_addr: OptionalStr = None, cookies: OptionalStr = None) -> dict: +async def get_bilibili_room_info(url: str, proxy_addr: OptionalStr = None, cookies: OptionalStr = None) -> dict: headers = { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:127.0) Gecko/20100101 Firefox/127.0', 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8', @@ -721,18 +735,18 @@ def get_bilibili_room_info(url: str, proxy_addr: OptionalStr = None, cookies: Op try: room_id = url.split('?')[0].rsplit('/', maxsplit=1)[1] - json_str = get_req(f'https://api.live.bilibili.com/room/v1/Room/room_init?id={room_id}', + json_str = await async_req(f'https://api.live.bilibili.com/room/v1/Room/room_init?id={room_id}', proxy_addr=proxy_addr, headers=headers) room_info = json.loads(json_str) uid = room_info['data']['uid'] live_status = True if room_info['data']['live_status'] 
== 1 else False api = f'https://api.live.bilibili.com/live_user/v1/Master/info?uid={uid}' - json_str2 = get_req(url=api, proxy_addr=proxy_addr, headers=headers) + json_str2 = await async_req(url=api, proxy_addr=proxy_addr, headers=headers) anchor_info = json.loads(json_str2) anchor_name = anchor_info['data']['info']['uname'] - title = get_bilibili_room_info_h5(url, proxy_addr, cookies) + title = await get_bilibili_room_info_h5(url, proxy_addr, cookies) return {"anchor_name": anchor_name, "live_status": live_status, "room_url": url, "title": title} except Exception as e: print(e) @@ -740,7 +754,7 @@ def get_bilibili_room_info(url: str, proxy_addr: OptionalStr = None, cookies: Op @trace_error_decorator -def get_bilibili_stream_data(url: str, qn: str = '10000', platform: str = 'web', proxy_addr: OptionalStr = None, +async def get_bilibili_stream_data(url: str, qn: str = '10000', platform: str = 'web', proxy_addr: OptionalStr = None, cookies: OptionalStr = None) -> OptionalStr: headers = { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:127.0) Gecko/20100101 Firefox/127.0', @@ -758,7 +772,7 @@ def get_bilibili_stream_data(url: str, qn: str = '10000', platform: str = 'web', 'platform': platform, } play_api = f'https://api.live.bilibili.com/room/v1/Room/playUrl?{urllib.parse.urlencode(params)}' - json_str = get_req(play_api, proxy_addr=proxy_addr, headers=headers) + json_str = await async_req(play_api, proxy_addr=proxy_addr, headers=headers) json_data = json.loads(json_str) if json_data and json_data['code'] == 0: for i in json_data['data']['durl']: @@ -781,7 +795,7 @@ def get_bilibili_stream_data(url: str, qn: str = '10000', platform: str = 'web', # 此接口因网页上有限制, 需要配置登录后的cookie才能获取最高画质 api = f'https://api.live.bilibili.com/xlive/web-room/v2/index/getRoomPlayInfo?{urllib.parse.urlencode(params)}' - json_str = get_req(api, proxy_addr=proxy_addr, headers=headers) + json_str = await async_req(api, proxy_addr=proxy_addr, headers=headers) json_data = 
json.loads(json_str) if json_data['data']['live_status'] == 0: print("The anchor did not start broadcasting.") @@ -803,7 +817,7 @@ def get_bilibili_stream_data(url: str, qn: str = '10000', platform: str = 'web', @trace_error_decorator -def get_xhs_stream_url(url: str, proxy_addr: OptionalStr = None, cookies: OptionalStr = None) -> dict: +async def get_xhs_stream_url(url: str, proxy_addr: OptionalStr = None, cookies: OptionalStr = None) -> dict: headers = { 'User-Agent': 'ios/7.830 (ios 17.0; ; iPhone 15 (A2846/A3089/A3090/A3092))', 'xy-common-params': 'platform=iOS&sid=session.1722166379345546829388', @@ -813,14 +827,14 @@ def get_xhs_stream_url(url: str, proxy_addr: OptionalStr = None, cookies: Option headers['Cookie'] = cookies if 'xhslink.com' in url: - url = get_req(url, proxy_addr=proxy_addr, headers=headers, redirect_url=True) + url = await async_req(url, proxy_addr=proxy_addr, headers=headers, redirect_url=True) result = {"anchor_name": '', "is_live": False} flv_url = '' room_id = re.search('/livestream/(.*?)(?=/|\\?|$)', url) host_id = get_params(url, 'host_id') if room_id: - html_str = get_req(url, proxy_addr=proxy_addr, headers=headers) + html_str = await async_req(url, proxy_addr=proxy_addr, headers=headers) json_str = re.search('window.__INITIAL_STATE__=(.*?)', html_str, re.S).group(1) json_data = json.loads(json_str) live_title = json_data['liveStream']['roomData']['roomInfo']['roomTitle'] @@ -852,14 +866,16 @@ def get_xhs_stream_url(url: str, proxy_addr: OptionalStr = None, cookies: Option } # app方案 app_api = f'https://live-room.xiaohongshu.com/api/sns/v1/live/user_status?{urllib.parse.urlencode(params)}' - json_str = get_req(app_api, proxy_addr=proxy_addr, headers=headers) + json_str = await async_req(app_api, proxy_addr=proxy_addr, headers=headers) json_data = json.loads(json_str) if json_data["success"]: if json_data['data']: live_link = json_data['data'][0]['live_link'] anchor_name = get_params(live_link, "host_nickname") - - if flv_url and 
get_response_status(flv_url, proxy_addr=proxy_addr, headers=headers, timeout=5): + status = await get_response_status( + result['record_url'], proxy_addr=proxy_addr, headers=headers, timeout=5 + ) + if flv_url and status: result['is_live'] = True return result flv_url = get_params(live_link, "flvUrl") @@ -874,18 +890,17 @@ def get_xhs_stream_url(url: str, proxy_addr: OptionalStr = None, cookies: Option 'record_url': flv_url } else: - html_str = get_req(url, proxy_addr=proxy_addr, headers=headers) + html_str = await async_req(url, proxy_addr=proxy_addr, headers=headers) json_str = re.search('window.__INITIAL_STATE__=(.*?)', html_str, re.S).group(1) json_data = json.loads(json_str) anchor_name = json_data['profile']['userInfo']['nickname'] result['anchor_name'] = anchor_name - else: - print(f"xhs {json_data['msg']}") + return result @trace_error_decorator -def get_bigo_stream_url(url: str, proxy_addr: OptionalStr = None, cookies: OptionalStr = None) -> dict: +async def get_bigo_stream_url(url: str, proxy_addr: OptionalStr = None, cookies: OptionalStr = None) -> dict: headers = { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/119.0', 'Accept-Language': 'zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2', @@ -896,7 +911,7 @@ def get_bigo_stream_url(url: str, proxy_addr: OptionalStr = None, cookies: Optio headers['Cookie'] = cookies if 'bigo.tv' not in url: - html_str = get_req(url, proxy_addr=proxy_addr, headers=headers) + html_str = await async_req(url, proxy_addr=proxy_addr, headers=headers) web_url = re.search( '', html_str).group(1) @@ -909,7 +924,7 @@ def get_bigo_stream_url(url: str, proxy_addr: OptionalStr = None, cookies: Optio data = {'siteId': room_id} # roomId url2 = 'https://ta.bigo.tv/official_website/studio/getInternalStudioInfo' - json_str = get_req(url=url2, proxy_addr=proxy_addr, headers=headers, data=data) + json_str = await async_req(url=url2, proxy_addr=proxy_addr, headers=headers, data=data) 
json_data = json.loads(json_str) anchor_name = json_data['data']['nick_name'] live_status = json_data['data']['alive'] @@ -922,14 +937,14 @@ def get_bigo_stream_url(url: str, proxy_addr: OptionalStr = None, cookies: Optio result['record_url'] = m3u8_url result |= {"title": live_title, "is_live": True, "m3u8_url": m3u8_url, 'record_url': m3u8_url} elif result['anchor_name'] == '': - html_str = get_req(url=f'https://www.bigo.tv/cn/{room_id}', proxy_addr=proxy_addr, headers=headers) + html_str = await async_req(url=f'https://www.bigo.tv/cn/{room_id}', proxy_addr=proxy_addr, headers=headers) result['anchor_name'] = re.search('