diff --git a/src/room.py b/src/room.py index 45a3977..b72f468 100644 --- a/src/room.py +++ b/src/room.py @@ -18,6 +18,10 @@ no_proxy_handler = urllib.request.ProxyHandler({}) opener = urllib.request.build_opener(no_proxy_handler) +class UnsupportedUrlError(Exception): + pass + + HEADERS = { 'User-Agent': 'Mozilla/5.0 (Linux; Android 11; SAMSUNG SM-G973U) AppleWebKit/537.36 (KHTML, like Gecko) ' 'SamsungBrowser/14.2 Chrome/87.0.4280.141 Mobile Safari/537.36', @@ -28,9 +32,7 @@ HEADERS = { HEADERS_PC = { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.5845.97 ' 'Safari/537.36 Core/1.116.438.400 QQBrowser/13.0.6070.400', - 'Cookie': 'sessionid=7494ae59ae06784454373ce25761e864; __ac_nonce=0670497840077ee4c9eb2; ' - '__ac_signature=_02B4Z6wo00f012DZczQAAIDCJJBb3EjnINdg-XeAAL8-db; ' - 's_v_web_id=verify_m1ztgtjj_vuHnMLZD_iwZ9_4YO4_BdN1_7wLP3pyqXsf2; ', + 'Cookie': '', } @@ -61,38 +63,44 @@ async def get_sec_user_id(url: str, proxy_addr: str | None = None, headers: dict room_id = str(redirect_url).split('?')[0].rsplit('/', maxsplit=1)[1] return room_id, sec_user_id else: - print("Could not find sec_user_id in the URL.") + raise RuntimeError("Could not find sec_user_id in the URL.") else: - print("The redirect URL does not contain 'reflow/'.") + raise UnsupportedUrlError("The redirect URL does not contain 'reflow/'.") + except UnsupportedUrlError as e: + raise e except Exception as e: - print(f"An error occurred: {e}") - return None + raise RuntimeError(f"An error occurred: {e}") # 获取抖音号 async def get_unique_id(url: str, proxy_addr: str | None = None, headers: dict | None = None) -> str | None: if not headers or all(k.lower() not in ['user-agent', 'cookie'] for k in headers): - headers = HEADERS_PC + headers = HEADERS try: proxy_addr = utils.handle_proxy_addr(proxy_addr) async with httpx.AsyncClient(proxy=proxy_addr, timeout=15) as client: response = await client.get(url, headers=headers, follow_redirects=True) redirect_url = str(response.url) + if 'reflow/' in str(redirect_url): + raise UnsupportedUrlError("Unsupported URL") sec_user_id = redirect_url.split('?')[0].rsplit('/', maxsplit=1)[1] - - user_page_response = await client.get(f'https://www.douyin.com/user/{sec_user_id}', headers=headers) - matches = re.findall(r'undefined\\"},\\"uniqueId\\":\\"(.*?)\\",\\"customVerify', - user_page_response.text) + headers['Cookie'] = ('ttwid=1%7C4ejCkU2bKY76IySQENJwvGhg1IQZrgGEupSyTKKfuyk%7C1740470403%7Cbc9a' + 'd2ee341f1a162f9e27f4641778030d1ae91e31f9df6553a8f2efa3bdb7b4; __ac_nonce=06' + '83e59f3009cc48fbab0; __ac_signature=_02B4Z6wo00f01mG6waQAAIDB9JUCzFb6.TZhmsU' + 'AAPBf34; __ac_referer=__ac_blank') + user_page_response = await client.get(f'https://www.iesdouyin.com/share/user/{sec_user_id}', + headers=headers, follow_redirects=True) + matches = re.findall(r'unique_id":"(.*?)","verification_type', user_page_response.text) if matches: unique_id = matches[-1] return unique_id else: - print("Could not find unique_id in the response.") - return None + raise RuntimeError("Could not find unique_id in the response.") + except UnsupportedUrlError as e: + raise e except Exception as e: - print(f"An error occurred: {e}") - return None + raise RuntimeError(f"An error occurred: {e}") # 获取直播间webID diff --git a/src/spider.py b/src/spider.py index a196e4d..2e72240 100644 --- a/src/spider.py +++ b/src/spider.py @@ -25,7 +25,7 @@ import urllib.request from . import JS_SCRIPT_PATH, utils from .utils import trace_error_decorator from .logger import script_path -from .room import get_sec_user_id, get_unique_id +from .room import get_sec_user_id, get_unique_id, UnsupportedUrlError from .http_clients.async_http import async_req @@ -113,12 +113,11 @@ async def get_douyin_app_stream_data(url: str, proxy_addr: OptionalStr = None, c room_data = json_data['data'][0] room_data['anchor_name'] = json_data['user']['nickname'] else: - data = await get_sec_user_id(url, proxy_addr=proxy_addr) - - if data: + try: + data = await get_sec_user_id(url, proxy_addr=proxy_addr) _room_id, _sec_uid = data room_data = await get_app_data(_room_id, _sec_uid) - else: + except UnsupportedUrlError: unique_id = await get_unique_id(url, proxy_addr=proxy_addr) return await get_douyin_stream_data(f'https://live.douyin.com/{unique_id}') @@ -3003,4 +3002,4 @@ async def get_faceit_stream_data(url: str, proxy_addr: OptionalStr = None, cooki result['anchor_name'] = anchor_name else: result = {'anchor_name': anchor_name, 'is_live': False} - return result + return result \ No newline at end of file