feat: add dy author page link support

This commit is contained in:
ihmily 2024-10-08 15:29:45 +08:00
parent f0700e8655
commit 44af6912d4
4 changed files with 61 additions and 25 deletions

View File

@ -105,9 +105,11 @@
```
抖音:
https://live.douyin.com/745964462470
https://v.douyin.com/iQFeBnt/
https://live.douyin.com/yall1102
https://live.douyin.com/745964462470 (PC网页端地址)
https://v.douyin.com/iQFeBnt/ (app端分享地址)
https://live.douyin.com/yall1102 (链接+抖音号)
https://v.douyin.com/CeiU5cbX/ (作者主页分享地址)
TikTok
https://www.tiktok.com/@pearlgaga88/live

View File

@ -6,7 +6,7 @@ from douyinliverecorder import spider
LIVE_STREAM_CONFIG = {
"douyin": {
"url": "https://live.douyin.com/745964462470",
"func": spider.get_douyin_stream_data,
"func": spider.get_douyin_app_stream_data,
},
"tiktok": {
"url": "https://www.tiktok.com/@pearlgaga88/live",
@ -157,4 +157,4 @@ def test_live_stream(platform_name: str) -> None:
if __name__ == "__main__":
platform = "douyin"
test_live_stream(platform)
test_live_stream(platform)

View File

@ -4,7 +4,7 @@
Author: Hmily
Github:https://github.com/ihmily
Date: 2023-07-17 23:52:05
Update: 2024-03-06 23:35:00
Update: 2024-10-08 23:35:00
Copyright (c) 2023 by Hmily, All Rights Reserved.
"""
import json
@ -25,20 +25,29 @@ HEADERS = {
'Cookie': 's_v_web_id=verify_lk07kv74_QZYCUApD_xhiB_405x_Ax51_GYO9bUIyZQVf'
}
# Desktop-browser request headers (Windows QQBrowser User-Agent plus a Cookie string).
# Used as the fallback headers by get_unique_id when the caller supplies none.
# NOTE(review): the cookie values below are hard-coded and presumably tied to a
# specific session; they may expire and need refreshing - confirm.
HEADERS_PC = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.5845.97 '
'Safari/537.36 Core/1.116.438.400 QQBrowser/13.0.6070.400',
'Cookie': 'sessionid=7494ae59ae06784454373ce25761e864; __ac_nonce=0670497840077ee4c9eb2; '
'__ac_signature=_02B4Z6wo00f012DZczQAAIDCJJBb3EjnINdg-XeAAL8-db; '
's_v_web_id=verify_m1ztgtjj_vuHnMLZD_iwZ9_4YO4_BdN1_7wLP3pyqXsf2; ',
}
# X-Bogus signature algorithm
def get_xbogus(url: str, headers: Union[dict, None] = None) -> str:
    """Compute the X-Bogus signature for the query string of ``url``.

    The signature is produced by calling the ``sign`` function of the local
    ``./x-bogus.js`` script with the URL's query string and a User-Agent.

    Args:
        url: Full request URL; only its query component is signed.
        headers: Optional request headers. The User-Agent is looked up
            case-insensitively; when it is missing or empty, the User-Agent
            of the module-level ``HEADERS`` is used instead.

    Returns:
        The value returned by the script's ``sign`` function.

    Raises:
        OSError: If ``./x-bogus.js`` cannot be opened (path is relative to
            the current working directory).
    """
    # Header names are case-insensitive, so match on the lowered key instead of
    # indexing with one fixed spelling (which would miss e.g. 'user-agent').
    user_agent = None
    if headers:
        user_agent = next(
            (value for key, value in headers.items() if key.lower() == 'user-agent'),
            None,
        )
    if not user_agent:
        user_agent = HEADERS['User-Agent']

    query = urllib.parse.urlparse(url).query
    # Read the script inside a context manager so the file handle is closed.
    with open('./x-bogus.js') as js_file:
        js_source = js_file.read()
    xbogus = execjs.compile(js_source).call('sign', query, user_agent)
    return xbogus
# 获取房间ID和用户secID
def get_sec_user_id(url: str, proxy_addr: Union[str, None] = None, headers: Union[dict, None] = None):
if not headers or "User-Agent" not in headers and "user-agent" not in headers:
def get_sec_user_id(url: str, proxy_addr: Union[str, None] = None,
headers: Union[dict, None] = None) -> Union[tuple, None]:
if not headers or all(k.lower() not in ['user-agent', 'cookie'] for k in headers):
headers = HEADERS
if proxy_addr:
@ -50,15 +59,37 @@ def get_sec_user_id(url: str, proxy_addr: Union[str, None] = None, headers: Unio
else:
response = opener.open(url, timeout=15)
redirect_url = response.url
sec_user_id = re.search(r'sec_user_id=([\w_\-]+)&', redirect_url).group(1)
room_id = redirect_url.split('?')[0].rsplit('/', maxsplit=1)[1]
return room_id, sec_user_id
if 'reflow/' in redirect_url:
sec_user_id = re.search(r'sec_user_id=([\w_\-]+)&', redirect_url).group(1)
room_id = redirect_url.split('?')[0].rsplit('/', maxsplit=1)[1]
return room_id, sec_user_id
# Get the Douyin ID (uniqueId) of an author
def get_unique_id(url: str, proxy_addr: Union[str, None] = None,
                  headers: Union[dict, None] = None) -> Union[str, None]:
    """Resolve an author-page share link to the author's Douyin ID.

    The share link is opened to obtain its final (redirected) URL, whose last
    path segment is taken as the ``sec_user_id``. The author's profile page
    ``https://www.douyin.com/user/<sec_user_id>`` is then fetched and the
    ``uniqueId`` value is extracted from the page source.

    Args:
        url: Author home page share link (e.g. a ``v.douyin.com`` short link).
        proxy_addr: Optional proxy address applied to both HTTP requests.
        headers: Optional request headers; ``HEADERS_PC`` is used when none
            are given or when they contain neither a User-Agent nor a Cookie.

    Returns:
        The last ``uniqueId`` found in the profile page.

    Raises:
        IndexError: If no ``uniqueId`` can be found in the profile page.
    """
    if not headers or all(k.lower() not in ['user-agent', 'cookie'] for k in headers):
        headers = HEADERS_PC

    proxies = {'http': proxy_addr, 'https': proxy_addr} if proxy_addr else None
    if proxies:
        response = requests.get(url, headers=headers, proxies=proxies, timeout=15)
    else:
        # Without a proxy the module-level opener is used, as before.
        response = opener.open(url, timeout=15)
    redirect_url = response.url

    # Drop the query string and any trailing slash before taking the last
    # path segment, otherwise a URL ending in '/' yields an empty id.
    sec_user_id = redirect_url.split('?')[0].rstrip('/').rsplit('/', maxsplit=1)[1]

    # Route the profile request through the same proxy and bound it with a
    # timeout so a stalled connection cannot hang the caller indefinitely.
    resp = requests.get(f'https://www.douyin.com/user/{sec_user_id}', headers=headers,
                        proxies=proxies, timeout=15)
    matches = re.findall(r'undefined\\"},\\"uniqueId\\":\\"(.*?)\\",\\"customVerify', resp.text)
    if not matches:
        # Same exception type as the former bare [-1] lookup, but with context.
        raise IndexError(f'uniqueId not found in profile page of sec_user_id {sec_user_id!r}')
    return matches[-1]
# 获取直播间webID
def get_live_room_id(room_id: str, sec_user_id: str, proxy_addr: Union[str, None] = None,
params: Union[dict, None] = None, headers: Union[dict, None] = None) -> str:
if not headers or "User-Agent" not in headers and "user-agent" not in headers:
if not headers or all(k.lower() not in ['user-agent', 'cookie'] for k in headers):
headers = HEADERS
if not params:
@ -93,8 +124,6 @@ def get_live_room_id(room_id: str, sec_user_id: str, proxy_addr: Union[str, None
if __name__ == '__main__':
room_url = "https://v.douyin.com/iQLgKSj/"
# url="https://v.douyin.com/iQFeBnt/"
# url="https://v.douyin.com/iehvKttp/"
_room_id, sec_uid = get_sec_user_id(room_url)
web_rid = get_live_room_id(_room_id, sec_uid)
print("return web_rid:", web_rid)
print("return web_rid:", web_rid)

View File

@ -29,7 +29,7 @@ from .utils import (
trace_error_decorator, dict_to_cookie_str
)
from .logger import script_path
from .web_rid import get_sec_user_id
from .room import get_sec_user_id, get_unique_id
no_proxy_handler = urllib.request.ProxyHandler({})
opener = urllib.request.build_opener(no_proxy_handler)
@ -173,8 +173,7 @@ def get_douyin_app_stream_data(url: str, proxy_addr: Union[str, None] = None, co
if cookies:
headers['Cookie'] = cookies
def get_app_data():
room_id, sec_uid = get_sec_user_id(url=url, proxy_addr=proxy_addr)
def get_app_data(room_id, sec_uid):
api2 = f'https://webcast.amemv.com/webcast/room/reflow/info/?verifyFp=verify_lxj5zv70_7szNlAB7_pxNY_48Vh_ALKF_GA1Uf3yteoOY&type_id=0&live_id=1&room_id={room_id}&sec_user_id={sec_uid}&version_code=99.99.99&app_id=1128'
json_str2 = get_req(url=api2, proxy_addr=proxy_addr, headers=headers)
json_data2 = json.loads(json_str2)['data']
@ -192,12 +191,18 @@ def get_douyin_app_stream_data(url: str, proxy_addr: Union[str, None] = None, co
room_data = json_data['data'][0]
room_data['anchor_name'] = json_data['user']['nickname']
else:
room_data = get_app_data()
data = get_sec_user_id(url, proxy_addr=proxy_addr)
if 'stream_url' not in room_data:
raise RuntimeError('该直播类型或玩法电脑端暂未支持请使用app端分享链接进行录制')
if data:
_room_id, _sec_uid = data
room_data = get_app_data(_room_id, _sec_uid)
else:
unique_id = get_unique_id(url, proxy_addr=proxy_addr)
return get_douyin_stream_data(f'https://live.douyin.com/{unique_id}')
if room_data['status'] == 2:
if 'stream_url' not in room_data:
raise RuntimeError('该直播类型或玩法电脑端暂未支持请使用app端分享链接进行录制')
live_core_sdk_data = room_data['stream_url']['live_core_sdk_data']
pull_datas = room_data['stream_url']['pull_datas']
if live_core_sdk_data:
@ -321,7 +326,7 @@ def get_kuaishou_stream_data(url: str, proxy_addr: Union[str, None] = None, cook
try:
json_str = re.search('<script>window.__INITIAL_STATE__=(.*?);\(function\(\)\{var s;', html_str).group(1)
play_list = re.findall('(\{"liveStream".*?),"gameInfo', json_str)[0] + "}"
play_list = re.findall('(\\{"liveStream".*?),"gameInfo', json_str)[0] + "}"
play_list = json.loads(play_list)
except (AttributeError, IndexError, json.JSONDecodeError) as e:
print(f"Failed to parse JSON data from {url}. Error: {e}")
@ -2515,4 +2520,4 @@ def get_chzzk_stream_data(url: str, proxy_addr: Union[str, None] = None, cookies
prefix = m3u8_url.split('?')[0].rsplit('/', maxsplit=1)[0]
m3u8_url_list = [prefix + '/' + i for i in m3u8_url_list]
result["play_url_list"] = m3u8_url_list
return result
return result