From 1ec6caa6e008792520f1e0d4c3a10f59ac232d4f Mon Sep 17 00:00:00 2001 From: kclauhk <78251477+kclauhk@users.noreply.github.com> Date: Fri, 24 Jan 2025 11:35:37 +0800 Subject: [PATCH] [ie/bilibili] Extract backup urls --- yt_dlp/extractor/bilibili.py | 60 ++++++++++++++++++++---------------- 1 file changed, 33 insertions(+), 27 deletions(-) diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py index 33d9d92a0a15..c15b8cab96ad 100644 --- a/yt_dlp/extractor/bilibili.py +++ b/yt_dlp/extractor/bilibili.py @@ -72,37 +72,43 @@ def extract_formats(self, play_info): for r in traverse_obj(play_info, ('support_formats', lambda _, v: v['quality'])) } + formats = [] audios = traverse_obj(play_info, ('dash', (None, 'dolby'), 'audio', ..., {dict})) flac_audio = traverse_obj(play_info, ('dash', 'flac', 'audio')) if flac_audio: audios.append(flac_audio) - formats = [{ - 'url': traverse_obj(audio, 'baseUrl', 'base_url', 'url'), - 'ext': mimetype2ext(traverse_obj(audio, 'mimeType', 'mime_type')), - 'acodec': traverse_obj(audio, ('codecs', {str.lower})), - 'vcodec': 'none', - 'tbr': float_or_none(audio.get('bandwidth'), scale=1000), - 'filesize': int_or_none(audio.get('size')), - 'format_id': str_or_none(audio.get('id')), - } for audio in audios] - - formats.extend({ - 'url': traverse_obj(video, 'baseUrl', 'base_url', 'url'), - 'ext': mimetype2ext(traverse_obj(video, 'mimeType', 'mime_type')), - 'fps': float_or_none(traverse_obj(video, 'frameRate', 'frame_rate')), - 'width': int_or_none(video.get('width')), - 'height': int_or_none(video.get('height')), - 'vcodec': video.get('codecs'), - 'acodec': 'none' if audios else None, - 'dynamic_range': {126: 'DV', 125: 'HDR10'}.get(int_or_none(video.get('id'))), - 'tbr': float_or_none(video.get('bandwidth'), scale=1000), - 'filesize': int_or_none(video.get('size')), - 'quality': int_or_none(video.get('id')), - 'format_id': traverse_obj( - video, (('baseUrl', 'base_url'), {self._FORMAT_ID_RE.search}, 1), - ('id', {str_or_none}), get_all=False), - 'format': format_names.get(video.get('id')), - } for video in traverse_obj(play_info, ('dash', 'video', ...))) + for audio in audios: + for url in sorted([u for u in dict.fromkeys(traverse_obj( + audio, (('baseUrl', 'base_url', (('backupUrl', 'backup_url'), ...), 'url'), {url_or_none}))) if u]): + formats.append({ + 'url': url, + 'ext': mimetype2ext(traverse_obj(audio, 'mimeType', 'mime_type')), + 'acodec': traverse_obj(audio, ('codecs', {str.lower})), + 'vcodec': 'none', + 'tbr': float_or_none(audio.get('bandwidth'), scale=1000), + 'filesize': int_or_none(audio.get('size')), + 'format_id': str_or_none(audio.get('id')), + }) + for video in traverse_obj(play_info, ('dash', 'video', ...)): + for url in sorted([u for u in dict.fromkeys(traverse_obj( + video, (('baseUrl', 'base_url', (('backupUrl', 'backup_url'), ...), 'url'), {url_or_none}))) if u]): + formats.append({ + 'url': url, + 'ext': mimetype2ext(traverse_obj(video, 'mimeType', 'mime_type')), + 'fps': float_or_none(traverse_obj(video, 'frameRate', 'frame_rate')), + 'width': int_or_none(video.get('width')), + 'height': int_or_none(video.get('height')), + 'vcodec': video.get('codecs'), + 'acodec': 'none' if audios else None, + 'dynamic_range': {126: 'DV', 125: 'HDR10'}.get(int_or_none(video.get('id'))), + 'tbr': float_or_none(video.get('bandwidth'), scale=1000), + 'filesize': int_or_none(video.get('size')), + 'quality': int_or_none(video.get('id')), + 'format_id': traverse_obj( + video, (('baseUrl', 'base_url'), {self._FORMAT_ID_RE.search}, 1), + ('id', {str_or_none}), get_all=False), + 'format': format_names.get(video.get('id')), + }) if formats: self._check_missing_formats(play_info, formats)