From 70b4cb54f81fdbfdc5a9e7c76bca56d4501ab41b Mon Sep 17 00:00:00 2001 From: kclauhk <78251477+kclauhk@users.noreply.github.com> Date: Tue, 17 Dec 2024 23:46:50 +0800 Subject: [PATCH] Update extrememusic.py --- yt_dlp/extractor/extrememusic.py | 204 +++++++++++++++---------------- 1 file changed, 96 insertions(+), 108 deletions(-) diff --git a/yt_dlp/extractor/extrememusic.py b/yt_dlp/extractor/extrememusic.py index de31c496feec..6128c18f1af0 100644 --- a/yt_dlp/extractor/extrememusic.py +++ b/yt_dlp/extractor/extrememusic.py @@ -22,11 +22,6 @@ class ExtremeMusicBaseIE(InfoExtractor): def _initialize(self, url, video_id, country=None): self._REQUIRE_VERSION = (self._configuration_arg('ver', ie_key='extrememusic') or self._configuration_arg('version', ie_key='extrememusic')) - # This site serves different versions of the same playlist id due to geo-restriction - # use user's own country code if no code (geo_bypass_country or pre-defined country code) is provided - if not country: - country = self._download_webpage('https://ipapi.co/country_code', video_id) - self.to_screen(f'Set country code to {country}') env = self._download_json('https://www.extrememusic.com/env', video_id) self._REQUEST_HEADERS = { 'Accept': 'application/json', @@ -53,11 +48,11 @@ def _get_album_data(self, album_id, video_id, fatal=True): def _extract_track(self, album_data, track_id=None, version_id=None): if 'tracks' in album_data and 'track_sounds' in album_data: if not track_id and version_id: - track_id = traverse_obj(album_data['track_sounds'], - (lambda _, v: v['id'] == int(version_id), 'track_id', {int}), get_all=False) - if track := traverse_obj(album_data['tracks'], - (lambda _, v: v['id'] == int(track_id), {dict}), get_all=False): - info = {**traverse_obj(track, { + track_id = traverse_obj(album_data, ( + 'track_sounds', lambda _, v: v['id'] == int(version_id), 'track_id', {int}), get_all=False) + if track := traverse_obj(album_data, ( + 'tracks', lambda _, v: v['id'] == int(track_id), {dict}), get_all=False): + track_info = {**traverse_obj(track, { 'track': ('title', {str}), 'track_number': ('sort_order', {lambda v: v + 1}, {int}), 'track_id': ('track_no', {str}), @@ -68,10 +63,10 @@ def _extract_track(self, album_data, track_id=None, version_id=None): 'genres': (('genre', 'subgenre'), ..., 'label'), 'tag': ('keywords', ..., 'label'), 'album': ('album_title', {lambda v: str_or_none(v) or None}), - }), **traverse_obj(album_data, { - 'album_artists': ('album', 'artist', {lambda v: [v] if v else None}), - 'upload_date': ('album', 'created', {unified_strdate}), - })} + }), **traverse_obj(album_data, ('album', { + 'album_artists': ('artist', {lambda v: [v] if v else None}), + 'upload_date': ('created', {unified_strdate}), + }))} entries, thumbnails = [], [] for image in traverse_obj(track, ('images', 'default')): thumbnails.append(traverse_obj(image, { @@ -80,17 +75,18 @@ def _extract_track(self, album_data, track_id=None, version_id=None): 'height': ('height', {int_or_none}), })) if not self._REQUIRE_VERSION: - version_id = version_id or traverse_obj(track, 'default_track_sound_id', ('track_sound_ids', 0)) + version_id = (version_id + or traverse_obj(track, 'default_track_sound_id', ('track_sound_ids', 0))) for sound_id in [version_id] if version_id else track['track_sound_ids']: - if sound := traverse_obj(album_data['track_sounds'], - (lambda _, v: v['id'] == int(sound_id) and v['track_id'] == int(track_id), - {dict}), get_all=False): + if sound := traverse_obj(album_data, ( + 'track_sounds', lambda _, v: v['id'] == int(sound_id) and v['track_id'] == int(track_id), + {dict}), get_all=False): if (version_id or 'all' in self._REQUIRE_VERSION or any(x in sound['version_type'].lower() for x in self._REQUIRE_VERSION)): formats = [] - for audio_url in traverse_obj(sound, ('assets', 'audio', ('preview_url', - 'preview_url_hls'))): + for audio_url in traverse_obj( + sound, ('assets', 'audio', ('preview_url', 'preview_url_hls'))): if determine_ext(audio_url) == 'm3u8': m3u8_url = re.sub(r'\.m3u8\?.*', '/HLS/128_v4.m3u8', audio_url) for f in self._extract_m3u8_formats(m3u8_url, sound_id, 'm4a', fatal=False): @@ -108,25 +104,14 @@ def _extract_track(self, album_data, track_id=None, version_id=None): 'id': str(sound_id), 'title': join_nonempty('title', 'version_type', from_dict=sound, delim=' - '), 'alt_title': sound['version_type'], - **info, + **track_info, 'thumbnails': thumbnails, 'duration': sound.get('duration'), 'formats': formats, 'webpage_url': f"https://www.extrememusic.com/albums/{track['album_id']}?item={track_id}&ver={sound_id}", }) - - if len(entries) > 1: - return { - 'id': track_id, - **info, - 'entries': entries, - '_type': 'playlist', - } - elif len(entries) == 1: - return entries[0] - else: - self.raise_no_formats('Track data not found', video_id=track_id) - return [] + return [entries, track_info] + self.raise_no_formats('Track data not found', video_id=track_id) class ExtremeMusicIE(ExtremeMusicBaseIE): @@ -214,10 +199,12 @@ def _real_extract(self, url): album_id, track_id, version_id = self._match_valid_url(url).group('album', 'id', 'ver') self._initialize(url, version_id or track_id, self.get_param('geo_bypass_country') or 'DE') album_data = self._get_album_data(album_id, version_id or track_id) - if result := self._extract_track(album_data, track_id, version_id): - return result - else: - self.raise_no_formats('No formats were found') + if track := self._extract_track(album_data, track_id, version_id): + if len(track[0]) > 1: + return self.playlist_result(track[0], **track[1]) + elif len(track[0]) == 1: + return track[0][0] + self.raise_no_formats('No formats were found') class ExtremeMusicAIE(ExtremeMusicBaseIE): @@ -247,32 +234,26 @@ def _real_extract(self, url): album_id = self._match_id(url) self._initialize(url, album_id, self.get_param('geo_bypass_country') or 'DE') album_data = self._get_album_data(album_id, album_id) + subgenres = traverse_obj(album_data, ('album', 'subgenres', {str_or_none})) + album_info = merge_dicts(traverse_obj(album_data, ('album', { + 'id': ('id', {lambda v: str(v)}), + 'album': ('title', {str_or_none}), + 'description': ('description', {lambda v: str_or_none(v) or None}), + 'artists': ('artist', {lambda v: [v] if v else None}), + 'genres': ('genres', {str_or_none}, + {lambda v: join_nonempty(v, subgenres, delim=', ')}, + {lambda v: v.split(', ') if v else None}), + 'tag': ('keywords', {lambda v: v.split(', ') if v else None}), + })), { + 'description': traverse_obj( + album_data, ('bio', 'description', {lambda v: str_or_none(v) or None})), + }) - entries = [] - for track_id in traverse_obj(album_data, ('tracks', ..., 'id')): - if track := self._extract_track(album_data, track_id=track_id): - if track.get('entries'): - entries.extend(track['entries']) - else: - entries.append(track) - - if entries: - subgenres = traverse_obj(album_data, ('album', 'subgenres', {str_or_none})) - return merge_dicts(traverse_obj(album_data.get('album'), { - 'id': ('id', {lambda v: str(v)}), - 'album': ('title', {str_or_none}), - 'description': ('description', {lambda v: str_or_none(v) or None}), - 'artists': ('artist', {lambda v: [v] if v else None}), - 'genres': ('genres', {str_or_none}, {lambda v: join_nonempty(v, subgenres, delim=', ')}, - {lambda v: v.split(', ') if v else None}), - 'tag': ('keywords', {lambda v: v.split(', ') if v else None}), - }), { - 'description': traverse_obj(album_data, ('bio', 'description', {lambda v: str_or_none(v) or None})), - 'entries': entries, - '_type': 'playlist', - }) - else: - self.raise_no_formats('No formats were found') + return self.playlist_result( + (entry for sounds in (track[0] for track in (self._extract_track(album_data, track_id=track_id) + for track_id in traverse_obj(album_data, ('tracks', ..., 'id')))) + for entry in sounds), + **album_info) class ExtremeMusicPIE(ExtremeMusicBaseIE): @@ -285,8 +266,7 @@ class ExtremeMusicPIE(ExtremeMusicBaseIE): 'title': 'NICE', 'thumbnail': 'https://d2oet5a29f64lj.cloudfront.net/img-data/w/2480/featureditem/square/thumbnail_PLAYLIST_Nice-square-(formerly ChristmasTraditional).jpg', }, - 'playlist_mincount': 29, - 'expected_warnings': ['This playlist has geo-restricted items. Try using --xff to specify a different country code, e.g. DE'], + 'playlist_count': 40, }, { 'url': 'https://www.extrememusic.com/playlists/fUKKU5KAfK61pAAKp4U4KpKUxsRk2ki_fU117KpUUAAUKAUfpA6UAfAKK8Ul5ji', 'info_dict': { @@ -299,60 +279,68 @@ class ExtremeMusicPIE(ExtremeMusicBaseIE): def _real_extract(self, url): playlist_id = self._match_id(url) - self._initialize(url, playlist_id, self.get_param('geo_bypass_country')) + self._initialize(url, playlist_id, self.get_param('geo_bypass_country') or 'DE') - def playlist_query(playlist_id, offset, limit): + def playlist_query(playlist_id, offset, limit, note=None): # playlist api: https://snapi.extrememusic.com/playlists?id={playlist_id}&range={offset}%2C{limit}' return self._download_json( - 'https://snapi.extrememusic.com/playlists', playlist_id, - note=f'Downloading item {offset + 1}-{offset + limit}', query={ + f'{self._API_URL}/playlists', playlist_id, note=note, query={ 'id': playlist_id, - 'range': f'{offset},{limit}', + 'range': f'{offset + 1},{limit}', }, headers=self._REQUEST_HEADERS) - thumbnails, entries = [], [] - album_data, track_done, limit = {}, [], 50 - for i in itertools.count(): - playlist = playlist_query(playlist_id, i * limit, limit) - if len(playlist['playlist_items']) == 0: - break - else: - track_ids = traverse_obj(playlist, ('playlist_items', ..., 'track_id')) - for track_id in list(dict.fromkeys(track_ids)): - if track_id not in track_done: - album_id = traverse_obj(playlist, - ('tracks', lambda _, v: v['id'] == track_id, 'album_id', {int}), get_all=False) - if album_id not in album_data: - album_data[album_id] = self._get_album_data(album_id, track_id, fatal=False) - playlist['album'] = traverse_obj(album_data, (album_id, 'album', {dict})) - if track := self._extract_track(playlist, track_id=track_id): - if track.get('entries'): - entries.extend(track['entries']) - else: - entries.append(track) - track_done.append(track_id) - if len(track_done) >= playlist['playlist']['playlist_items_count']: - break - - if entries: - if len(track_done) < playlist['playlist']['playlist_items_count']: - self.report_warning('This playlist has geo-restricted items. Try using --xff to specify a different country code, e.g. DE') + def extract_playlist(playlist_id): + albums, tracks_done, items_count, limit = {}, [], 0, 25 + for i in itertools.count(): + try: + # try to tackle geo restriction by shortening playlist id + playlist = playlist_query(playlist_id.split('_')[0], i * limit, limit, + note=f'Downloading item {i * limit + 1}-{i * limit + limit}') + except Exception: + playlist = playlist_query(playlist_id, i * limit, limit, + note=f'Downloading item {i * limit + 1}-{i * limit + limit}') + if playlist_items_count := traverse_obj(playlist, ('playlist', 'playlist_items_count')): + if len(playlist.get('tracks', [])) == 0: + if items_count < playlist_items_count: + self.report_warning('This playlist has geo-restricted items. Try using --xff to specify a different country code') + break + else: + track_ids = traverse_obj(playlist, ('tracks', ..., 'id')) + items_count += len(track_ids) + for track_id in list(dict.fromkeys(track_ids)): + if track_id not in tracks_done: + album_id = traverse_obj( + playlist, ('tracks', lambda _, v: v['id'] == track_id, + 'album_id', {int}), get_all=False) + if album_id not in albums: + albums[album_id] = self._get_album_data(album_id, track_id, fatal=False) + playlist['album'] = traverse_obj(albums, (album_id, 'album', {dict})) + if track := self._extract_track(playlist, track_id=track_id): + if len(track[0]) > 1: + yield from track[0] + elif len(track[0]) == 1: + yield track[0][0] + tracks_done.append(track_id) + if items_count >= playlist_items_count: + break + else: + return [] - for image in traverse_obj(playlist['playlist'], ('images', 'square')): + playlist_info, thumbnails = {}, [] + if playlist := playlist_query(playlist_id, 0, 1, note='Downloading JSON metadata'): + for image in traverse_obj(playlist, ('playlist', 'images', 'square')): thumbnails.append(traverse_obj(image, { 'url': ('url', {url_or_none}), 'width': ('width', {int_or_none}), 'height': ('height', {int_or_none}), })) - - return {k: v for k, v in { - 'id': playlist['playlist']['id'], - 'title': playlist['playlist']['title'], + playlist_info = { + **traverse_obj(playlist, ('playlist', { + 'id': ('id', {str}), + 'title': ('title', {str_or_none}), + 'uploader': ('owner_name', {str_or_none}), + })), 'thumbnail': traverse_obj(thumbnails, (0, 'url', {url_or_none})), 'thumbnails': thumbnails, - 'uploader': playlist['playlist']['owner_name'], - 'entries': entries, - '_type': 'playlist', - }.items() if v} - else: - self.raise_no_formats('No formats were found') + } + return self.playlist_result(extract_playlist(playlist_id), **playlist_info)