From 18f015fdf12385a5147423356be7a31b11b5f535 Mon Sep 17 00:00:00 2001 From: Cory Hall Date: Thu, 31 Jan 2019 20:51:37 -0500 Subject: [PATCH 1/7] [soundcloud] Fix paged playlist download archival rel: https://github.com/rg3/youtube-dl/issues/19022 --- youtube_dl/extractor/soundcloud.py | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index 81c81c8d58e..13e9a9d3020 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -397,13 +397,26 @@ def resolve_permalink_url(candidates): if isinstance(cand, dict): permalink_url = cand.get('permalink_url') entry_id = self._extract_id(cand) + title = cand.get('title') if permalink_url and permalink_url.startswith('http'): - return permalink_url, entry_id + return permalink_url, entry_id, title for e in collection: - permalink_url, entry_id = resolve_permalink_url((e, e.get('track'), e.get('playlist'))) + tr_dict = e.get('track') + pl_dict = e.get('playlist') + + permalink_url, entry_id, entry_title = resolve_permalink_url( + (e, tr_dict, pl_dict)) if permalink_url: - entries.append(self.url_result(permalink_url, video_id=entry_id)) + entry_data = self.url_result( + permalink_url, + video_id=entry_id, video_title=entry_title) + + if isinstance(tr_dict, dict): + # if entry has track data, attach extractor key + entry_data['ie_key'] = SoundcloudIE.ie_key() + + entries.append(entry_data) next_href = response.get('next_href') if not next_href: From 3dd3463e2d2266061f72fb030f70c671b12bed34 Mon Sep 17 00:00:00 2001 From: Cory Hall Date: Fri, 1 Feb 2019 17:02:28 -0500 Subject: [PATCH 2/7] [soundcloud] Code review fixes --- youtube_dl/extractor/soundcloud.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index 13e9a9d3020..eede322299a 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -402,19 +402,17 @@ def resolve_permalink_url(candidates): return permalink_url, entry_id, title for e in collection: - tr_dict = e.get('track') - pl_dict = e.get('playlist') + extractor = None + if isinstance(e.get('track'), dict): + # if entry has track data, attach extractor key + extractor = SoundcloudIE.ie_key() permalink_url, entry_id, entry_title = resolve_permalink_url( - (e, tr_dict, pl_dict)) + (e, e.get('track'), e.get('playlist'))) if permalink_url: entry_data = self.url_result( permalink_url, - video_id=entry_id, video_title=entry_title) - - if isinstance(tr_dict, dict): - # if entry has track data, attach extractor key - entry_data['ie_key'] = SoundcloudIE.ie_key() + ie=extractor, video_id=entry_id, video_title=entry_title) entries.append(entry_data) From e907e2f8b4e75775dc1dd98644bf2ed07736d213 Mon Sep 17 00:00:00 2001 From: Cory Hall Date: Fri, 1 Feb 2019 17:24:10 -0500 Subject: [PATCH 3/7] [soundcloud] Use `suitable` method to check entry integrity --- youtube_dl/extractor/soundcloud.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index eede322299a..4dd906c7249 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -402,14 +402,15 @@ def resolve_permalink_url(candidates): return permalink_url, entry_id, title for e in collection: - extractor = None - if isinstance(e.get('track'), dict): - # if entry has track data, attach extractor key - extractor = SoundcloudIE.ie_key() - permalink_url, entry_id, entry_title = resolve_permalink_url( (e, e.get('track'), e.get('playlist'))) + if permalink_url: + extractor = None + if SoundcloudIE.suitable(permalink_url): + # if entry has track data, attach extractor key + extractor = SoundcloudIE.ie_key() + entry_data = self.url_result( permalink_url, ie=extractor, video_id=entry_id, video_title=entry_title) From 852f50387a431fecce97b9e0f6c31b87a1c001ca Mon Sep 17 00:00:00 2001 From: Cory Hall Date: Fri, 1 Feb 2019 17:33:21 -0500 Subject: [PATCH 4/7] [soundcloud] inline `ie` argument --- youtube_dl/extractor/soundcloud.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index 4dd906c7249..1e7bf809af1 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -406,14 +406,10 @@ def resolve_permalink_url(candidates): (e, e.get('track'), e.get('playlist'))) if permalink_url: - extractor = None - if SoundcloudIE.suitable(permalink_url): - # if entry has track data, attach extractor key - extractor = SoundcloudIE.ie_key() - entry_data = self.url_result( permalink_url, - ie=extractor, video_id=entry_id, video_title=entry_title) + ie=SoundcloudIE.ie_key() if SoundcloudIE.suitable(permalink_url) else None, + video_id=entry_id, video_title=entry_title) entries.append(entry_data) From b94738784421672a8ac9b2eb383f798097aeecb4 Mon Sep 17 00:00:00 2001 From: Cory Hall Date: Fri, 1 Feb 2019 17:49:34 -0500 Subject: [PATCH 5/7] [soundcloud] inline entry append --- youtube_dl/extractor/soundcloud.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index 1e7bf809af1..b156adc808b 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -406,12 +406,10 @@ def resolve_permalink_url(candidates): (e, e.get('track'), e.get('playlist'))) if permalink_url: - entry_data = self.url_result( + entries.append(self.url_result( permalink_url, ie=SoundcloudIE.ie_key() if SoundcloudIE.suitable(permalink_url) else None, - video_id=entry_id, video_title=entry_title) - - entries.append(entry_data) + video_id=entry_id, video_title=entry_title)) next_href = response.get('next_href') if not next_href: From 3fa0f5b241bc60f330e46467ec517896b29e2942 Mon Sep 17 00:00:00 2001 From: Cory Hall Date: Fri, 1 Feb 2019 18:44:20 -0500 Subject: [PATCH 6/7] [soundcloud] refactor paged playlist processing --- youtube_dl/extractor/soundcloud.py | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index b156adc808b..e15dcbd77c9 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -392,24 +392,20 @@ def _extract_playlist(self, base_url, playlist_id, playlist_title): if not collection: break - def resolve_permalink_url(candidates): - for cand in candidates: + def append_url_result(entries, item): + for cand in (item, item.get('track'), item.get('playlist')): if isinstance(cand, dict): permalink_url = cand.get('permalink_url') - entry_id = self._extract_id(cand) - title = cand.get('title') if permalink_url and permalink_url.startswith('http'): - return permalink_url, entry_id, title + return entries.append( + self.url_result( + permalink_url, + ie=SoundcloudIE.ie_key() if SoundcloudIE.suitable(permalink_url) else None, + video_id=self._extract_id(cand), + video_title=cand.get('title'))) for e in collection: - permalink_url, entry_id, entry_title = resolve_permalink_url( - (e, e.get('track'), e.get('playlist'))) - - if permalink_url: - entries.append(self.url_result( - permalink_url, - ie=SoundcloudIE.ie_key() if SoundcloudIE.suitable(permalink_url) else None, - video_id=entry_id, video_title=entry_title)) + append_url_result(entries, e) next_href = response.get('next_href') if not next_href: From 7c20103c0e3232b5bc029116d7b003cf0fee3c87 Mon Sep 17 00:00:00 2001 From: Sergey M Date: Sat, 2 Feb 2019 23:34:51 +0700 Subject: [PATCH 7/7] Update soundcloud.py --- youtube_dl/extractor/soundcloud.py | 96 +++++++++++++++++------------- 1 file changed, 56 insertions(+), 40 deletions(-) diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index e15dcbd77c9..5536e785144 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -18,6 +18,7 @@ int_or_none, unified_strdate, update_url_query, + url_or_none, ) @@ -34,7 +35,7 @@ class SoundcloudIE(InfoExtractor): (?:(?:(?:www\.|m\.)?soundcloud\.com/ (?!stations/track) (?P[\w\d-]+)/ - (?!(?:tracks|sets(?:/.+?)?|reposts|likes|spotlight)/?(?:$|[?#])) + (?!(?:tracks|albums|sets(?:/.+?)?|reposts|likes|spotlight)/?(?:$|[?#])) (?P[\w\d-]+)/? (?P<token>[^?]+?)?(?:[?].*)?$) |(?:api\.soundcloud\.com/tracks/(?P<track_id>\d+) @@ -157,7 +158,7 @@ class SoundcloudIE(InfoExtractor): }, ] - _CLIENT_ID = 'LvWovRaJZlWCHql0bISuum8Bd2KX79mb' + _CLIENT_ID = 'NmW1FlPaiL94ueEu7oziOWjYEzZzQDcK' @staticmethod def _extract_urls(webpage): @@ -368,7 +369,6 @@ def _real_extract(self, url): class SoundcloudPagedPlaylistBaseIE(SoundcloudPlaylistBaseIE): - _API_BASE = 'https://api.soundcloud.com' _API_V2_BASE = 'https://api-v2.soundcloud.com' def _extract_playlist(self, base_url, playlist_id, playlist_title): @@ -389,23 +389,30 @@ def _extract_playlist(self, base_url, playlist_id, playlist_title): next_href, playlist_id, 'Downloading track page %s' % (i + 1)) collection = response['collection'] - if not collection: - break - def append_url_result(entries, item): - for cand in (item, item.get('track'), item.get('playlist')): - if isinstance(cand, dict): - permalink_url = cand.get('permalink_url') - if permalink_url and permalink_url.startswith('http'): - return entries.append( - self.url_result( - permalink_url, - ie=SoundcloudIE.ie_key() if SoundcloudIE.suitable(permalink_url) else None, - video_id=self._extract_id(cand), - video_title=cand.get('title'))) + if not isinstance(collection, list): + collection = [] + + # Empty collection may be returned, in this case we proceed + # straight to next_href + + def resolve_entry(candidates): + for cand in candidates: + if not isinstance(cand, dict): + continue + permalink_url = url_or_none(cand.get('permalink_url')) + if not permalink_url: + continue + return self.url_result( + permalink_url, + ie=SoundcloudIE.ie_key() if SoundcloudIE.suitable(permalink_url) else None, + video_id=self._extract_id(cand), + video_title=cand.get('title')) for e in collection: - append_url_result(entries, e) + entry = resolve_entry((e, e.get('track'), e.get('playlist'))) + if entry: + entries.append(entry) next_href = response.get('next_href') if not next_href: @@ -431,46 +438,53 @@ class SoundcloudUserIE(SoundcloudPagedPlaylistBaseIE): (?:(?:www|m)\.)?soundcloud\.com/ (?P<user>[^/]+) (?:/ - (?P<rsrc>tracks|sets|reposts|likes|spotlight) + (?P<rsrc>tracks|albums|sets|reposts|likes|spotlight) )? /?(?:[?#].*)?$ ''' IE_NAME = 'soundcloud:user' _TESTS = [{ - 'url': 'https://soundcloud.com/the-akashic-chronicler', + 'url': 'https://soundcloud.com/soft-cell-official', 'info_dict': { - 'id': '114582580', - 'title': 'The Akashic Chronicler (All)', + 'id': '207965082', + 'title': 'Soft Cell (All)', }, - 'playlist_mincount': 74, + 'playlist_mincount': 28, }, { - 'url': 'https://soundcloud.com/the-akashic-chronicler/tracks', + 'url': 'https://soundcloud.com/soft-cell-official/tracks', 'info_dict': { - 'id': '114582580', - 'title': 'The Akashic Chronicler (Tracks)', + 'id': '207965082', + 'title': 'Soft Cell (Tracks)', }, - 'playlist_mincount': 37, + 'playlist_mincount': 27, }, { - 'url': 'https://soundcloud.com/the-akashic-chronicler/sets', + 'url': 'https://soundcloud.com/soft-cell-official/albums', 'info_dict': { - 'id': '114582580', - 'title': 'The Akashic Chronicler (Playlists)', + 'id': '207965082', + 'title': 'Soft Cell (Albums)', + }, + 'playlist_mincount': 1, + }, { + 'url': 'https://soundcloud.com/jcv246/sets', + 'info_dict': { + 'id': '12982173', + 'title': 'Jordi / cv (Playlists)', }, 'playlist_mincount': 2, }, { - 'url': 'https://soundcloud.com/the-akashic-chronicler/reposts', + 'url': 'https://soundcloud.com/jcv246/reposts', 'info_dict': { - 'id': '114582580', - 'title': 'The Akashic Chronicler (Reposts)', + 'id': '12982173', + 'title': 'Jordi / cv (Reposts)', }, - 'playlist_mincount': 7, + 'playlist_mincount': 6, }, { - 'url': 'https://soundcloud.com/the-akashic-chronicler/likes', + 'url': 'https://soundcloud.com/clalberg/likes', 'info_dict': { - 'id': '114582580', - 'title': 'The Akashic Chronicler (Likes)', + 'id': '11817582', + 'title': 'clalberg (Likes)', }, - 'playlist_mincount': 321, + 'playlist_mincount': 5, }, { 'url': 'https://soundcloud.com/grynpyret/spotlight', 'info_dict': { @@ -481,10 +495,11 @@ class SoundcloudUserIE(SoundcloudPagedPlaylistBaseIE): }] _BASE_URL_MAP = { - 'all': '%s/profile/soundcloud:users:%%s' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE, - 'tracks': '%s/users/%%s/tracks' % SoundcloudPagedPlaylistBaseIE._API_BASE, + 'all': '%s/stream/users/%%s' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE, + 'tracks': '%s/users/%%s/tracks' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE, + 'albums': '%s/users/%%s/albums' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE, 'sets': '%s/users/%%s/playlists' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE, - 'reposts': '%s/profile/soundcloud:users:%%s/reposts' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE, + 'reposts': '%s/stream/users/%%s/reposts' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE, 'likes': '%s/users/%%s/likes' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE, 'spotlight': '%s/users/%%s/spotlight' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE, } @@ -492,6 +507,7 @@ class SoundcloudUserIE(SoundcloudPagedPlaylistBaseIE): _TITLE_MAP = { 'all': 'All', 'tracks': 'Tracks', + 'albums': 'Albums', 'sets': 'Playlists', 'reposts': 'Reposts', 'likes': 'Likes',