From 29e9c94948a0f34006f1a7e531829a2d0d5ccd99 Mon Sep 17 00:00:00 2001 From: insaneracist Date: Tue, 10 Nov 2020 14:38:13 -0800 Subject: [PATCH] [youtube] stop loading pages if videos are already seen --- youtube_dlc/extractor/youtube.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/youtube_dlc/extractor/youtube.py b/youtube_dlc/extractor/youtube.py index 0266e01f4..3bb673e64 100644 --- a/youtube_dlc/extractor/youtube.py +++ b/youtube_dlc/extractor/youtube.py @@ -2894,6 +2894,7 @@ def _extract_playlist(self, playlist_id): yt_initial = self._get_yt_initial_data('', page) if yt_initial: playlist_items = try_get(yt_initial, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'][0]['tabRenderer']['content']['sectionListRenderer']['contents'][0]['itemSectionRenderer']['contents'][0]['playlistVideoListRenderer']['contents'], list) + video_ids = [] entries = [] playlist_page = 1 api_key = self._search_regex( @@ -2908,6 +2909,10 @@ def _extract_playlist(self, playlist_id): item_video = try_get(item, lambda x: x['playlistVideoRenderer'], dict) if item_video: video_id = try_get(item_video, lambda x: x['videoId'], compat_str) + if video_id in video_ids: + continue + else: + video_ids.append(video_id) entry = { '_type': 'url', 'duration': int_or_none(try_get(item_video, lambda x: x['lengthSeconds'], compat_str)), @@ -2927,7 +2932,7 @@ def _extract_playlist(self, playlist_id): 'context': { 'client': { 'clientName': 'WEB', - 'clientVersion': api_client_version, + 'clientVersion': api_client_version } }, 'continuation': continuation_token @@ -2941,7 +2946,11 @@ def _extract_playlist(self, playlist_id): video_id=playlist_id) playlist_items_new = try_get(response, lambda x: x['onResponseReceivedActions'][0]['appendContinuationItemsAction']['continuationItems'], list) if playlist_items_new: - playlist_items.extend(playlist_items_new) + # load more pages until we get a page of all videos already in the playlist (some playlists loop) + video_ids_new = [try_get(i, lambda x: x['playlistVideoRenderer']['videoId'], compat_str) for i in playlist_items_new] + video_ids_new = [i for i in video_ids_new if i and i not in video_ids] + if video_ids_new: + playlist_items.extend(playlist_items_new) playlist_title = try_get(yt_initial, lambda x: x['microformat']['microformatDataRenderer']['title'], compat_str) playlist_description = try_get(yt_initial, lambda x: x['microformat']['microformatDataRenderer']['description'], compat_str)