From bd1340d29483f52bddf27f11adbc3ad488cb0968 Mon Sep 17 00:00:00 2001
From: Glenn Slayden <5589855+glenn-slayden@users.noreply.github.com>
Date: Wed, 24 Jun 2020 23:02:06 -0700
Subject: [PATCH 1/2] Prevent HTTP 301 for YouTube playlist continuations
When a YouTube playlist or channel listing has more than one page of videos, the extractor requests each continuation page from `youtube.com` instead of `www.youtube.com`. The server answers every such request with an HTTP 301 redirect to `www.youtube.com`, so each continuation page the extractor accesses costs an unnecessary extra HTTP round-trip.
**Example**
youtube-dl -s --print-traffic https://www.youtube.com/channel/UCBR8-60-B28hp2BmDPdntcQ
**Before**
GET /playlist?list=UUBR8-60-B28hp2BmDPdntcQ&disable_polymer=true
Host: www.youtube.com
HTTP/1.1 200 OK
GET /browse_ajax?action_continuation=1&continuation=4qmFsgIsEhpWTFVVQlI4LTYwLUIyOGhwMkJtRFBkbnRjURoOZWdaUVZEcERSMUUlM0Q%253D&disable_polymer=true
Host: youtube.com
HTTP/1.1 301 Moved Permanently
Location: https://www.youtube.com/browse_ajax?action_continuation=1&continuation=4qmFsgIsEhpWTFVVQlI4LTYwLUIyOGhwMkJtRFBkbnRjURoOZWdaUVZEcERSMUUlM0Q%253D&disable_polymer=true
GET /browse_ajax?action_continuation=1&continuation=4qmFsgIsEhpWTFVVQlI4LTYwLUIyOGhwMkJtRFBkbnRjURoOZWdaUVZEcERSMUUlM0Q%253D&disable_polymer=true
Host: www.youtube.com
HTTP/1.1 200 OK
GET /browse_ajax?action_continuation=1&continuation=4qmFsgIqEhpWTFVVQlI4LTYwLUIyOGhwMkJtRFBkbnRjURoMZWdkUVZEcERUV2RD&disable_polymer=true
Host: youtube.com
HTTP/1.1 301 Moved Permanently
Location: https://www.youtube.com/browse_ajax?action_continuation=1&continuation=4qmFsgIqEhpWTFVVQlI4LTYwLUIyOGhwMkJtRFBkbnRjURoMZWdkUVZEcERUV2RD&disable_polymer=true
GET /browse_ajax?action_continuation=1&continuation=4qmFsgIqEhpWTFVVQlI4LTYwLUIyOGhwMkJtRFBkbnRjURoMZWdkUVZEcERUV2RD&disable_polymer=true
Host: www.youtube.com
HTTP/1.1 200 OK
GET /browse_ajax?action_continuation=1&continuation=4qmFsgIqEhpWTFVVQlI4LTYwLUIyOGhwMkJtRFBkbnRjURoMZWdkUVZEcERTM2RE&disable_polymer=true
Host: youtube.com
HTTP/1.1 301 Moved Permanently
Location: https://www.youtube.com/browse_ajax?action_continuation=1&continuation=4qmFsgIqEhpWTFVVQlI4LTYwLUIyOGhwMkJtRFBkbnRjURoMZWdkUVZEcERTM2RE&disable_polymer=true
GET /browse_ajax?action_continuation=1&continuation=4qmFsgIqEhpWTFVVQlI4LTYwLUIyOGhwMkJtRFBkbnRjURoMZWdkUVZEcERTM2RE&disable_polymer=true
Host: www.youtube.com
HTTP/1.1 200 OK
**After**
GET /playlist?list=UUBR8-60-B28hp2BmDPdntcQ&disable_polymer=true
Host: www.youtube.com
HTTP/1.1 200 OK
GET /browse_ajax?action_continuation=1&continuation=4qmFsgIsEhpWTFVVQlI4LTYwLUIyOGhwMkJtRFBkbnRjURoOZWdaUVZEcERSMUUlM0Q%253D&disable_polymer=true
Host: www.youtube.com
HTTP/1.1 200 OK
GET /browse_ajax?action_continuation=1&continuation=4qmFsgIqEhpWTFVVQlI4LTYwLUIyOGhwMkJtRFBkbnRjURoMZWdkUVZEcERUV2RD&disable_polymer=true
Host: www.youtube.com
HTTP/1.1 200 OK
---
youtube_dl/extractor/youtube.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 1bc79e01478..638c6617eaf 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -303,7 +303,7 @@ def _entries(self, page, playlist_id):
# Downloading page may result in intermittent 5xx HTTP error
# that is usually worked around with a retry
more = self._download_json(
- 'https://youtube.com/%s' % mobj.group('more'), playlist_id,
+ 'https://www.youtube.com/%s' % mobj.group('more'), playlist_id,
'Downloading page #%s%s'
% (page_num, ' (retry #%d)' % count if count else ''),
transform_source=uppercase_escape,
From 9050896212f28269f1ab6abd2944ab98be8b5b86 Mon Sep 17 00:00:00 2001
From: Sergey M
Date: Wed, 1 Jul 2020 02:54:58 +0700
Subject: [PATCH 2/2] Use www.youtube.com in _extract_mix and _entries URLs too
---
youtube_dl/extractor/youtube.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 638c6617eaf..4ab58d6b651 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -2776,7 +2776,7 @@ def _extract_mix(self, playlist_id):
ids = []
last_id = playlist_id[-11:]
for n in itertools.count(1):
- url = 'https://youtube.com/watch?v=%s&list=%s' % (last_id, playlist_id)
+ url = 'https://www.youtube.com/watch?v=%s&list=%s' % (last_id, playlist_id)
webpage = self._download_webpage(
url, playlist_id, 'Downloading page {0} of Youtube mix'.format(n))
new_ids = orderedSet(re.findall(
@@ -3286,7 +3286,7 @@ def _entries(self, page):
break
more = self._download_json(
- 'https://youtube.com/%s' % mobj.group('more'), self._PLAYLIST_TITLE,
+ 'https://www.youtube.com/%s' % mobj.group('more'), self._PLAYLIST_TITLE,
'Downloading page #%s' % page_num,
transform_source=uppercase_escape,
headers=self._YOUTUBE_CLIENT_HEADERS)