Skip to content

Commit

Permalink
[bot] AutoMerging: merge all upstream's changes:
Browse files Browse the repository at this point in the history
* https://github.com/ytdl-org/youtube-dl:
  [youtube] Improve stretch extraction and fix stretched ratio calculation (closes ytdl-org#28769)
  [youtube:tab] Improve grid extraction (closes ytdl-org#28725)
  [youtube:tab] Detect series playlist on playlists page (closes ytdl-org#28723)
  [youtube] Add more invidious instances (ytdl-org#28706)
  [pluralsight] Extend anti-throttling timeout (ytdl-org#28712)
  [youtube] Improve URL to extractor routing (closes ytdl-org#27572, closes ytdl-org#28335, closes ytdl-org#28742)
  • Loading branch information
github-actions[bot] committed Apr 16, 2021
2 parents 85fe73a + 54558e0 commit cbf52f2
Show file tree
Hide file tree
Showing 2 changed files with 77 additions and 25 deletions.
2 changes: 1 addition & 1 deletion youtube_dl/extractor/pluralsight.py
Original file line number Diff line number Diff line change
Expand Up @@ -393,7 +393,7 @@ def guess_allowed_qualities():
# To somewhat reduce the probability of these consequences
# we will sleep a random amount of time before each call to ViewClip.
self._sleep(
random.randint(2, 5), display_id,
random.randint(5, 10), display_id,
'%(video_id)s: Waiting for %(timeout)s seconds to avoid throttling')

if not viewclip:
Expand Down
100 changes: 76 additions & 24 deletions youtube_dl/extractor/youtube.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,10 @@
)


def parse_qs(url):
    """Return the query-string parameters of *url*.

    The URL is split with ``compat_urlparse.urlparse`` and only its
    ``query`` component is handed to ``compat_urlparse.parse_qs``, so the
    result maps each parameter name to the list of values given for it.
    """
    query = compat_urlparse.urlparse(url).query
    return compat_urlparse.parse_qs(query)


class YoutubeBaseInfoExtractor(InfoExtractor):
"""Provide base functions for Youtube extractors"""
_LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
Expand Down Expand Up @@ -355,21 +359,28 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
r'(?:www\.)?invidious\.mastodon\.host',
r'(?:www\.)?invidious\.zapashcanon\.fr',
r'(?:www\.)?invidious\.kavin\.rocks',
r'(?:www\.)?invidious\.tinfoil-hat\.net',
r'(?:www\.)?invidious\.himiko\.cloud',
r'(?:www\.)?invidious\.reallyancient\.tech',
r'(?:www\.)?invidious\.tube',
r'(?:www\.)?invidiou\.site',
r'(?:www\.)?invidious\.site',
r'(?:www\.)?invidious\.xyz',
r'(?:www\.)?invidious\.nixnet\.xyz',
r'(?:www\.)?invidious\.048596\.xyz',
r'(?:www\.)?invidious\.drycat\.fr',
r'(?:www\.)?inv\.skyn3t\.in',
r'(?:www\.)?tube\.poal\.co',
r'(?:www\.)?tube\.connect\.cafe',
r'(?:www\.)?vid\.wxzm\.sx',
r'(?:www\.)?vid\.mint\.lgbt',
r'(?:www\.)?vid\.puffyan\.us',
r'(?:www\.)?yewtu\.be',
r'(?:www\.)?yt\.elukerio\.org',
r'(?:www\.)?yt\.lelux\.fi',
r'(?:www\.)?invidious\.ggc-project\.de',
r'(?:www\.)?yt\.maisputain\.ovh',
r'(?:www\.)?ytprivate\.com',
r'(?:www\.)?invidious\.13ad\.de',
r'(?:www\.)?invidious\.toot\.koeln',
r'(?:www\.)?invidious\.fdn\.fr',
Expand Down Expand Up @@ -413,16 +424,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
)
)? # all until now is optional -> you can pass the naked ID
(?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
(?!.*?\blist=
(?:
%(playlist_id)s| # combined list/video URLs are handled by the playlist IE
WL # WL are handled by the watch later IE
)
)
(?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
(?(1).+)? # if we found the ID, everything can follow
$""" % {
'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE,
'invidious': '|'.join(_INVIDIOUS_SITES),
}
_PLAYER_INFO_RE = (
Expand Down Expand Up @@ -808,6 +812,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
},
'skip': 'This video does not exist.',
},
{
# Video with incomplete 'yt:stretch=16:'
'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
'only_matching': True,
},
{
# Video licensed under Creative Commons
'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
Expand Down Expand Up @@ -1208,6 +1217,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
}

@classmethod
def suitable(cls, url):
    """Tell whether this extractor accepts *url*.

    A URL whose query string carries a non-empty ``list`` parameter is
    rejected outright, before the regular check inherited from the parent
    class is consulted.
    """
    # parse_qs maps each parameter to a list of values; only the first
    # value of ``list`` is inspected here.
    playlist_id = parse_qs(url).get('list', [None])[0]
    if playlist_id:
        return False
    return super(YoutubeIE, cls).suitable(url)

def __init__(self, *args, **kwargs):
super(YoutubeIE, self).__init__(*args, **kwargs)
self._code_cache = {}
Expand Down Expand Up @@ -1706,13 +1722,16 @@ def feed_entry(name):
for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
for keyword in keywords:
if keyword.startswith('yt:stretch='):
w, h = keyword.split('=')[1].split(':')
w, h = int(w), int(h)
if w > 0 and h > 0:
ratio = w / h
for f in formats:
if f.get('vcodec') != 'none':
f['stretched_ratio'] = ratio
mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
if mobj:
# NB: float is intentional for forcing float division
w, h = (float(v) for v in mobj.groups())
if w > 0 and h > 0:
ratio = w / h
for f in formats:
if f.get('vcodec') != 'none':
f['stretched_ratio'] = ratio
break

thumbnails = []
for container in (video_details, microformat):
Expand Down Expand Up @@ -2008,6 +2027,15 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
'title': 'Игорь Клейнер - Playlists',
'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
},
}, {
# playlists, series
'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
'playlist_mincount': 5,
'info_dict': {
'id': 'UCYO_jab_esuFRV4b17AJtAw',
'title': '3Blue1Brown - Playlists',
'description': 'md5:e1384e8a133307dd10edee76e875d62f',
},
}, {
# playlists, singlepage
'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
Expand Down Expand Up @@ -2275,6 +2303,9 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
'title': '#cctv9',
},
'playlist_mincount': 350,
}, {
'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
'only_matching': True,
}]

@classmethod
Expand All @@ -2297,10 +2328,13 @@ def _extract_channel_id(self, webpage):

@staticmethod
def _extract_grid_item_renderer(item):
    """Return the first ``grid*Renderer`` dict found in *item*.

    Any key that starts with ``grid`` and ends with ``Renderer`` qualifies,
    so renderer kinds beyond playlist/video/channel are picked up as well.
    Values that are not dicts are skipped.

    Returns ``None`` when *item* is not a dict or holds no such entry.
    """
    # Explicit type guard instead of ``assert``: assertions are stripped
    # when Python runs with -O, so they must not be used for validation.
    if not isinstance(item, dict):
        return None
    for key, renderer in item.items():
        if not (key.startswith('grid') and key.endswith('Renderer')):
            continue
        if isinstance(renderer, dict):
            return renderer
    return None

def _grid_entries(self, grid_renderer):
for item in grid_renderer['items']:
Expand All @@ -2310,18 +2344,21 @@ def _grid_entries(self, grid_renderer):
if not isinstance(renderer, dict):
continue
title = try_get(
renderer, lambda x: x['title']['runs'][0]['text'], compat_str)
renderer, (lambda x: x['title']['runs'][0]['text'],
lambda x: x['title']['simpleText']), compat_str)
# playlist
playlist_id = renderer.get('playlistId')
if playlist_id:
yield self.url_result(
'https://www.youtube.com/playlist?list=%s' % playlist_id,
ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
video_title=title)
continue
# video
video_id = renderer.get('videoId')
if video_id:
yield self._extract_video(renderer)
continue
# channel
channel_id = renderer.get('channelId')
if channel_id:
Expand All @@ -2330,6 +2367,17 @@ def _grid_entries(self, grid_renderer):
yield self.url_result(
'https://www.youtube.com/channel/%s' % channel_id,
ie=YoutubeTabIE.ie_key(), video_title=title)
continue
# generic endpoint URL support
ep_url = urljoin('https://www.youtube.com/', try_get(
renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
compat_str))
if ep_url:
for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):
if ie.suitable(ep_url):
yield self.url_result(
ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title)
break

def _shelf_entries_from_content(self, shelf_renderer):
content = shelf_renderer.get('content')
Expand Down Expand Up @@ -2764,7 +2812,7 @@ def _real_extract(self, url):
url = compat_urlparse.urlunparse(
compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
# Handle both video/playlist URLs
qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
qs = parse_qs(url)
video_id = qs.get('v', [None])[0]
playlist_id = qs.get('list', [None])[0]
if video_id and playlist_id:
Expand Down Expand Up @@ -2860,12 +2908,16 @@ class YoutubePlaylistIE(InfoExtractor):

@classmethod
def suitable(cls, url):
    """Tell whether this extractor accepts *url*.

    The URL is rejected when ``YoutubeTabIE`` already claims it, or when
    its query string carries a non-empty ``v`` parameter. Otherwise the
    regular check inherited from the parent class decides.
    """
    if YoutubeTabIE.suitable(url):
        return False
    # parse_qs maps each parameter to a list of values; only the first
    # value of ``v`` is inspected here.
    video_id = parse_qs(url).get('v', [None])[0]
    if video_id:
        return False
    return super(YoutubePlaylistIE, cls).suitable(url)

def _real_extract(self, url):
playlist_id = self._match_id(url)
qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
qs = parse_qs(url)
if not qs:
qs = {'list': playlist_id}
return self.url_result(
Expand Down

0 comments on commit cbf52f2

Please sign in to comment.