Skip to content

Commit

Permalink
[ie/mediasite] Fix extractor
Browse files Browse the repository at this point in the history
  • Loading branch information
kclauhk committed Nov 1, 2024
1 parent 681f02c commit e9b4d58
Showing 1 changed file with 74 additions and 7 deletions.
81 changes: 74 additions & 7 deletions yt_dlp/extractor/mediasite.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from .common import InfoExtractor
from ..utils import (
ExtractorError,
determine_ext,
float_or_none,
mimetype2ext,
smuggle_url,
Expand All @@ -31,6 +32,9 @@ class MediasiteIE(InfoExtractor):
'ext': 'mp4',
'title': 'Lecture: Tuesday, September 20, 2016 - Sir Andrew Wiles',
'description': 'Sir Andrew Wiles: “Equations in arithmetic”\\n\\nI will describe some of the interactions between modern number theory and the problem of solving equations in rational numbers or integers\\u0027.',
'thumbnail': r're:^https?://.*\.jpg(?:\?.*)?$',
'cast': ['Sir Andrew J. Miles, Silver Plaque of the IMU, 1998 | Abel Prize, 2016'],
'duration': 2978.0,
'timestamp': 1474268400.0,
'upload_date': '20160919',
},
Expand All @@ -44,6 +48,7 @@ class MediasiteIE(InfoExtractor):
'title': '5) IT-forum 2015-Dag 1 - Dungbeetle - How and why Rain created a tiny bug tracker for Unity',
'timestamp': 1430311380.0,
},
'skip': 'no longer exist',
},
{
'url': 'https://collegerama.tudelft.nl/Mediasite/Play/585a43626e544bdd97aeb71a0ec907a01d',
Expand All @@ -54,10 +59,15 @@ class MediasiteIE(InfoExtractor):
'title': 'Een nieuwe wereld: waarden, bewustzijn en techniek van de mensheid 2.0.',
'description': '',
'thumbnail': r're:^https?://.*\.jpg(?:\?.*)?$',
'cast': ['H. Wijffels'],
'duration': 7713.088,
'timestamp': 1413309600,
'upload_date': '20141014',
},
'params': {
# format 'video1-1.1' HTTP Error 400: Bad Request
'skip_download': True,
},
},
{
'url': 'https://collegerama.tudelft.nl/Mediasite/Play/86a9ea9f53e149079fbdb4202b521ed21d?catalog=fd32fd35-6c99-466c-89d4-cd3c431bc8a4',
Expand All @@ -68,22 +78,50 @@ class MediasiteIE(InfoExtractor):
'title': '64ste Vakantiecursus: Afvalwater',
'description': 'md5:7fd774865cc69d972f542b157c328305',
'thumbnail': r're:^https?://.*\.jpg(?:\?.*?)?$',
'cast': ['D.J. van den Berg', 'L.C. Rietveld', 'D. van Halem', 'N.C. van de Giesen', 'J.Q.J.C. Verberk'],
'duration': 10853,
'timestamp': 1326446400,
'upload_date': '20120113',
},
'params': {
'skip_download': True,
},
},
{
'url': 'http://digitalops.sandia.gov/Mediasite/Play/24aace4429fc450fb5b38cdbf424a66e1d',
'md5': '9422edc9b9a60151727e4b6d8bef393d',
'info_dict': {
'id': '24aace4429fc450fb5b38cdbf424a66e1d',
'ext': 'mp4',
'title': 'Xyce Software Training - Section 1',
'title': 'Xyce Software Training - Section 1 - Apr. 2012',
'description': r're:(?s)SAND Number: SAND 2013-7800.{200,}',
'upload_date': '20120409',
'timestamp': 1333983600,
'thumbnail': r're:^https?://.*\.jpg(?:\?.*)?$',
'cast': ['01400 Computation, Computers \\u0026 Math'],
'duration': 7794,
'timestamp': 1333983600,
'upload_date': '20120409',
},
'params': {
# format 'video1-1.0' HTTP Error 400: Bad Request
'skip_download': True,
},
},
{
'url': 'https://events7.mediasite.com/Mediasite/Play/a7812390a2d44739ae857527e05776091d',
'info_dict': {
'id': 'a7812390a2d44739ae857527e05776091d',
'ext': 'mp4',
'title': 'Practical Prevention, Detection and Responses to the New Threat Landscape',
'description': r're:^The bad guys aren’t standing still, and neither is Okta',
'thumbnail': r're:^https?://.*\.jpg(?:\?.*)?$',
'cast': ['Franklin Rosado', 'Alex Bovee'],
'duration': 2415.487,
'timestamp': 1472567400,
'upload_date': '20160830',
},
'params': {
# frag 1 too small (only 768B)
'skip_download': True,
},
},
{
Expand Down Expand Up @@ -243,8 +281,8 @@ def _real_extract(self, url):
'ext': ext,
})

images = traverse_obj(player_options, ('PlayerLayoutOptions', 'Images', {dict}))
if stream.get('HasSlideContent') and images:
images = traverse_obj(player_options, ('PlayerLayoutOptions', 'Images', {dict}), default={})
if stream.get('HasSlideContent'):
stream_formats.append(self.__extract_slides(
stream_id=stream_id,
snum=snum,
Expand All @@ -266,9 +304,35 @@ def _real_extract(self, url):
'preference': -1 if stream_type != 0 else 0,
})
formats.extend(stream_formats)
if url_or_none(presentation.get('VodcastUrl')):
formats.append({
'format_id': 'vodcast',
'url': presentation.get('VodcastUrl').split('?attachmentName=')[0],
})

# XXX: Presentation['Presenters']
# XXX: Presentation['Transcript']
transcripts = presentation.get('Transcripts', {})
captions, subtitles = {}, {}
for transcript in transcripts:
lang_code = traverse_obj(
transcript, (('DetailedLanguageCode', 'LanguageCode'), {str}), get_all=False)
lang_name = transcript.get('Language')
t = {
'url': transcript.get('CaptionsUrl'),
'name': lang_name,
}
if 'Auto-Generated' in lang_name:
captions.setdefault(lang_code, []).append(t)
else:
subtitles.setdefault(lang_code, []).append(t)
if transcript_url := presentation.get('TranscriptUrl'):
if determine_ext(transcript_url) != 'txt':
if len(transcripts) == 1 and captions:
captions.setdefault(lang_code, []).append({
'url': transcript_url,
'name': lang_name,
})
else:
subtitles.setdefault('und', []).append({'url': transcript_url})

return {
'id': resource_id,
Expand All @@ -277,7 +341,10 @@ def _real_extract(self, url):
'duration': float_or_none(presentation.get('Duration'), 1000),
'timestamp': float_or_none(presentation.get('UnixTime'), 1000),
'formats': formats,
'automatic_captions': captions,
'subtitles': subtitles,
'thumbnails': thumbnails,
'cast': traverse_obj(presentation, ('Presenters', ..., 'Name', {str})),
}


Expand Down

0 comments on commit e9b4d58

Please sign in to comment.