forked from ytdl-org/youtube-dl
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[bot] AutoMerging: merge all upstream's changes:
* https://github.com/ytdl-org/youtube-dl: [sportdeutschland] fix extraction (closes ytdl-org#21856) (closes ytdl-org#28425)
- Loading branch information
Showing 1 changed file with 84 additions and 61 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,82 +1,105 @@ | ||
# coding: utf-8 | ||
from __future__ import unicode_literals | ||
|
||
import re | ||
|
||
from .common import InfoExtractor | ||
from ..compat import ( | ||
compat_parse_qs, | ||
compat_urllib_parse_urlparse, | ||
) | ||
from ..utils import ( | ||
clean_html, | ||
float_or_none, | ||
int_or_none, | ||
parse_iso8601, | ||
sanitized_Request, | ||
strip_or_none, | ||
try_get, | ||
) | ||
|
||
|
||
class SportDeutschlandIE(InfoExtractor): | ||
_VALID_URL = r'https?://sportdeutschland\.tv/(?P<sport>[^/?#]+)/(?P<id>[^?#/]+)(?:$|[?#])' | ||
_VALID_URL = r'https?://sportdeutschland\.tv/(?P<id>(?:[^/]+/)?[^?#/&]+)' | ||
_TESTS = [{ | ||
'url': 'https://sportdeutschland.tv/badminton/re-live-deutsche-meisterschaften-2020-halbfinals?playlistId=0', | ||
'info_dict': { | ||
'id': 're-live-deutsche-meisterschaften-2020-halbfinals', | ||
'id': '5318cac0275701382770543d7edaf0a0', | ||
'ext': 'mp4', | ||
'title': 're:Re-live: Deutsche Meisterschaften 2020.*Halbfinals', | ||
'categories': ['Badminton-Deutschland'], | ||
'view_count': int, | ||
'thumbnail': r're:^https?://.*\.(?:jpg|png)$', | ||
'timestamp': int, | ||
'upload_date': '20200201', | ||
'description': 're:.*', # meaningless description for THIS video | ||
'title': 'Re-live: Deutsche Meisterschaften 2020 - Halbfinals - Teil 1', | ||
'duration': 16106.36, | ||
}, | ||
'params': { | ||
'noplaylist': True, | ||
# m3u8 download | ||
'skip_download': True, | ||
}, | ||
}, { | ||
'url': 'https://sportdeutschland.tv/badminton/re-live-deutsche-meisterschaften-2020-halbfinals?playlistId=0', | ||
'info_dict': { | ||
'id': 'c6e2fdd01f63013854c47054d2ab776f', | ||
'title': 'Re-live: Deutsche Meisterschaften 2020 - Halbfinals', | ||
'description': 'md5:5263ff4c31c04bb780c9f91130b48530', | ||
'duration': 31397, | ||
}, | ||
'playlist_count': 2, | ||
}, { | ||
'url': 'https://sportdeutschland.tv/freeride-world-tour-2021-fieberbrunn-oesterreich', | ||
'only_matching': True, | ||
}] | ||
|
||
def _real_extract(self, url): | ||
mobj = re.match(self._VALID_URL, url) | ||
video_id = mobj.group('id') | ||
sport_id = mobj.group('sport') | ||
|
||
api_url = 'https://proxy.vidibusdynamic.net/ssl/backend.sportdeutschland.tv/api/permalinks/%s/%s?access_token=true' % ( | ||
sport_id, video_id) | ||
req = sanitized_Request(api_url, headers={ | ||
'Accept': 'application/vnd.vidibus.v2.html+json', | ||
'Referer': url, | ||
}) | ||
data = self._download_json(req, video_id) | ||
|
||
display_id = self._match_id(url) | ||
data = self._download_json( | ||
'https://backend.sportdeutschland.tv/api/permalinks/' + display_id, | ||
display_id, query={'access_token': 'true'}) | ||
asset = data['asset'] | ||
categories = [data['section']['title']] | ||
|
||
formats = [] | ||
smil_url = asset['video'] | ||
if '.smil' in smil_url: | ||
m3u8_url = smil_url.replace('.smil', '.m3u8') | ||
formats.extend( | ||
self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4')) | ||
title = (asset.get('title') or asset['label']).strip() | ||
asset_id = asset.get('id') or asset.get('uuid') | ||
info = { | ||
'id': asset_id, | ||
'title': title, | ||
'description': clean_html(asset.get('body') or asset.get('description')) or asset.get('teaser'), | ||
'duration': int_or_none(asset.get('seconds')), | ||
} | ||
videos = asset.get('videos') or [] | ||
if len(videos) > 1: | ||
playlist_id = compat_parse_qs(compat_urllib_parse_urlparse(url).query).get('playlistId', [None])[0] | ||
if playlist_id: | ||
if self._downloader.params.get('noplaylist'): | ||
videos = [videos[int(playlist_id)]] | ||
self.to_screen('Downloading just a single video because of --no-playlist') | ||
else: | ||
self.to_screen('Downloading playlist %s - add --no-playlist to just download video' % asset_id) | ||
|
||
smil_doc = self._download_xml( | ||
smil_url, video_id, note='Downloading SMIL metadata') | ||
base_url_el = smil_doc.find('./head/meta') | ||
if base_url_el: | ||
base_url = base_url_el.attrib['base'] | ||
formats.extend([{ | ||
'format_id': 'rmtp', | ||
'url': base_url if base_url_el else n.attrib['src'], | ||
'play_path': n.attrib['src'], | ||
'ext': 'flv', | ||
'preference': -100, | ||
'format_note': 'Seems to fail at example stream', | ||
} for n in smil_doc.findall('./body/video')]) | ||
def entries(): | ||
for i, video in enumerate(videos, 1): | ||
video_id = video.get('uuid') | ||
video_url = video.get('url') | ||
if not (video_id and video_url): | ||
continue | ||
formats = self._extract_m3u8_formats( | ||
video_url.replace('.smil', '.m3u8'), video_id, 'mp4', fatal=False) | ||
if not formats: | ||
continue | ||
yield { | ||
'id': video_id, | ||
'formats': formats, | ||
'title': title + ' - ' + (video.get('label') or 'Teil %d' % i), | ||
'duration': float_or_none(video.get('duration')), | ||
} | ||
info.update({ | ||
'_type': 'multi_video', | ||
'entries': entries(), | ||
}) | ||
else: | ||
formats.append({'url': smil_url}) | ||
|
||
self._sort_formats(formats) | ||
|
||
return { | ||
'id': video_id, | ||
'formats': formats, | ||
'title': asset['title'], | ||
'thumbnail': asset.get('image'), | ||
'description': asset.get('teaser'), | ||
'duration': asset.get('duration'), | ||
'categories': categories, | ||
'view_count': asset.get('views'), | ||
'rtmp_live': asset.get('live'), | ||
'timestamp': parse_iso8601(asset.get('date')), | ||
} | ||
formats = self._extract_m3u8_formats( | ||
videos[0]['url'].replace('.smil', '.m3u8'), asset_id, 'mp4') | ||
section_title = strip_or_none(try_get(data, lambda x: x['section']['title'])) | ||
info.update({ | ||
'formats': formats, | ||
'display_id': asset.get('permalink'), | ||
'thumbnail': try_get(asset, lambda x: x['images'][0]), | ||
'categories': [section_title] if section_title else None, | ||
'view_count': int_or_none(asset.get('views')), | ||
'is_live': asset.get('is_live') is True, | ||
'timestamp': parse_iso8601(asset.get('date') or asset.get('published_at')), | ||
}) | ||
return info |