Skip to content

Commit

Permalink
[twitter] improve extraction
Browse files Browse the repository at this point in the history
- add support for generic embeds(closes #22168)
- always extract http formats for native videos(closes #14934)
- add support for Twitter Broadcasts(closes #21369)
- extract more metadata
- improve VMap format extraction
- unify extraction code for both twitter statuses and cards
  • Loading branch information
remitamine committed Nov 9, 2019
1 parent 0b16b3c commit 18ca61c
Show file tree
Hide file tree
Showing 3 changed files with 348 additions and 311 deletions.
1 change: 1 addition & 0 deletions youtube_dl/extractor/extractors.py
Original file line number Diff line number Diff line change
Expand Up @@ -1241,6 +1241,7 @@
TwitterCardIE,
TwitterIE,
TwitterAmplifyIE,
TwitterBroadcastIE,
)
from .udemy import (
UdemyIE,
Expand Down
80 changes: 49 additions & 31 deletions youtube_dl/extractor/periscope.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,54 @@ def _call_api(self, method, query, item_id):
'https://api.periscope.tv/api/v2/%s' % method,
item_id, query=query)

def _parse_broadcast_data(self, broadcast, video_id):
title = broadcast['status']
uploader = broadcast.get('user_display_name') or broadcast.get('username')
title = '%s - %s' % (uploader, title) if uploader else title
is_live = broadcast.get('state').lower() == 'running'

thumbnails = [{
'url': broadcast[image],
} for image in ('image_url', 'image_url_small') if broadcast.get(image)]

return {
'id': broadcast.get('id') or video_id,
'title': self._live_title(title) if is_live else title,
'timestamp': parse_iso8601(broadcast.get('created_at')),
'uploader': uploader,
'uploader_id': broadcast.get('user_id') or broadcast.get('username'),
'thumbnails': thumbnails,
'view_count': int_or_none(broadcast.get('total_watched')),
'tags': broadcast.get('tags'),
'is_live': is_live,
}

@staticmethod
def _extract_common_format_info(broadcast):
return broadcast.get('state').lower(), int_or_none(broadcast.get('width')), int_or_none(broadcast.get('height'))

@staticmethod
def _add_width_and_height(f, width, height):
for key, val in (('width', width), ('height', height)):
if not f.get(key):
f[key] = val

def _extract_pscp_m3u8_formats(self, m3u8_url, video_id, format_id, state, width, height, fatal=True):
m3u8_formats = self._extract_m3u8_formats(
m3u8_url, video_id, 'mp4',
entry_protocol='m3u8_native'
if state in ('ended', 'timed_out') else 'm3u8',
m3u8_id=format_id, fatal=fatal)
if len(m3u8_formats) == 1:
self._add_width_and_height(m3u8_formats[0], width, height)
return m3u8_formats


class PeriscopeIE(PeriscopeBaseIE):
IE_DESC = 'Periscope'
IE_NAME = 'periscope'
_VALID_URL = r'https?://(?:www\.)?(?:periscope|pscp)\.tv/[^/]+/(?P<id>[^/?#]+)'
# Alive example URLs can be found here http://onperiscope.com/
# Alive example URLs can be found here https://www.periscope.tv/
_TESTS = [{
'url': 'https://www.periscope.tv/w/aJUQnjY3MjA3ODF8NTYxMDIyMDl2zCg2pECBgwTqRpQuQD352EMPTKQjT4uqlM3cgWFA-g==',
'md5': '65b57957972e503fcbbaeed8f4fa04ca',
Expand Down Expand Up @@ -61,21 +103,9 @@ def _real_extract(self, url):
'accessVideoPublic', {'broadcast_id': token}, token)

broadcast = stream['broadcast']
title = broadcast['status']

uploader = broadcast.get('user_display_name') or broadcast.get('username')
uploader_id = (broadcast.get('user_id') or broadcast.get('username'))
info = self._parse_broadcast_data(broadcast, token)

title = '%s - %s' % (uploader, title) if uploader else title
state = broadcast.get('state').lower()
if state == 'running':
title = self._live_title(title)
timestamp = parse_iso8601(broadcast.get('created_at'))

thumbnails = [{
'url': broadcast[image],
} for image in ('image_url', 'image_url_small') if broadcast.get(image)]

width = int_or_none(broadcast.get('width'))
height = int_or_none(broadcast.get('height'))

Expand All @@ -92,32 +122,20 @@ def add_width_and_height(f):
continue
video_urls.add(video_url)
if format_id != 'rtmp':
m3u8_formats = self._extract_m3u8_formats(
video_url, token, 'mp4',
entry_protocol='m3u8_native'
if state in ('ended', 'timed_out') else 'm3u8',
m3u8_id=format_id, fatal=False)
if len(m3u8_formats) == 1:
add_width_and_height(m3u8_formats[0])
m3u8_formats = self._extract_pscp_m3u8_formats(
video_url, token, format_id, state, width, height, False)
formats.extend(m3u8_formats)
continue
rtmp_format = {
'url': video_url,
'ext': 'flv' if format_id == 'rtmp' else 'mp4',
}
add_width_and_height(rtmp_format)
self._add_width_and_height(rtmp_format)
formats.append(rtmp_format)
self._sort_formats(formats)

return {
'id': broadcast.get('id') or token,
'title': title,
'timestamp': timestamp,
'uploader': uploader,
'uploader_id': uploader_id,
'thumbnails': thumbnails,
'formats': formats,
}
info['formats'] = formats
return info


class PeriscopeUserIE(PeriscopeBaseIE):
Expand Down
Loading

0 comments on commit 18ca61c

Please sign in to comment.