Skip to content

Commit

Permalink
[viewlift] Improve extraction (closes #22545)
Browse files Browse the repository at this point in the history
  • Loading branch information
dstftw authored and meunierd committed Feb 13, 2020
1 parent 6c29cab commit cc0966a
Showing 1 changed file with 39 additions and 7 deletions.
46 changes: 39 additions & 7 deletions youtube_dl/extractor/viewlift.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,12 @@
js_to_json,
parse_age_limit,
parse_duration,
+ try_get,
)


class ViewLiftBaseIE(InfoExtractor):
- _DOMAINS_REGEX = r'(?:snagfilms|snagxtreme|funnyforfree|kiddovid|winnersview|(?:monumental|lax)sportsnetwork|vayafilm)\.com|hoichoi\.tv'
+ _DOMAINS_REGEX = r'(?:(?:main\.)?snagfilms|snagxtreme|funnyforfree|kiddovid|winnersview|(?:monumental|lax)sportsnetwork|vayafilm)\.com|hoichoi\.tv'


class ViewLiftEmbedIE(ViewLiftBaseIE):
Expand Down Expand Up @@ -113,7 +114,7 @@ def _real_extract(self, url):


class ViewLiftIE(ViewLiftBaseIE):
- _VALID_URL = r'https?://(?:www\.)?(?P<domain>%s)/(?:films/title|show|(?:news/)?videos?)/(?P<id>[^?#]+)' % ViewLiftBaseIE._DOMAINS_REGEX
+ _VALID_URL = r'https?://(?:www\.)?(?P<domain>%s)(?:/(?:films/title|show|(?:news/)?videos?))?/(?P<id>[^?#]+)' % ViewLiftBaseIE._DOMAINS_REGEX
_TESTS = [{
'url': 'http://www.snagfilms.com/films/title/lost_for_life',
'md5': '19844f897b35af219773fd63bdec2942',
Expand All @@ -128,7 +129,7 @@ class ViewLiftIE(ViewLiftBaseIE):
'categories': 'mincount:3',
'age_limit': 14,
'upload_date': '20150421',
- 'timestamp': 1429656819,
+ 'timestamp': 1429656820,
}
}, {
'url': 'http://www.snagfilms.com/show/the_world_cut_project/india',
Expand All @@ -141,10 +142,26 @@ class ViewLiftIE(ViewLiftBaseIE):
'description': 'md5:5c168c5a8f4719c146aad2e0dfac6f5f',
'thumbnail': r're:^https?://.*\.jpg',
'duration': 979,
'categories': 'mincount:2',
'timestamp': 1399478279,
'upload_date': '20140507',
}
}, {
'url': 'http://main.snagfilms.com/augie_alone/s_2_ep_12_love',
'info_dict': {
'id': '00000148-7b53-de26-a9fb-fbf306f70020',
'display_id': 'augie_alone/s_2_ep_12_love',
'ext': 'mp4',
'title': 'Augie, Alone:S. 2 Ep. 12 - Love',
'description': 'md5:db2a5c72d994f16a780c1eb353a8f403',
'thumbnail': r're:^https?://.*\.jpg',
'duration': 107,
},
'params': {
'skip_download': True,
},
}, {
'url': 'http://main.snagfilms.com/films/title/the_freebie',
'only_matching': True,
}, {
# Film is not playable in your area.
'url': 'http://www.snagfilms.com/films/title/inside_mecca',
Expand Down Expand Up @@ -181,7 +198,21 @@ def _real_extract(self, url):
gist = content_data['gist']
film_id = gist['id']
title = gist['title']
- video_assets = content_data['streamingInfo']['videoAssets']
+ video_assets = try_get(
+ content_data, lambda x: x['streamingInfo']['videoAssets'], dict)
+ if not video_assets:
+ token = self._download_json(
+ 'https://prod-api.viewlift.com/identity/anonymous-token',
+ film_id, 'Downloading authorization token',
+ query={'site': 'snagfilms'})['authorizationToken']
+ video_assets = self._download_json(
+ 'https://prod-api.viewlift.com/entitlement/video/status',
+ film_id, headers={
+ 'Authorization': token,
+ 'Referer': url,
+ }, query={
+ 'id': film_id
+ })['video']['streamingInfo']['videoAssets']

formats = []
mpeg_video_assets = video_assets.get('mpeg') or []
Expand Down Expand Up @@ -241,8 +272,9 @@ def _real_extract(self, url):
if category.get('title')]
break
else:
- title = self._search_regex(
- r'itemprop="title">([^<]+)<', webpage, 'title')
+ title = self._html_search_regex(
+ (r'itemprop="title">([^<]+)<',
+ r'(?s)itemprop="title">(.+?)<div'), webpage, 'title')
description = self._html_search_regex(
r'(?s)<div itemprop="description" class="film-synopsis-inner ">(.+?)</div>',
webpage, 'description', default=None) or self._og_search_description(webpage)
Expand Down

0 comments on commit cc0966a

Please sign in to comment.