From d713c33c4aefd0767807a2f1ac890bb71b5e2a08 Mon Sep 17 00:00:00 2001 From: TRox1972 Date: Fri, 27 May 2016 19:25:51 +0200 Subject: [PATCH 1/2] [EOnline] Add new extractor --- youtube_dl/extractor/eonline.py | 42 ++++++++++++++++++++++++++++++ youtube_dl/extractor/extractors.py | 1 + 2 files changed, 43 insertions(+) create mode 100644 youtube_dl/extractor/eonline.py diff --git a/youtube_dl/extractor/eonline.py b/youtube_dl/extractor/eonline.py new file mode 100644 index 00000000000..081b37a2305 --- /dev/null +++ b/youtube_dl/extractor/eonline.py @@ -0,0 +1,42 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re +from .common import InfoExtractor + +from ..utils import ( + smuggle_url, + update_url_query, +) + +class EOnlineIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?eonline\.com/[a-z]{2}(?:/[a-z-]+){3}/[0-9]+/(?P<display_id>[a-z-]+)' + _TEST = { + 'url': 'http://www.eonline.com/uk/shows/botched/videos/249184/transgender-woman-takes-a-trip-to-her-past', + 'md5': '1ca5b36c4337fde2b65207e0ad0c11c0', + 'info_dict': { + 'id': 'C872_ktn4Rgc', + 'ext': 'mp4', + 'title': 'Transgender Woman Takes a Trip to Her Past', + 'description': 'md5:621feda5e84d5d4a29f4cc26faa33d24', + 'timestamp': 1464364800, + 'upload_date': '20160527', + 'uploader': 'NBCU-E', + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + display_id = mobj.group('display_id') + webpage = self._download_webpage(url, display_id) + + release_url = self._search_regex(r'"videoSourceUrl"\s*:\s*"(.+)"', + webpage, 'ThePlatform ID') + + return { + '_type': 'url_transparent', + 'ie_key': 'ThePlatform', + 'url': smuggle_url(update_url_query(release_url, {'mbr': 'true', 'switch': 'http'}), + {'force_smil_url': True}), + 'display_id': display_id, + } diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 05561149a72..e3c096f0ba0 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -221,6 +221,7 
@@ from .elpais import ElPaisIE from .embedly import EmbedlyIE from .engadget import EngadgetIE +from .eonline import EOnlineIE from .eporner import EpornerIE from .eroprofile import EroProfileIE from .escapist import EscapistIE From 7faf6c258cf36414be38f54cac413816274f6e27 Mon Sep 17 00:00:00 2001 From: TRox1972 Date: Thu, 9 Jun 2016 14:27:34 +0200 Subject: [PATCH 2/2] [EOnline] Improvements --- youtube_dl/extractor/eonline.py | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/eonline.py b/youtube_dl/extractor/eonline.py index 081b37a2305..8ec41e5ccdd 100644 --- a/youtube_dl/extractor/eonline.py +++ b/youtube_dl/extractor/eonline.py @@ -2,41 +2,46 @@ from __future__ import unicode_literals import re -from .common import InfoExtractor +from .common import InfoExtractor from ..utils import ( smuggle_url, update_url_query, ) class EOnlineIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?eonline\.com/[a-z]{2}(?:/[a-z-]+){3}/[0-9]+/(?P<display_id>[a-z-]+)' + _VALID_URL = r'https?://(?:www\.)?eonline\.com/[a-z]{2}(?:/[a-z-]+){3}/(?P<id>[0-9]+)/(?P<display_id>[a-z-]+)' _TEST = { 'url': 'http://www.eonline.com/uk/shows/botched/videos/249184/transgender-woman-takes-a-trip-to-her-past', 'md5': '1ca5b36c4337fde2b65207e0ad0c11c0', 'info_dict': { - 'id': 'C872_ktn4Rgc', + 'id': '249184', 'ext': 'mp4', 'title': 'Transgender Woman Takes a Trip to Her Past', 'description': 'md5:621feda5e84d5d4a29f4cc26faa33d24', 'timestamp': 1464364800, 'upload_date': '20160527', - 'uploader': 'NBCU-E', + 'uploader': 'NBCU-E', } } def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) - display_id = mobj.group('display_id') + video_id, display_id = mobj.group('id', 'display_id') webpage = self._download_webpage(url, display_id) - release_url = self._search_regex(r'"videoSourceUrl"\s*:\s*"(.+)"', - webpage, 'ThePlatform ID') + data = self._parse_json(self._search_regex( + r'evideo.videos.detail\s*=\s*(\[\s*\{[^\]]+]);', + webpage, 'JSON data'), 
display_id) + + for entry in data: + if entry['id'] == video_id: + release_url = entry['videoSourceUrl'] return { - '_type': 'url_transparent', + '_type': 'url_transparent', 'ie_key': 'ThePlatform', - 'url': smuggle_url(update_url_query(release_url, {'mbr': 'true', 'switch': 'http'}), - {'force_smil_url': True}), + 'url': smuggle_url(update_url_query(release_url, {'mbr': True, 'switch': 'http'}), {'force_smil_url': True}), + 'id': video_id, 'display_id': display_id, }