From 31ca3c4c6f4773629be2d91267987be606fce8a4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?D=C3=A9stin=20Reed?=
Date: Sun, 26 Jun 2016 17:31:10 +0200
Subject: [PATCH] [OReilly] Add new extractor

Add an extractor for player.oreilly.com "videos" and "embed" URLs.
The embed page is downloaded and scraped for the Kaltura partner ID,
the Kaltura ID and the title; extraction is then delegated to the
Kaltura extractor through a kaltura:<partner_id>:<kaltura_id> URL.

---
 youtube_dl/extractor/extractors.py |  1 +
 youtube_dl/extractor/oreilly.py    | 38 ++++++++++++++++++++++++++++++
 2 files changed, 39 insertions(+)
 create mode 100644 youtube_dl/extractor/oreilly.py

diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index 9f98a14908d..cdbb1be900d 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -575,6 +575,7 @@
 )
 from .openload import OpenloadIE
 from .ora import OraTVIE
+from .oreilly import OReillyIE
 from .orf import (
     ORFTVthekIE,
     ORFOE1IE,
diff --git a/youtube_dl/extractor/oreilly.py b/youtube_dl/extractor/oreilly.py
new file mode 100644
index 00000000000..a8da14f1771
--- /dev/null
+++ b/youtube_dl/extractor/oreilly.py
@@ -0,0 +1,38 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_urlparse
+
+
+class OReillyIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?player\.oreilly\.com/(?:videos|embed)/(?P<id>\d+)'
+    _TESTS = [{
+        'url': 'https://player.oreilly.com/videos/9781491944639',
+        'md5': '6382439f4bc1195bf395c91bd62b5671',
+        'info_dict': {
+            'id': '0_tz5u5q67',
+            'title': '01_modern_data_strategy_mike_olson_cloudera_manuel_martin_marquez_cern',
+            'ext': 'mp4',
+            'upload_date': '20160602',
+            'timestamp': 1464888738,
+        }
+    }, {
+        'url': 'https://player.oreilly.com/embed/9781491944639',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        url = 'https://player.oreilly.com/embed/%s' % video_id
+
+        webpage = self._download_webpage(url, video_id)
+
+        partner_id = self._search_regex(r'var partnerId = \'([^\']+)\';',
+                                        webpage, 'partner ID')
+        kaltura_id = self._search_regex(r'var externalId = \'([^\']+)\';',
+                                        webpage, 'Kaltura ID')
+        title = self._search_regex(r'var title = \'([^\']+)\';',
+                                   webpage, 'title')
+
+        return self.url_result('kaltura:%s:%s' % (partner_id, kaltura_id),
+                               'Kaltura', video_title=title)