From aa5a2e882a979dc0067c8a346b2a610ea6e69432 Mon Sep 17 00:00:00 2001 From: Lam Date: Mon, 20 Apr 2020 20:41:52 +0200 Subject: [PATCH] Revert "[rudo] remove extractor(closes #18430)(closes #18474)" This reverts commit d350eb9196f1238b658069531e6e2a30eaf043c2. --- youtube_dl/extractor/biobiochiletv.py | 19 ++++------ youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/rudo.py | 53 +++++++++++++++++++++++++++ 3 files changed, 61 insertions(+), 12 deletions(-) create mode 100644 youtube_dl/extractor/rudo.py diff --git a/youtube_dl/extractor/biobiochiletv.py b/youtube_dl/extractor/biobiochiletv.py index dc86c57c5df2..b92031c8ab6b 100644 --- a/youtube_dl/extractor/biobiochiletv.py +++ b/youtube_dl/extractor/biobiochiletv.py @@ -6,6 +6,7 @@ ExtractorError, remove_end, ) +from .rudo import RudoIE class BioBioChileTVIE(InfoExtractor): @@ -40,15 +41,11 @@ class BioBioChileTVIE(InfoExtractor): }, { 'url': 'http://www.biobiochile.cl/noticias/bbtv/comentarios-bio-bio/2016/07/08/edecanes-del-congreso-figuras-decorativas-que-le-cuestan-muy-caro-a-los-chilenos.shtml', 'info_dict': { - 'id': 'b4xd0LK3SK', + 'id': 'edecanes-del-congreso-figuras-decorativas-que-le-cuestan-muy-caro-a-los-chilenos', 'ext': 'mp4', - # TODO: fix url_transparent information overriding - # 'uploader': 'Juan Pablo Echenique', - 'title': 'Comentario Oscar Cáceres', - }, - 'params': { - # empty m3u8 manifest - 'skip_download': True, + 'uploader': '(none)', + 'upload_date': '20160708', + 'title': 'Edecanes del Congreso: Figuras decorativas que le cuestan muy caro a los chilenos', }, }, { 'url': 'http://tv.biobiochile.cl/notas/2015/10/22/ninos-transexuales-de-quien-es-la-decision.shtml', @@ -63,9 +60,7 @@ def _real_extract(self, url): webpage = self._download_webpage(url, video_id) - rudo_url = self._search_regex( - r']+src=(?P[\'"])(?P(?:https?:)?//rudo\.video/vod/[0-9a-zA-Z]+)(?P=q1)', - webpage, 'embed URL', None, group='url') + rudo_url = RudoIE._extract_url(webpage) if not rudo_url: raise ExtractorError('No videos found') @@ -73,7 +68,7 @@ def _real_extract(self, url): thumbnail = self._og_search_thumbnail(webpage) uploader = self._html_search_regex( - r']+href=["\'](?:https?://(?:busca|www)\.biobiochile\.cl)?/(?:lista/)?(?:author|autor)[^>]+>(.+?)', + r']+href=["\']https?://(?:busca|www)\.biobiochile\.cl/(?:lista/)?(?:author|autor)[^>]+>(.+?)', webpage, 'uploader', fatal=False) return { diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 156bd043eff0..5c66e155e4eb 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -968,6 +968,7 @@ from .rtve import RTVEALaCartaIE, RTVELiveIE, RTVEInfantilIE, RTVELiveIE, RTVETelevisionIE from .rtvnh import RTVNHIE from .rtvs import RTVSIE +from .rudo import RudoIE from .ruhd import RUHDIE from .rutube import ( RutubeIE, diff --git a/youtube_dl/extractor/rudo.py b/youtube_dl/extractor/rudo.py new file mode 100644 index 000000000000..f036f67579c5 --- /dev/null +++ b/youtube_dl/extractor/rudo.py @@ -0,0 +1,53 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + js_to_json, + get_element_by_class, + unified_strdate, +) + + +class RudoIE(InfoExtractor): + _VALID_URL = r'https?://rudo\.video/vod/(?P[0-9a-zA-Z]+)' + + _TEST = { + 'url': 'http://rudo.video/vod/oTzw0MGnyG', + 'md5': '2a03a5b32dd90a04c83b6d391cf7b415', + 'info_dict': { + 'id': 'oTzw0MGnyG', + 'ext': 'mp4', + 'title': 'Comentario Tomás Mosciatti', + 'upload_date': '20160617', + }, + } + + @classmethod + def _extract_url(cls, webpage): + mobj = re.search( + r']+src=(?P[\'"])(?P(?:https?:)?//rudo\.video/vod/[0-9a-zA-Z]+)(?P=q1)', + webpage) + if mobj: + return mobj.group('url') + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id, encoding='iso-8859-1') + + jwplayer_data = self._parse_json(self._search_regex( + r'(?s)playerInstance\.setup\(({.+?})\)', webpage, 'jwplayer data'), video_id, + transform_source=lambda s: js_to_json(re.sub(r'encodeURI\([^)]+\)', '""', s))) + + info_dict = self._parse_jwplayer_data( + jwplayer_data, video_id, require_title=False, m3u8_id='hls', mpd_id='dash') + + info_dict.update({ + 'title': self._og_search_title(webpage), + 'upload_date': unified_strdate(get_element_by_class('date', webpage)), + }) + + return info_dict