From d5ecd044af8d46d68fcdf4bba804e75c11d12aa8 Mon Sep 17 00:00:00 2001 From: Lam Date: Mon, 20 Apr 2020 20:34:41 +0200 Subject: [PATCH] Revert "[yahoo:japannews] Add extractor (closes #21698) (#21265)" This reverts commit 228aa56455dccd3f8bb12891c57ddd6ca22da68d. --- youtube_dl/extractor/extractors.py | 1 - youtube_dl/extractor/yahoo.py | 131 ----------------------------- 2 files changed, 132 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 06de556b7ab4..15f54a21455b 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1448,7 +1448,6 @@ YahooSearchIE, YahooGyaOPlayerIE, YahooGyaOIE, - YahooJapanNewsIE, ) from .yandexdisk import YandexDiskIE from .yandexmusic import ( diff --git a/youtube_dl/extractor/yahoo.py b/youtube_dl/extractor/yahoo.py index e5ebdd1806ec..a3b5f00c8696 100644 --- a/youtube_dl/extractor/yahoo.py +++ b/youtube_dl/extractor/yahoo.py @@ -1,14 +1,12 @@ # coding: utf-8 from __future__ import unicode_literals -import hashlib import itertools import json import re from .common import InfoExtractor, SearchInfoExtractor from ..compat import ( - compat_str, compat_urllib_parse, compat_urlparse, ) @@ -20,9 +18,7 @@ int_or_none, mimetype2ext, smuggle_url, - try_get, unescapeHTML, - url_or_none, ) from .brightcove import ( @@ -560,130 +556,3 @@ def _real_extract(self, url): 'https://gyao.yahoo.co.jp/player/%s/' % video_id.replace(':', '/'), YahooGyaOPlayerIE.ie_key(), video_id)) return self.playlist_result(entries, program_id) - - -class YahooJapanNewsIE(InfoExtractor): - IE_NAME = 'yahoo:japannews' - IE_DESC = 'Yahoo! Japan News' - _VALID_URL = r'https?://(?P(?:news|headlines)\.yahoo\.co\.jp)[^\d]*(?P\d[\d-]*\d)?' - _GEO_COUNTRIES = ['JP'] - _TESTS = [{ - 'url': 'https://headlines.yahoo.co.jp/videonews/ann?a=20190716-00000071-ann-int', - 'info_dict': { - 'id': '1736242', - 'ext': 'mp4', - 'title': 'ムン大統領が対日批判を強化“現金化”効果は?(テレビ朝日系(ANN)) - Yahoo!ニュース', - 'description': '韓国の元徴用工らを巡る裁判の原告が弁護士が差し押さえた三菱重工業の資産を売却して - Yahoo!ニュース(テレビ朝日系(ANN))', - 'thumbnail': r're:^https?://.*\.[a-zA-Z\d]{3,4}$', - }, - 'params': { - 'skip_download': True, - }, - }, { - # geo restricted - 'url': 'https://headlines.yahoo.co.jp/hl?a=20190721-00000001-oxv-l04', - 'only_matching': True, - }, { - 'url': 'https://headlines.yahoo.co.jp/videonews/', - 'only_matching': True, - }, { - 'url': 'https://news.yahoo.co.jp', - 'only_matching': True, - }, { - 'url': 'https://news.yahoo.co.jp/byline/hashimotojunji/20190628-00131977/', - 'only_matching': True, - }, { - 'url': 'https://news.yahoo.co.jp/feature/1356', - 'only_matching': True - }] - - def _extract_formats(self, json_data, content_id): - formats = [] - - video_data = try_get( - json_data, - lambda x: x['ResultSet']['Result'][0]['VideoUrlSet']['VideoUrl'], - list) - for vid in video_data or []: - delivery = vid.get('delivery') - url = url_or_none(vid.get('Url')) - if not delivery or not url: - continue - elif delivery == 'hls': - formats.extend( - self._extract_m3u8_formats( - url, content_id, 'mp4', 'm3u8_native', - m3u8_id='hls', fatal=False)) - else: - formats.append({ - 'url': url, - 'format_id': 'http-%s' % compat_str(vid.get('bitrate', '')), - 'height': int_or_none(vid.get('height')), - 'width': int_or_none(vid.get('width')), - 'tbr': int_or_none(vid.get('bitrate')), - }) - self._remove_duplicate_formats(formats) - self._sort_formats(formats) - - return formats - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - host = mobj.group('host') - display_id = mobj.group('id') or host - - webpage = self._download_webpage(url, display_id) - - title = self._html_search_meta( - ['og:title', 'twitter:title'], webpage, 'title', default=None - ) or self._html_search_regex('([^<]+)', webpage, 'title') - - if display_id == host: - # Headline page (w/ multiple BC playlists) ('news.yahoo.co.jp', 'headlines.yahoo.co.jp/videonews/', ...) - stream_plists = re.findall(r'plist=(\d+)', webpage) or re.findall(r'plist["\']:\s*["\']([^"\']+)', webpage) - entries = [ - self.url_result( - smuggle_url( - 'http://players.brightcove.net/5690807595001/HyZNerRl7_default/index.html?playlistId=%s' % plist_id, - {'geo_countries': ['JP']}), - ie='BrightcoveNew', video_id=plist_id) - for plist_id in stream_plists] - return self.playlist_result(entries, playlist_title=title) - - # Article page - description = self._html_search_meta( - ['og:description', 'description', 'twitter:description'], - webpage, 'description', default=None) - thumbnail = self._og_search_thumbnail( - webpage, default=None) or self._html_search_meta( - 'twitter:image', webpage, 'thumbnail', default=None) - space_id = self._search_regex([ - r']+class=["\']yvpub-player["\'][^>]+spaceid=([^&"\']+)', - r'YAHOO\.JP\.srch\.\w+link\.onLoad[^;]+spaceID["\' ]*:["\' ]+([^"\']+)', - r'