forked from yt-dlp/yt-dlp
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'yt-dlp:master' into master
- Loading branch information
Showing 6 changed files with 395 additions and 24 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,123 @@ | ||
import re | ||
from functools import partial | ||
|
||
from .common import InfoExtractor | ||
from ..networking.exceptions import HTTPError | ||
from ..utils import ( | ||
ExtractorError, | ||
bug_reports_message, | ||
clean_html, | ||
format_field, | ||
get_element_text_and_html_by_tag, | ||
int_or_none, | ||
url_or_none, | ||
) | ||
from ..utils.traversal import traverse_obj | ||
|
||
|
||
class BundestagIE(InfoExtractor):
    """Extractor for dbtg.tv short links and bundestag.de Mediathek video URLs.

    Formats are gathered from two sources: an HLS playlist whose URL is built
    from the video id, and a "share" JSON document that lists direct audio and
    video download URLs.
    """
    _VALID_URL = [
        r'https?://dbtg\.tv/[cf]vid/(?P<id>\d+)',
        r'https?://www\.bundestag\.de/mediathek/?\?(?:[^#]+&)?videoid=(?P<id>\d+)',
    ]
    _TESTS = [{
        'url': 'https://dbtg.tv/cvid/7605304',
        'info_dict': {
            'id': '7605304',
            'ext': 'mp4',
            'title': '145. Sitzung vom 15.12.2023, TOP 24 Barrierefreiheit',
            'description': 'md5:321a9dc6bdad201264c0045efc371561',
        },
    }, {
        'url': 'https://www.bundestag.de/mediathek?videoid=7602120&url=L21lZGlhdGhla292ZXJsYXk=&mod=mediathek',
        'info_dict': {
            'id': '7602120',
            'ext': 'mp4',
            'title': '130. Sitzung vom 18.10.2023, TOP 1 Befragung der Bundesregierung',
            'description': 'Befragung der Bundesregierung',
        },
    }, {
        'url': 'https://www.bundestag.de/mediathek?videoid=7604941#url=L21lZGlhdGhla292ZXJsYXk/dmlkZW9pZD03NjA0OTQx&mod=mediathek',
        'only_matching': True,
    }, {
        'url': 'http://dbtg.tv/fvid/3594346',
        'only_matching': True,
    }]

    # Page queried (with `videoid` and `view=main`) for the title and description markup
    _OVERLAY_URL = 'https://www.bundestag.de/mediathekoverlay'
    # HLS playlist URL template; both `{0}` placeholders receive the video id
    _INSTANCE_FORMAT = 'https://cldf-wzw-od.r53.cdn.tv1.eu/13014bundestagod/_definst_/13014bundestag/ondemand/3777parlamentsfernsehen/archiv/app144277506/145293313/{0}/{0}_playlist.smil/playlist.m3u8'

    # Share JSON endpoint; the video id is appended as the `contentId` value
    _SHARE_URL = 'https://webtv.bundestag.de/player/macros/_x_s-144277506/shareData.json?contentId='
    # The file names of the share URLs are parsed for codec, bitrate and channels/resolution
    _SHARE_AUDIO_REGEX = r'/\d+_(?P<codec>\w+)_(?P<bitrate>\d+)kb_(?P<channels>\w+)_\w+_\d+\.(?P<ext>\w+)'
    _SHARE_VIDEO_REGEX = r'/\d+_(?P<codec>\w+)_(?P<width>\w+)_(?P<height>\w+)_(?P<bitrate>\d+)kb_\w+_\w+_\d+\.(?P<ext>\w+)'

    def _bt_extract_share_formats(self, video_id):
        """Yield one format dict per usable entry of the share JSON for *video_id*.

        This source is best-effort: if the JSON cannot be downloaded, or its
        status code is not 1, nothing is yielded so that formats obtained
        elsewhere remain usable. Entries whose key starts with ``audio`` become
        audio-only formats, those starting with ``download`` become video
        formats; everything else is ignored.
        """
        # Non-fatal on purpose, mirroring the HLS source in `_real_extract`:
        # an outage of the share endpoint must not discard formats already found
        share_data = self._download_json(
            f'{self._SHARE_URL}{video_id}', video_id,
            note='Downloading share format JSON', fatal=False)
        if share_data is False:
            # NOTE(review): relies on non-fatal download helpers returning False
            # and reporting the failure themselves - confirm against InfoExtractor
            return
        if traverse_obj(share_data, ('status', 'code', {int})) != 1:
            self.report_warning(format_field(
                share_data, [('status', 'message', {str})],
                'Share API response: %s', default='Unknown Share API Error')
                + bug_reports_message())
            return

        for name, url in share_data.items():
            # Skips non-URL values such as the `status` object
            if not isinstance(name, str) or not url_or_none(url):
                continue

            if name.startswith('audio'):
                match = re.search(self._SHARE_AUDIO_REGEX, url)
                yield {
                    'format_id': name,
                    'url': url,
                    'vcodec': 'none',
                    # If the file name did not match, no extra fields are added
                    **traverse_obj(match, {
                        'acodec': 'codec',
                        'audio_channels': ('channels', {{'mono': 1, 'stereo': 2}.get}),
                        'abr': ('bitrate', {int_or_none}),
                        'ext': 'ext',
                    }),
                }

            elif name.startswith('download'):
                match = re.search(self._SHARE_VIDEO_REGEX, url)
                yield {
                    'format_id': name,
                    'url': url,
                    **traverse_obj(match, {
                        'vcodec': 'codec',
                        'tbr': ('bitrate', {int_or_none}),
                        'width': ('width', {int_or_none}),
                        'height': ('height', {int_or_none}),
                        'ext': 'ext',
                    }),
                }

    def _real_extract(self, url):
        """Return the info dict (id, formats, title, description) for *url*.

        Raises ExtractorError when the HLS playlist request answers with
        HTTP 404, which is treated as an unknown video id.
        """
        video_id = self._match_id(url)
        formats = []
        result = {'id': video_id, 'formats': formats}

        try:
            formats.extend(self._extract_m3u8_formats(
                self._INSTANCE_FORMAT.format(video_id), video_id, m3u8_id='instance'))
        except ExtractorError as error:
            if isinstance(error.cause, HTTPError) and error.cause.status == 404:
                raise ExtractorError('Could not find video id', expected=True)
            # Any other HLS failure is only a warning; share formats may still work
            self.report_warning(f'Error extracting hls formats: {error}', video_id)
        formats.extend(self._bt_extract_share_formats(video_id))
        if not formats:
            self.raise_no_formats('Could not find suitable formats', video_id=video_id)

        # Metadata is optional: the overlay download is non-fatal
        result.update(traverse_obj(self._download_webpage(
            self._OVERLAY_URL, video_id,
            query={'videoid': video_id, 'view': 'main'},
            note='Downloading metadata overlay', fatal=False,
        ), {
            # Title: content of the first <h3>, with any <span> elements removed
            'title': (
                {partial(get_element_text_and_html_by_tag, 'h3')}, 0,
                {partial(re.sub, r'<span[^>]*>[^<]+</span>', '')}, {clean_html}),
            # Description: content of the first <p>
            'description': ({partial(get_element_text_and_html_by_tag, 'p')}, 0, {clean_html}),
        }))

        return result
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,112 @@ | ||
import datetime | ||
import urllib.parse | ||
|
||
from .common import InfoExtractor | ||
from ..utils import ( | ||
clean_html, | ||
datetime_from_str, | ||
unified_timestamp, | ||
urljoin, | ||
) | ||
|
||
|
||
class JoqrAgIE(InfoExtractor):
    """Extractor for the single live stream served by the uniqueradio.jp player.

    Every matching URL resolves to the same stream, so the video id is always
    ``live``.
    """
    IE_DESC = '超!A&G+ 文化放送 (f.k.a. AGQR) Nippon Cultural Broadcasting, Inc. (JOQR)'
    _VALID_URL = [r'https?://www\.uniqueradio\.jp/agplayer5/(?:player|inc-player-hls)\.php',
                  r'https?://(?:www\.)?joqr\.co\.jp/ag/',
                  r'https?://(?:www\.)?joqr\.co\.jp/qr/ag(?:daily|regular)program/?(?:$|[#?])']
    _TESTS = [{
        'url': 'https://www.uniqueradio.jp/agplayer5/player.php',
        'info_dict': {
            'id': 'live',
            'title': str,
            'channel': '超!A&G+',
            'description': str,
            'live_status': 'is_live',
            'release_timestamp': int,
        },
        'params': {
            'skip_download': True,
            'ignore_no_formats_error': True,
        },
    }, {
        'url': 'https://www.uniqueradio.jp/agplayer5/inc-player-hls.php',
        'only_matching': True,
    }, {
        'url': 'https://www.joqr.co.jp/ag/article/103760/',
        'only_matching': True,
    }, {
        'url': 'http://www.joqr.co.jp/qr/agdailyprogram/',
        'only_matching': True,
    }, {
        'url': 'http://www.joqr.co.jp/qr/agregularprogram/',
        'only_matching': True,
    }]

    def _extract_metadata(self, variable, html):
        """Return the cleaned value of the quoted ``var <variable> = '...'`` assignment in *html*.

        The value is URL-unquoted (``+`` becomes a space) and stripped of HTML.
        Returns None when the variable is absent or its value ends up empty.
        """
        return clean_html(urllib.parse.unquote_plus(self._search_regex(
            rf'var\s+{variable}\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1',
            html, 'metadata', group='value', default=''))) or None

    def _extract_start_timestamp(self, video_id, is_live):
        """Return the start timestamp taken from the daily program list, or None.

        Today's list is consulted first. When the stream is live but today's
        start time still lies in the future, yesterday's list is used instead.
        """
        def extract_start_time_from(date_str):
            # Shifted by nine hours to match the +09:00 offset used below;
            # assumes datetime_from_str yields UTC - TODO confirm
            dt = datetime_from_str(date_str) + datetime.timedelta(hours=9)
            date = dt.strftime('%Y%m%d')
            # Captures the time after the en dash of the first matching header;
            # presumably the end of the first listed slot - verify against the page
            start_time = self._search_regex(
                r'<h3[^>]+\bclass="dailyProgram-itemHeaderTime"[^>]*>[\s\d:]+–\s*(\d{1,2}:\d{1,2})',
                self._download_webpage(
                    f'https://www.joqr.co.jp/qr/agdailyprogram/?date={date}', video_id,
                    note=f'Downloading program list of {date}', fatal=False,
                    errnote=f'Failed to download program list of {date}') or '',
                'start time', default=None)
            if start_time:
                return unified_timestamp(f'{dt.strftime("%Y/%m/%d")} {start_time} +09:00')
            return None

        start_timestamp = extract_start_time_from('today')
        if not start_timestamp:
            return None

        # Compare against an unambiguous epoch value: calling `.timestamp()` on a
        # naive datetime would interpret it in the machine's local timezone
        now_timestamp = datetime.datetime.now(datetime.timezone.utc).timestamp()
        if not is_live or start_timestamp < now_timestamp:
            return start_timestamp
        else:
            return extract_start_time_from('yesterday')

    def _real_extract(self, url):
        """Return the info dict of the live stream; *url* only selects this extractor."""
        video_id = 'live'

        metadata = self._download_webpage(
            'https://www.uniqueradio.jp/aandg', video_id,
            note='Downloading metadata', errnote='Failed to download metadata')
        title = self._extract_metadata('Program_name', metadata)

        # This exact program name is treated as "no broadcast at the moment"
        if title == '放送休止':
            formats = []
            live_status = 'is_upcoming'
            release_timestamp = self._extract_start_timestamp(video_id, False)
            msg = 'This stream is not currently live'
            if release_timestamp:
                msg += (' and will start at '
                        + datetime.datetime.fromtimestamp(release_timestamp).strftime('%Y-%m-%d %H:%M:%S'))
            self.raise_no_formats(msg, expected=True)
        else:
            m3u8_path = self._search_regex(
                r'<source\s[^>]*\bsrc="([^"]+)"',
                self._download_webpage(
                    'https://www.uniqueradio.jp/agplayer5/inc-player-hls.php', video_id,
                    note='Downloading player data', errnote='Failed to download player data'),
                'm3u8 url')
            formats = self._extract_m3u8_formats(
                urljoin('https://www.uniqueradio.jp/', m3u8_path), video_id)
            live_status = 'is_live'
            release_timestamp = self._extract_start_timestamp(video_id, True)

        return {
            'id': video_id,
            'title': title,
            'channel': '超!A&G+',
            'description': self._extract_metadata('Program_text', metadata),
            'formats': formats,
            'live_status': live_status,
            'release_timestamp': release_timestamp,
        }
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
from .common import InfoExtractor | ||
from ..utils import ( | ||
int_or_none, | ||
parse_resolution, | ||
unified_timestamp, | ||
url_or_none, | ||
) | ||
from ..utils.traversal import traverse_obj | ||
|
||
|
||
class MaarivIE(InfoExtractor):
    """Extractor for the player.maariv.co.il embeddable player."""
    IE_NAME = 'maariv.co.il'
    _VALID_URL = r'https?://player\.maariv\.co\.il/public/player\.html\?(?:[^#]+&)?media=(?P<id>\d+)'
    _EMBED_REGEX = [rf'<iframe[^>]+\bsrc=[\'"](?P<url>{_VALID_URL})']
    _TESTS = [{
        'url': 'https://player.maariv.co.il/public/player.html?player=maariv-desktop&media=3611585',
        'info_dict': {
            'id': '3611585',
            'duration': 75,
            'ext': 'mp4',
            'upload_date': '20231009',
            'title': 'מבצע חרבות ברזל',
            'timestamp': 1696851301,
        },
    }]
    _WEBPAGE_TESTS = [{
        'url': 'https://www.maariv.co.il/news/law/Article-1044008',
        'info_dict': {
            'id': '3611585',
            'duration': 75,
            'ext': 'mp4',
            'upload_date': '20231009',
            'title': 'מבצע חרבות ברזל',
            'timestamp': 1696851301,
        },
    }]

    def _real_extract(self, url):
        """Return id, metadata and HLS/HTTP formats for the media id found in *url*."""
        video_id = self._match_id(url)
        media_json = self._download_json(
            f'https://dal.walla.co.il/media/{video_id}?origin=player.maariv.co.il', video_id)
        data = media_json['data']

        # HLS formats are optional: they need a valid URL and are fetched non-fatally
        hls_url = traverse_obj(data, ('video', 'url', {url_or_none}))
        hls_formats = []
        if hls_url:
            hls_formats = self._extract_m3u8_formats(hls_url, video_id, m3u8_id='hls', fatal=False)

        # One direct format per valid stream URL; the resolution is parsed from the URL text
        stream_urls = traverse_obj(data, ('video', 'stream_urls', ..., 'stream_url', {url_or_none}))
        http_formats = [{
            'url': stream_url,
            'format_id': 'http',
            **parse_resolution(stream_url),
        } for stream_url in stream_urls]

        metadata = traverse_obj(data, {
            'title': 'title',
            'duration': ('video', 'duration', {int_or_none}),
            'timestamp': ('upload_date', {unified_timestamp}),
        })

        return {
            'id': video_id,
            **metadata,
            'formats': [*hls_formats, *http_formats],
        }
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
from .common import InfoExtractor | ||
from ..utils import format_field, parse_iso8601 | ||
|
||
|
||
class RinseFMIE(InfoExtractor):
    """Extractor for rinse.fm episode pages."""
    _VALID_URL = r'https?://(?:www\.)?rinse\.fm/episodes/(?P<id>[^/?#]+)'
    _TESTS = [{
        'url': 'https://rinse.fm/episodes/club-glow-15-12-2023-2000/',
        'md5': '76ee0b719315617df42e15e710f46c7b',
        'info_dict': {
            'id': '1536535',
            'ext': 'mp3',
            'title': 'Club Glow - 15/12/2023 - 20:00',
            'thumbnail': r're:^https://.+\.(?:jpg|JPG)$',
            'release_timestamp': 1702598400,
            'release_date': '20231215',
        },
    }]

    def _real_extract(self, url):
        """Return the audio-only info dict built from the page's Next.js ``entry`` data."""
        slug = self._match_id(url)
        page = self._download_webpage(url, slug)
        nextjs_data = self._search_nextjs_data(page, slug)
        entry = nextjs_data['props']['pageProps']['entry']

        # `id` and `fileUrl` are mandatory (plain indexing); the rest is optional
        episode_id = entry['id']
        title = entry.get('title')
        audio_url = entry['fileUrl']
        released = parse_iso8601(entry.get('episodeDate'))
        # The thumbnail URL is built from the file name of the first featured image, if any
        thumbnail = format_field(
            entry, [('featuredImage', 0, 'filename')], 'https://rinse.imgix.net/media/%s', default=None)

        return {
            'id': episode_id,
            'title': title,
            'url': audio_url,
            'vcodec': 'none',
            'release_timestamp': released,
            'thumbnail': thumbnail,
        }
Oops, something went wrong.