diff --git a/plugin/manifest.json b/plugin/manifest.json index 2d5bd10..3db5453 100644 --- a/plugin/manifest.json +++ b/plugin/manifest.json @@ -2,7 +2,7 @@ "uuid": "elephant", "name": "Elephant", "description": "Provides support for downloading videos from various sites.", - "version": "1.0.16", + "version": "1.0.17", "icon": "icon.svg", "mediaParser": true, "mediaListParser": true, diff --git a/plugin/yt-dlp/yt_dlp/YoutubeDL.py b/plugin/yt-dlp/yt_dlp/YoutubeDL.py index fd5aa01..9691a1e 100644 --- a/plugin/yt-dlp/yt_dlp/YoutubeDL.py +++ b/plugin/yt-dlp/yt_dlp/YoutubeDL.py @@ -452,7 +452,8 @@ class YoutubeDL: Can also just be a single color policy, in which case it applies to all outputs. Valid stream names are 'stdout' and 'stderr'. - Valid color policies are one of 'always', 'auto', 'no_color' or 'never'. + Valid color policies are one of 'always', 'auto', + 'no_color', 'never', 'auto-tty' or 'no_color-tty'. geo_bypass: Bypass geographic restriction via faking X-Forwarded-For HTTP header geo_bypass_country: @@ -659,12 +660,15 @@ def __init__(self, params=None, auto_init=True): self.params['color'] = 'no_color' term_allow_color = os.getenv('TERM', '').lower() != 'dumb' - no_color = bool(os.getenv('NO_COLOR')) + base_no_color = bool(os.getenv('NO_COLOR')) def process_color_policy(stream): stream_name = {sys.stdout: 'stdout', sys.stderr: 'stderr'}[stream] - policy = traverse_obj(self.params, ('color', (stream_name, None), {str}), get_all=False) - if policy in ('auto', None): + policy = traverse_obj(self.params, ('color', (stream_name, None), {str}, any)) or 'auto' + if policy in ('auto', 'auto-tty', 'no_color-tty'): + no_color = base_no_color + if policy.endswith('tty'): + no_color = policy.startswith('no_color') if term_allow_color and supports_terminal_sequences(stream): return 'no_color' if no_color else True return False @@ -3168,11 +3172,12 @@ def dl(self, name, info, subtitle=False, test=False): if test: verbose = self.params.get('verbose') + quiet = 
self.params.get('quiet') or not verbose params = { 'test': True, - 'quiet': self.params.get('quiet') or not verbose, + 'quiet': quiet, 'verbose': verbose, - 'noprogress': not verbose, + 'noprogress': quiet, 'nopart': True, 'skip_unavailable_fragments': False, 'keep_fragments': False, diff --git a/plugin/yt-dlp/yt_dlp/__init__.py b/plugin/yt-dlp/yt_dlp/__init__.py index 0e48569..c0b8e3b 100644 --- a/plugin/yt-dlp/yt_dlp/__init__.py +++ b/plugin/yt-dlp/yt_dlp/__init__.py @@ -468,7 +468,7 @@ def metadataparser_actions(f): default_downloader = ed.get_basename() for policy in opts.color.values(): - if policy not in ('always', 'auto', 'no_color', 'never'): + if policy not in ('always', 'auto', 'auto-tty', 'no_color', 'no_color-tty', 'never'): raise ValueError(f'"{policy}" is not a valid color policy') warnings, deprecation_warnings = [], [] diff --git a/plugin/yt-dlp/yt_dlp/extractor/_extractors.py b/plugin/yt-dlp/yt_dlp/extractor/_extractors.py index fc917ff..9b73fcd 100644 --- a/plugin/yt-dlp/yt_dlp/extractor/_extractors.py +++ b/plugin/yt-dlp/yt_dlp/extractor/_extractors.py @@ -504,7 +504,6 @@ from .digitalconcerthall import DigitalConcertHallIE from .digiteka import DigitekaIE from .discogs import DiscogsReleasePlaylistIE -from .discovery import DiscoveryIE from .disney import DisneyIE from .dispeak import DigitallySpeakingIE from .dlf import ( @@ -532,16 +531,12 @@ DiscoveryPlusIndiaShowIE, DiscoveryPlusItalyIE, DiscoveryPlusItalyShowIE, - DIYNetworkIE, DPlayIE, FoodNetworkIE, - GlobalCyclingNetworkPlusIE, GoDiscoveryIE, HGTVDeIE, HGTVUsaIE, InvestigationDiscoveryIE, - MotorTrendIE, - MotorTrendOnDemandIE, ScienceChannelIE, TravelChannelIE, ) @@ -944,6 +939,7 @@ KhanAcademyUnitIE, ) from .kick import ( + KickClipIE, KickIE, KickVODIE, ) @@ -991,6 +987,7 @@ LcpIE, LcpPlayIE, ) +from .learningonscreen import LearningOnScreenIE from .lecture2go import Lecture2GoIE from .lecturio import ( LecturioCourseIE, @@ -2174,10 +2171,7 @@ TV5UnisVideoIE, ) from .tv24ua import 
TV24UAVideoIE -from .tva import ( - TVAIE, - QubIE, -) +from .tva import TVAIE from .tvanouvelles import ( TVANouvellesArticleIE, TVANouvellesIE, diff --git a/plugin/yt-dlp/yt_dlp/extractor/abematv.py b/plugin/yt-dlp/yt_dlp/extractor/abematv.py index 9471df1..66ab083 100644 --- a/plugin/yt-dlp/yt_dlp/extractor/abematv.py +++ b/plugin/yt-dlp/yt_dlp/extractor/abematv.py @@ -9,12 +9,12 @@ import struct import time import urllib.parse -import urllib.request -import urllib.response import uuid from .common import InfoExtractor from ..aes import aes_ecb_decrypt +from ..networking import RequestHandler, Response +from ..networking.exceptions import TransportError from ..utils import ( ExtractorError, OnDemandPagedList, @@ -26,37 +26,36 @@ traverse_obj, update_url_query, ) -from ..utils.networking import clean_proxies - - -def add_opener(ydl, handler): # FIXME: Create proper API in .networking - """Add a handler for opening URLs, like _download_webpage""" - # https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L426 - # https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L605 - rh = ydl._request_director.handlers['Urllib'] - if 'abematv-license' in rh._SUPPORTED_URL_SCHEMES: - return - headers = ydl.params['http_headers'].copy() - proxies = ydl.proxies.copy() - clean_proxies(proxies, headers) - opener = rh._get_instance(cookiejar=ydl.cookiejar, proxies=proxies) - assert isinstance(opener, urllib.request.OpenerDirector) - opener.add_handler(handler) - rh._SUPPORTED_URL_SCHEMES = (*rh._SUPPORTED_URL_SCHEMES, 'abematv-license') - - -class AbemaLicenseHandler(urllib.request.BaseHandler): - handler_order = 499 - STRTABLE = '123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz' - HKEY = b'3AF0298C219469522A313570E8583005A642E73EDD58E3EA2FB7339D3DF1597E' - - def __init__(self, ie: 'AbemaTVIE'): - # the protocol that this should really handle is 'abematv-license://' - # abematv_license_open is just a placeholder for development purposes - # ref. 
https://github.com/python/cpython/blob/f4c03484da59049eb62a9bf7777b963e2267d187/Lib/urllib/request.py#L510 - setattr(self, 'abematv-license_open', getattr(self, 'abematv_license_open', None)) + + +class AbemaLicenseRH(RequestHandler): + _SUPPORTED_URL_SCHEMES = ('abematv-license',) + _SUPPORTED_PROXY_SCHEMES = None + _SUPPORTED_FEATURES = None + RH_NAME = 'abematv_license' + + _STRTABLE = '123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz' + _HKEY = b'3AF0298C219469522A313570E8583005A642E73EDD58E3EA2FB7339D3DF1597E' + + def __init__(self, *, ie: 'AbemaTVIE', **kwargs): + super().__init__(**kwargs) self.ie = ie + def _send(self, request): + url = request.url + ticket = urllib.parse.urlparse(url).netloc + + try: + response_data = self._get_videokey_from_ticket(ticket) + except ExtractorError as e: + raise TransportError(cause=e.cause) from e + except (IndexError, KeyError, TypeError) as e: + raise TransportError(cause=repr(e)) from e + + return Response( + io.BytesIO(response_data), url, + headers={'Content-Length': str(len(response_data))}) + def _get_videokey_from_ticket(self, ticket): to_show = self.ie.get_param('verbose', False) media_token = self.ie._get_media_token(to_show=to_show) @@ -72,25 +71,17 @@ def _get_videokey_from_ticket(self, ticket): 'Content-Type': 'application/json', }) - res = decode_base_n(license_response['k'], table=self.STRTABLE) + res = decode_base_n(license_response['k'], table=self._STRTABLE) encvideokey = bytes_to_intlist(struct.pack('>QQ', res >> 64, res & 0xffffffffffffffff)) h = hmac.new( - binascii.unhexlify(self.HKEY), + binascii.unhexlify(self._HKEY), (license_response['cid'] + self.ie._DEVICE_ID).encode(), digestmod=hashlib.sha256) enckey = bytes_to_intlist(h.digest()) return intlist_to_bytes(aes_ecb_decrypt(encvideokey, enckey)) - def abematv_license_open(self, url): - url = url.get_full_url() if isinstance(url, urllib.request.Request) else url - ticket = urllib.parse.urlparse(url).netloc - response_data = 
self._get_videokey_from_ticket(ticket) - return urllib.response.addinfourl(io.BytesIO(response_data), headers={ - 'Content-Length': str(len(response_data)), - }, url=url, code=200) - class AbemaTVBaseIE(InfoExtractor): _NETRC_MACHINE = 'abematv' @@ -139,7 +130,7 @@ def _get_device_token(self): if self._USERTOKEN: return self._USERTOKEN - add_opener(self._downloader, AbemaLicenseHandler(self)) + self._downloader._request_director.add_handler(AbemaLicenseRH(ie=self, logger=None)) username, _ = self._get_login_info() auth_cache = username and self.cache.load(self._NETRC_MACHINE, username, min_ver='2024.01.19') @@ -386,8 +377,7 @@ def _real_extract(self, url): f'https://api.abema.io/v1/video/programs/{video_id}', video_id, note='Checking playability', headers=headers) - ondemand_types = traverse_obj(api_response, ('terms', ..., 'onDemandType')) - if 3 not in ondemand_types: + if not traverse_obj(api_response, ('label', 'free', {bool})): # cannot acquire decryption key for these streams self.report_warning('This is a premium-only stream') availability = 'premium_only' diff --git a/plugin/yt-dlp/yt_dlp/extractor/adn.py b/plugin/yt-dlp/yt_dlp/extractor/adn.py index 7be990b..3370717 100644 --- a/plugin/yt-dlp/yt_dlp/extractor/adn.py +++ b/plugin/yt-dlp/yt_dlp/extractor/adn.py @@ -16,6 +16,7 @@ float_or_none, int_or_none, intlist_to_bytes, + join_nonempty, long_to_bytes, parse_iso8601, pkcs1pad, @@ -48,9 +49,9 @@ class ADNBaseIE(InfoExtractor): class ADNIE(ADNBaseIE): - _VALID_URL = r'https?://(?:www\.)?(?:animation|anime)digitalnetwork\.(?Pfr|de)/video/[^/?#]+/(?P\d+)' + _VALID_URL = r'https?://(?:www\.)?(?:animation|anime)digitalnetwork\.com/(?:(?Pde)/)?video/[^/?#]+/(?P\d+)' _TESTS = [{ - 'url': 'https://animationdigitalnetwork.fr/video/fruits-basket/9841-episode-1-a-ce-soir', + 'url': 'https://animationdigitalnetwork.com/video/fruits-basket/9841-episode-1-a-ce-soir', 'md5': '1c9ef066ceb302c86f80c2b371615261', 'info_dict': { 'id': '9841', @@ -70,10 +71,10 @@ class 
ADNIE(ADNBaseIE): }, 'skip': 'Only available in French and German speaking Europe', }, { - 'url': 'http://animedigitalnetwork.fr/video/blue-exorcist-kyoto-saga/7778-episode-1-debut-des-hostilites', + 'url': 'http://animedigitalnetwork.com/video/blue-exorcist-kyoto-saga/7778-episode-1-debut-des-hostilites', 'only_matching': True, }, { - 'url': 'https://animationdigitalnetwork.de/video/the-eminence-in-shadow/23550-folge-1', + 'url': 'https://animationdigitalnetwork.com/de/video/the-eminence-in-shadow/23550-folge-1', 'md5': '5c5651bf5791fa6fcd7906012b9d94e8', 'info_dict': { 'id': '23550', @@ -217,7 +218,7 @@ def _real_extract(self, url): links_data = self._download_json( links_url, video_id, 'Downloading links JSON metadata', headers={ 'X-Player-Token': authorization, - 'X-Target-Distribution': lang, + 'X-Target-Distribution': lang or 'fr', **self._HEADERS, }, query={ 'freeWithAds': 'true', @@ -298,9 +299,9 @@ def _real_extract(self, url): class ADNSeasonIE(ADNBaseIE): - _VALID_URL = r'https?://(?:www\.)?(?:animation|anime)digitalnetwork\.(?Pfr|de)/video/(?P[^/?#]+)/?(?:$|[#?])' + _VALID_URL = r'https?://(?:www\.)?(?:animation|anime)digitalnetwork\.com/(?:(?Pde)/)?video/(?P[^/?#]+)/?(?:$|[#?])' _TESTS = [{ - 'url': 'https://animationdigitalnetwork.fr/video/tokyo-mew-mew-new', + 'url': 'https://animationdigitalnetwork.com/video/tokyo-mew-mew-new', 'playlist_count': 12, 'info_dict': { 'id': '911', @@ -318,7 +319,7 @@ def _real_extract(self, url): episodes = self._download_json( f'{self._API_BASE_URL}video/show/{show_id}', video_show_slug, 'Downloading episode list', headers={ - 'X-Target-Distribution': lang, + 'X-Target-Distribution': lang or 'fr', **self._HEADERS, }, query={ 'order': 'asc', @@ -327,8 +328,8 @@ def _real_extract(self, url): def entries(): for episode_id in traverse_obj(episodes, ('videos', ..., 'id', {str_or_none})): - yield self.url_result( - f'https://animationdigitalnetwork.{lang}/video/{video_show_slug}/{episode_id}', - ADNIE, episode_id) + yield 
self.url_result(join_nonempty( + 'https://animationdigitalnetwork.com', lang, 'video', + video_show_slug, episode_id, delim='/'), ADNIE, episode_id) return self.playlist_result(entries(), show_id, show.get('title')) diff --git a/plugin/yt-dlp/yt_dlp/extractor/afreecatv.py b/plugin/yt-dlp/yt_dlp/extractor/afreecatv.py index f51b5a6..815d205 100644 --- a/plugin/yt-dlp/yt_dlp/extractor/afreecatv.py +++ b/plugin/yt-dlp/yt_dlp/extractor/afreecatv.py @@ -1,6 +1,7 @@ import functools from .common import InfoExtractor +from ..networking import Request from ..utils import ( ExtractorError, OnDemandPagedList, @@ -58,6 +59,13 @@ def _perform_login(self, username, password): f'Unable to login: {self.IE_NAME} said: {error}', expected=True) + def _call_api(self, endpoint, display_id, data=None, headers=None, query=None): + return self._download_json(Request( + f'https://api.m.afreecatv.com/{endpoint}', + data=data, headers=headers, query=query, + extensions={'legacy_ssl': True}), display_id, + 'Downloading API JSON', 'Unable to download API JSON') + class AfreecaTVIE(AfreecaTVBaseIE): IE_NAME = 'afreecatv' @@ -184,12 +192,12 @@ class AfreecaTVIE(AfreecaTVBaseIE): def _real_extract(self, url): video_id = self._match_id(url) - data = self._download_json( - 'https://api.m.afreecatv.com/station/video/a/view', video_id, - headers={'Referer': url}, data=urlencode_postdata({ + data = self._call_api( + 'station/video/a/view', video_id, headers={'Referer': url}, + data=urlencode_postdata({ 'nTitleNo': video_id, 'nApiLevel': 10, - }), impersonate=True)['data'] + }))['data'] error_code = traverse_obj(data, ('code', {int})) if error_code == -6221: @@ -267,9 +275,9 @@ class AfreecaTVCatchStoryIE(AfreecaTVBaseIE): def _real_extract(self, url): video_id = self._match_id(url) - data = self._download_json( - 'https://api.m.afreecatv.com/catchstory/a/view', video_id, headers={'Referer': url}, - query={'aStoryListIdx': '', 'nStoryIdx': video_id}, impersonate=True) + data = self._call_api( + 
'catchstory/a/view', video_id, headers={'Referer': url}, + query={'aStoryListIdx': '', 'nStoryIdx': video_id}) return self.playlist_result(self._entries(data), video_id) diff --git a/plugin/yt-dlp/yt_dlp/extractor/box.py b/plugin/yt-dlp/yt_dlp/extractor/box.py index 3547ad9..f06339f 100644 --- a/plugin/yt-dlp/yt_dlp/extractor/box.py +++ b/plugin/yt-dlp/yt_dlp/extractor/box.py @@ -12,7 +12,7 @@ class BoxIE(InfoExtractor): - _VALID_URL = r'https?://(?:[^.]+\.)?app\.box\.com/s/(?P[^/?#]+)(?:/file/(?P\d+))?' + _VALID_URL = r'https?://(?:[^.]+\.)?(?Papp|ent)\.box\.com/s/(?P[^/?#]+)(?:/file/(?P\d+))?' _TESTS = [{ 'url': 'https://mlssoccer.app.box.com/s/0evd2o3e08l60lr4ygukepvnkord1o1x/file/510727257538', 'md5': '1f81b2fd3960f38a40a3b8823e5fcd43', @@ -38,10 +38,22 @@ class BoxIE(InfoExtractor): 'uploader_id': '239068974', }, 'params': {'skip_download': 'dash fragment too small'}, + }, { + 'url': 'https://thejacksonlaboratory.ent.box.com/s/2x09dm6vcg6y28o0oox1so4l0t8wzt6l/file/1536173056065', + 'info_dict': { + 'id': '1536173056065', + 'ext': 'mp4', + 'uploader_id': '18523128264', + 'uploader': 'Lexi Hennigan', + 'title': 'iPSC Symposium recording part 1.mp4', + 'timestamp': 1716228343, + 'upload_date': '20240520', + }, + 'params': {'skip_download': 'dash fragment too small'}, }] def _real_extract(self, url): - shared_name, file_id = self._match_valid_url(url).groups() + shared_name, file_id, service = self._match_valid_url(url).group('shared_name', 'id', 'service') webpage = self._download_webpage(url, file_id or shared_name) if not file_id: @@ -57,14 +69,14 @@ def _real_extract(self, url): request_token = self._search_json( r'Box\.config\s*=', webpage, 'Box config', file_id)['requestToken'] access_token = self._download_json( - 'https://app.box.com/app-api/enduserapp/elements/tokens', file_id, + f'https://{service}.box.com/app-api/enduserapp/elements/tokens', file_id, 'Downloading token JSON metadata', data=json.dumps({'fileIDs': [file_id]}).encode(), headers={ 
'Content-Type': 'application/json', 'X-Request-Token': request_token, 'X-Box-EndUser-API': 'sharedName=' + shared_name, })[file_id]['read'] - shared_link = 'https://app.box.com/s/' + shared_name + shared_link = f'https://{service}.box.com/s/{shared_name}' f = self._download_json( 'https://api.box.com/2.0/files/' + file_id, file_id, 'Downloading file JSON metadata', headers={ diff --git a/plugin/yt-dlp/yt_dlp/extractor/cbc.py b/plugin/yt-dlp/yt_dlp/extractor/cbc.py index 1522b08..40224f6 100644 --- a/plugin/yt-dlp/yt_dlp/extractor/cbc.py +++ b/plugin/yt-dlp/yt_dlp/extractor/cbc.py @@ -1,4 +1,5 @@ import base64 +import functools import json import re import time @@ -6,17 +7,24 @@ import xml.etree.ElementTree from .common import InfoExtractor +from ..networking import HEADRequest from ..utils import ( ExtractorError, + float_or_none, int_or_none, join_nonempty, js_to_json, + mimetype2ext, orderedSet, parse_iso8601, + replace_extension, smuggle_url, strip_or_none, traverse_obj, try_get, + update_url, + url_basename, + url_or_none, ) @@ -149,6 +157,7 @@ def _real_extract(self, url): class CBCPlayerIE(InfoExtractor): IE_NAME = 'cbc.ca:player' _VALID_URL = r'(?:cbcplayer:|https?://(?:www\.)?cbc\.ca/(?:player/play/(?:video/)?|i/caffeine/syndicate/\?mediaId=))(?P(?:\d\.)?\d+)' + _GEO_COUNTRIES = ['CA'] _TESTS = [{ 'url': 'http://www.cbc.ca/player/play/2683190193', 'md5': '64d25f841ddf4ddb28a235338af32e2c', @@ -172,21 +181,20 @@ class CBCPlayerIE(InfoExtractor): 'description': 'md5:dd3b692f0a139b0369943150bd1c46a9', 'timestamp': 1425704400, 'upload_date': '20150307', - 'uploader': 'CBCC-NEW', - 'thumbnail': 'http://thumbnails.cbc.ca/maven_legacy/thumbnails/sonali-karnick-220.jpg', + 'thumbnail': 'https://i.cbc.ca/ais/1.2985700,1717262248558/full/max/0/default.jpg', 'chapters': [], 'duration': 494.811, - 'categories': ['AudioMobile/All in a Weekend Montreal'], - 'tags': 'count:8', + 'categories': ['All in a Weekend Montreal'], + 'tags': 'count:11', 'location': 'Quebec', 
'series': 'All in a Weekend Montreal', 'season': 'Season 2015', 'season_number': 2015, 'media_type': 'Excerpt', + 'genres': ['Other'], }, }, { 'url': 'http://www.cbc.ca/i/caffeine/syndicate/?mediaId=2164402062', - 'md5': '33fcd8f6719b9dd60a5e73adcb83b9f6', 'info_dict': { 'id': '2164402062', 'ext': 'mp4', @@ -194,107 +202,168 @@ class CBCPlayerIE(InfoExtractor): 'description': 'Tim Mayer has beaten three different forms of cancer four times in five years.', 'timestamp': 1320410746, 'upload_date': '20111104', - 'uploader': 'CBCC-NEW', - 'thumbnail': 'https://thumbnails.cbc.ca/maven_legacy/thumbnails/277/67/cancer_852x480_2164412612.jpg', + 'thumbnail': 'https://i.cbc.ca/ais/1.1711287,1717139372111/full/max/0/default.jpg', 'chapters': [], 'duration': 186.867, 'series': 'CBC News: Windsor at 6:00', - 'categories': ['News/Canada/Windsor'], + 'categories': ['Windsor'], 'location': 'Windsor', - 'tags': ['cancer'], - 'creators': ['Allison Johnson'], + 'tags': ['Cancer', 'News/Canada/Windsor', 'Windsor'], 'media_type': 'Excerpt', + 'genres': ['News'], }, + 'params': {'skip_download': 'm3u8'}, }, { # Redirected from http://www.cbc.ca/player/AudioMobile/All%20in%20a%20Weekend%20Montreal/ID/2657632011/ 'url': 'https://www.cbc.ca/player/play/1.2985700', 'md5': 'e5e708c34ae6fca156aafe17c43e8b75', 'info_dict': { - 'id': '2657631896', + 'id': '1.2985700', 'ext': 'mp3', 'title': 'CBC Montreal is organizing its first ever community hackathon!', 'description': 'The modern technology we tend to depend on so heavily, is never without it\'s share of hiccups and headaches. 
Next weekend - CBC Montreal will be getting members of the public for its first Hackathon.', 'timestamp': 1425704400, 'upload_date': '20150307', - 'uploader': 'CBCC-NEW', - 'thumbnail': 'http://thumbnails.cbc.ca/maven_legacy/thumbnails/sonali-karnick-220.jpg', + 'thumbnail': 'https://i.cbc.ca/ais/1.2985700,1717262248558/full/max/0/default.jpg', 'chapters': [], 'duration': 494.811, - 'categories': ['AudioMobile/All in a Weekend Montreal'], - 'tags': 'count:8', + 'categories': ['All in a Weekend Montreal'], + 'tags': 'count:11', 'location': 'Quebec', 'series': 'All in a Weekend Montreal', 'season': 'Season 2015', 'season_number': 2015, 'media_type': 'Excerpt', + 'genres': ['Other'], }, }, { 'url': 'https://www.cbc.ca/player/play/1.1711287', - 'md5': '33fcd8f6719b9dd60a5e73adcb83b9f6', 'info_dict': { - 'id': '2164402062', + 'id': '1.1711287', 'ext': 'mp4', 'title': 'Cancer survivor four times over', 'description': 'Tim Mayer has beaten three different forms of cancer four times in five years.', 'timestamp': 1320410746, 'upload_date': '20111104', - 'uploader': 'CBCC-NEW', - 'thumbnail': 'https://thumbnails.cbc.ca/maven_legacy/thumbnails/277/67/cancer_852x480_2164412612.jpg', + 'thumbnail': 'https://i.cbc.ca/ais/1.1711287,1717139372111/full/max/0/default.jpg', 'chapters': [], 'duration': 186.867, 'series': 'CBC News: Windsor at 6:00', - 'categories': ['News/Canada/Windsor'], + 'categories': ['Windsor'], 'location': 'Windsor', - 'tags': ['cancer'], - 'creators': ['Allison Johnson'], + 'tags': ['Cancer', 'News/Canada/Windsor', 'Windsor'], 'media_type': 'Excerpt', + 'genres': ['News'], }, + 'params': {'skip_download': 'm3u8'}, }, { # Has subtitles # These broadcasts expire after ~1 month, can find new test URL here: # https://www.cbc.ca/player/news/TV%20Shows/The%20National/Latest%20Broadcast - 'url': 'https://www.cbc.ca/player/play/1.7159484', - 'md5': '6ed6cd0fc2ef568d2297ba68a763d455', + 'url': 'https://www.cbc.ca/player/play/video/9.6424403', + 'md5': 
'8025909eaffcf0adf59922904def9a5e', 'info_dict': { - 'id': '2324213316001', + 'id': '9.6424403', 'ext': 'mp4', - 'title': 'The National | School boards sue social media giants', - 'description': 'md5:4b4db69322fa32186c3ce426da07402c', - 'timestamp': 1711681200, - 'duration': 2743.400, - 'subtitles': {'eng': [{'ext': 'vtt', 'protocol': 'm3u8_native'}]}, - 'thumbnail': 'https://thumbnails.cbc.ca/maven_legacy/thumbnails/607/559/thumbnail.jpeg', - 'uploader': 'CBCC-NEW', + 'title': 'The National | N.W.T. wildfire emergency', + 'description': 'md5:ada33d36d1df69347ed575905bfd496c', + 'timestamp': 1718589600, + 'duration': 2692.833, + 'subtitles': { + 'en-US': [{ + 'name': 'English Captions', + 'url': 'https://cbchls.akamaized.net/delivery/news-shows/2024/06/17/NAT_JUN16-00-55-00/NAT_JUN16_cc.vtt', + }], + }, + 'thumbnail': 'https://i.cbc.ca/ais/6272b5c6-5e78-4c05-915d-0e36672e33d1,1714756287822/full/max/0/default.jpg', 'chapters': 'count:5', - 'upload_date': '20240329', - 'categories': 'count:4', + 'upload_date': '20240617', + 'categories': ['News', 'The National', 'The National Latest Broadcasts'], 'series': 'The National - Full Show', - 'tags': 'count:1', - 'creators': ['News'], + 'tags': ['The National'], 'location': 'Canada', 'media_type': 'Full Program', + 'genres': ['News'], }, }, { 'url': 'https://www.cbc.ca/player/play/video/1.7194274', 'md5': '188b96cf6bdcb2540e178a6caa957128', 'info_dict': { - 'id': '2334524995812', + 'id': '1.7194274', 'ext': 'mp4', 'title': '#TheMoment a rare white spirit moose was spotted in Alberta', 'description': 'md5:18ae269a2d0265c5b0bbe4b2e1ac61a3', 'timestamp': 1714788791, 'duration': 77.678, 'subtitles': {'eng': [{'ext': 'vtt', 'protocol': 'm3u8_native'}]}, - 'thumbnail': 'https://thumbnails.cbc.ca/maven_legacy/thumbnails/201/543/THE_MOMENT.jpg', - 'uploader': 'CBCC-NEW', - 'chapters': 'count:0', - 'upload_date': '20240504', + 'thumbnail': 'https://i.cbc.ca/ais/1.7194274,1717224990425/full/max/0/default.jpg', + 'chapters': [], 
'categories': 'count:3', 'series': 'The National', - 'tags': 'count:15', - 'creators': ['encoder'], + 'tags': 'count:17', + 'location': 'Canada', + 'media_type': 'Excerpt', + 'upload_date': '20240504', + 'genres': ['News'], + }, + }, { + 'url': 'https://www.cbc.ca/player/play/video/9.6427282', + 'info_dict': { + 'id': '9.6427282', + 'ext': 'mp4', + 'title': 'Men\'s Soccer - Argentina vs Morocco', + 'description': 'Argentina faces Morocco on the football pitch at Saint Etienne Stadium.', + 'series': 'CBC Sports', + 'media_type': 'Event Coverage', + 'thumbnail': 'https://i.cbc.ca/ais/a4c5c0c2-99fa-4bd3-8061-5a63879c1b33,1718828053500/full/max/0/default.jpg', + 'timestamp': 1721825400.0, + 'upload_date': '20240724', + 'duration': 10568.0, + 'chapters': [], + 'genres': [], + 'tags': ['2024 Paris Olympic Games'], + 'categories': ['Olympics Summer Soccer', 'Summer Olympics Replays', 'Summer Olympics Soccer Replays'], 'location': 'Canada', + }, + 'params': {'skip_download': 'm3u8'}, + }, { + 'url': 'https://www.cbc.ca/player/play/video/9.6459530', + 'md5': '6c1bb76693ab321a2e99c347a1d5ecbc', + 'info_dict': { + 'id': '9.6459530', + 'ext': 'mp4', + 'title': 'Parts of Jasper incinerated as wildfire rages', + 'description': 'md5:6f1caa8d128ad3f629257ef5fecf0962', + 'series': 'The National', + 'media_type': 'Excerpt', + 'thumbnail': 'https://i.cbc.ca/ais/507c0086-31a2-494d-96e4-bffb1048d045,1721953984375/full/max/0/default.jpg', + 'timestamp': 1721964091.012, + 'upload_date': '20240726', + 'duration': 952.285, + 'chapters': [], + 'genres': [], + 'tags': 'count:23', + 'categories': ['News (FAST)', 'News', 'The National', 'TV News Shows', 'The National '], + }, + }, { + 'url': 'https://www.cbc.ca/player/play/video/9.6420651', + 'md5': '71a850c2c6ee5e912de169f5311bb533', + 'info_dict': { + 'id': '9.6420651', + 'ext': 'mp4', + 'title': 'Is it a breath of fresh air? 
Measuring air quality in Edmonton', + 'description': 'md5:3922b92cc8b69212d739bd9dd095b1c3', + 'series': 'CBC News Edmonton', 'media_type': 'Excerpt', + 'thumbnail': 'https://i.cbc.ca/ais/73c4ab9c-7ad4-46ee-bb9b-020fdc01c745,1718214547576/full/max/0/default.jpg', + 'timestamp': 1718220065.768, + 'upload_date': '20240612', + 'duration': 286.086, + 'chapters': [], + 'genres': ['News'], + 'categories': ['News', 'Edmonton'], + 'tags': 'count:7', + 'location': 'Edmonton', }, }, { 'url': 'cbcplayer:1.7159484', @@ -307,23 +376,113 @@ class CBCPlayerIE(InfoExtractor): 'only_matching': True, }] + def _parse_param(self, asset_data, name): + return traverse_obj(asset_data, ('params', lambda _, v: v['name'] == name, 'value', {str}, any)) + def _real_extract(self, url): video_id = self._match_id(url) - if '.' in video_id: - webpage = self._download_webpage(f'https://www.cbc.ca/player/play/{video_id}', video_id) - video_id = self._search_json( - r'window\.__INITIAL_STATE__\s*=', webpage, - 'initial state', video_id)['video']['currentClip']['mediaId'] + webpage = self._download_webpage(f'https://www.cbc.ca/player/play/{video_id}', video_id) + data = self._search_json( + r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', video_id)['video']['currentClip'] + assets = traverse_obj( + data, ('media', 'assets', lambda _, v: url_or_none(v['key']) and v['type'])) + + if not assets and (media_id := traverse_obj(data, ('mediaId', {str}))): + # XXX: Deprecated; CBC is migrating off of ThePlatform + return { + '_type': 'url_transparent', + 'ie_key': 'ThePlatform', + 'url': smuggle_url( + f'http://link.theplatform.com/s/ExhSPC/media/guid/2655402169/{media_id}?mbr=true&formats=MPEG4,FLV,MP3', { + 'force_smil_url': True, + }), + 'id': media_id, + '_format_sort_fields': ('res', 'proto'), # Prioritize direct http formats over HLS + } + + is_live = traverse_obj(data, ('media', 'streamType', {str})) == 'Live' + formats, subtitles = [], {} + + for sub in traverse_obj(data, ('media', 
'textTracks', lambda _, v: url_or_none(v['src']))): + subtitles.setdefault(sub.get('language') or 'und', []).append({ + 'url': sub['src'], + 'name': sub.get('label'), + }) + + for asset in assets: + asset_key = asset['key'] + asset_type = asset['type'] + if asset_type != 'medianet': + self.report_warning(f'Skipping unsupported asset type "{asset_type}": {asset_key}') + continue + asset_data = self._download_json(asset_key, video_id, f'Downloading {asset_type} JSON') + ext = mimetype2ext(self._parse_param(asset_data, 'contentType')) + if ext == 'm3u8': + fmts, subs = self._extract_m3u8_formats_and_subtitles( + asset_data['url'], video_id, 'mp4', m3u8_id='hls', live=is_live) + formats.extend(fmts) + # Avoid slow/error-prone webvtt-over-m3u8 if direct https vtt is available + if not subtitles: + self._merge_subtitles(subs, target=subtitles) + if is_live or not fmts: + continue + # Check for direct https mp4 format + best_video_fmt = traverse_obj(fmts, ( + lambda _, v: v.get('vcodec') != 'none' and v['tbr'], all, + {functools.partial(sorted, key=lambda x: x['tbr'])}, -1, {dict})) or {} + base_url = self._search_regex( + r'(https?://[^?#]+?/)hdntl=', best_video_fmt.get('url'), 'base url', default=None) + if not base_url or '/live/' in base_url: + continue + mp4_url = base_url + replace_extension(url_basename(best_video_fmt['url']), 'mp4') + if self._request_webpage( + HEADRequest(mp4_url), video_id, 'Checking for https format', + errnote=False, fatal=False): + formats.append({ + **best_video_fmt, + 'url': mp4_url, + 'format_id': 'https-mp4', + 'protocol': 'https', + 'manifest_url': None, + 'acodec': None, + }) + else: + formats.append({ + 'url': asset_data['url'], + 'ext': ext, + 'vcodec': 'none' if self._parse_param(asset_data, 'mediaType') == 'audio' else None, + }) + + chapters = traverse_obj(data, ( + 'media', 'chapters', lambda _, v: float(v['startTime']) is not None, { + 'start_time': ('startTime', {functools.partial(float_or_none, scale=1000)}), + 'end_time': 
('endTime', {functools.partial(float_or_none, scale=1000)}), + 'title': ('name', {str}), + })) + # Filter out pointless single chapters with start_time==0 and no end_time + if len(chapters) == 1 and not (chapters[0].get('start_time') or chapters[0].get('end_time')): + chapters = [] return { - '_type': 'url_transparent', - 'ie_key': 'ThePlatform', - 'url': smuggle_url( - f'http://link.theplatform.com/s/ExhSPC/media/guid/2655402169/{video_id}?mbr=true&formats=MPEG4,FLV,MP3', { - 'force_smil_url': True, - }), + **traverse_obj(data, { + 'title': ('title', {str}), + 'description': ('description', {str.strip}), + 'thumbnail': ('image', 'url', {url_or_none}, {functools.partial(update_url, query=None)}), + 'timestamp': ('publishedAt', {functools.partial(float_or_none, scale=1000)}), + 'media_type': ('media', 'clipType', {str}), + 'series': ('showName', {str}), + 'season_number': ('media', 'season', {int_or_none}), + 'duration': ('media', 'duration', {float_or_none}, {lambda x: None if is_live else x}), + 'location': ('media', 'region', {str}), + 'tags': ('tags', ..., 'name', {str}), + 'genres': ('media', 'genre', all), + 'categories': ('categories', ..., 'name', {str}), + }), 'id': video_id, - '_format_sort_fields': ('res', 'proto'), # Prioritize direct http formats over HLS + 'formats': formats, + 'subtitles': subtitles, + 'chapters': chapters, + 'is_live': is_live, } @@ -647,11 +806,11 @@ class CBCGemLiveIE(InfoExtractor): 'title': 'Ottawa', 'description': 'The live TV channel and local programming from Ottawa', 'thumbnail': 'https://thumbnails.cbc.ca/maven_legacy/thumbnails/CBC_OTT_VMS/Live_Channel_Static_Images/Ottawa_2880x1620.jpg', - 'is_live': True, + 'live_status': 'is_live', 'id': 'AyqZwxRqh8EH', 'ext': 'mp4', - 'timestamp': 1492106160, - 'upload_date': '20170413', + 'release_timestamp': 1492106160, + 'release_date': '20170413', 'uploader': 'CBCC-NEW', }, 'skip': 'Live might have ended', @@ -680,49 +839,84 @@ class CBCGemLiveIE(InfoExtractor): 'description': 
'March 24, 2023 | President Biden’s Ottawa visit ends with big pledges from both countries. Plus, Gwyneth Paltrow testifies in her ski collision trial.', 'live_status': 'is_live', 'thumbnail': r're:https://images.gem.cbc.ca/v1/cbc-gem/live/.*', - 'timestamp': 1679706000, - 'upload_date': '20230325', + 'release_timestamp': 1679706000, + 'release_date': '20230325', }, 'params': {'skip_download': True}, 'skip': 'Live might have ended', }, + { # event replay (medianetlive) + 'url': 'https://gem.cbc.ca/live-event/42314', + 'md5': '297a9600f554f2258aed01514226a697', + 'info_dict': { + 'id': '42314', + 'ext': 'mp4', + 'live_status': 'was_live', + 'title': 'Women\'s Soccer - Canada vs New Zealand', + 'description': 'md5:36200e5f1a70982277b5a6ecea86155d', + 'thumbnail': r're:https://.+default\.jpg', + 'release_timestamp': 1721917200, + 'release_date': '20240725', + }, + 'params': {'skip_download': True}, + 'skip': 'Replay might no longer be available', + }, + { # event replay (medianetlive) + 'url': 'https://gem.cbc.ca/live-event/43273', + 'only_matching': True, + }, ] + _GEO_COUNTRIES = ['CA'] def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) video_info = self._search_nextjs_data(webpage, video_id)['props']['pageProps']['data'] - # Two types of metadata JSON + # Three types of video_info JSON: info in root, freeTv stream/item, event replay if not video_info.get('formattedIdMedia'): - video_info = traverse_obj( - video_info, (('freeTv', ('streams', ...)), 'items', lambda _, v: v['key'] == video_id, {dict}), - get_all=False, default={}) + if traverse_obj(video_info, ('event', 'key')) == video_id: + video_info = video_info['event'] + else: + video_info = traverse_obj(video_info, ( + ('freeTv', ('streams', ...)), 'items', + lambda _, v: v['key'].partition('-')[0] == video_id, any)) or {} video_stream_id = video_info.get('formattedIdMedia') if not video_stream_id: - raise ExtractorError('Couldn\'t find video metadata, 
maybe this livestream is now offline', expected=True) - - stream_data = self._download_json( - 'https://services.radio-canada.ca/media/validation/v2/', video_id, query={ - 'appCode': 'mpx', - 'connectionType': 'hd', - 'deviceType': 'ipad', - 'idMedia': video_stream_id, - 'multibitrate': 'true', - 'output': 'json', - 'tech': 'hls', - 'manifestType': 'desktop', - }) + raise ExtractorError( + 'Couldn\'t find video metadata, maybe this livestream is now offline', expected=True) + + live_status = 'was_live' if video_info.get('isVodEnabled') else 'is_live' + release_timestamp = traverse_obj(video_info, ('airDate', {parse_iso8601})) + + if live_status == 'is_live' and release_timestamp and release_timestamp > time.time(): + formats = [] + live_status = 'is_upcoming' + self.raise_no_formats('This livestream has not yet started', expected=True) + else: + stream_data = self._download_json( + 'https://services.radio-canada.ca/media/validation/v2/', video_id, query={ + 'appCode': 'medianetlive', + 'connectionType': 'hd', + 'deviceType': 'ipad', + 'idMedia': video_stream_id, + 'multibitrate': 'true', + 'output': 'json', + 'tech': 'hls', + 'manifestType': 'desktop', + }) + formats = self._extract_m3u8_formats( + stream_data['url'], video_id, 'mp4', live=live_status == 'is_live') return { 'id': video_id, - 'formats': self._extract_m3u8_formats(stream_data['url'], video_id, 'mp4', live=True), - 'is_live': True, + 'formats': formats, + 'live_status': live_status, + 'release_timestamp': release_timestamp, **traverse_obj(video_info, { - 'title': 'title', - 'description': 'description', + 'title': ('title', {str}), + 'description': ('description', {str}), 'thumbnail': ('images', 'card', 'url'), - 'timestamp': ('airDate', {parse_iso8601}), }), } diff --git a/plugin/yt-dlp/yt_dlp/extractor/common.py b/plugin/yt-dlp/yt_dlp/extractor/common.py index f63bd78..187f73e 100644 --- a/plugin/yt-dlp/yt_dlp/extractor/common.py +++ b/plugin/yt-dlp/yt_dlp/extractor/common.py @@ -3150,7 +3150,7 @@ 
def _parse_ism_formats_and_subtitles(self, ism_doc, ism_url, ism_id=None): }) return formats, subtitles - def _parse_html5_media_entries(self, base_url, webpage, video_id, m3u8_id=None, m3u8_entry_protocol='m3u8_native', mpd_id=None, preference=None, quality=None): + def _parse_html5_media_entries(self, base_url, webpage, video_id, m3u8_id=None, m3u8_entry_protocol='m3u8_native', mpd_id=None, preference=None, quality=None, _headers=None): def absolute_url(item_url): return urljoin(base_url, item_url) @@ -3174,11 +3174,11 @@ def _media_formats(src, cur_media_type, type_info=None): formats = self._extract_m3u8_formats( full_url, video_id, ext='mp4', entry_protocol=m3u8_entry_protocol, m3u8_id=m3u8_id, - preference=preference, quality=quality, fatal=False) + preference=preference, quality=quality, fatal=False, headers=_headers) elif ext == 'mpd': is_plain_url = False formats = self._extract_mpd_formats( - full_url, video_id, mpd_id=mpd_id, fatal=False) + full_url, video_id, mpd_id=mpd_id, fatal=False, headers=_headers) else: is_plain_url = True formats = [{ @@ -3272,6 +3272,8 @@ def _media_formats(src, cur_media_type, type_info=None): }) for f in media_info['formats']: f.setdefault('http_headers', {})['Referer'] = base_url + if _headers: + f['http_headers'].update(_headers) if media_info['formats'] or media_info['subtitles']: entries.append(media_info) return entries diff --git a/plugin/yt-dlp/yt_dlp/extractor/digitalconcerthall.py b/plugin/yt-dlp/yt_dlp/extractor/digitalconcerthall.py index 8b4d5c0..edb6fa9 100644 --- a/plugin/yt-dlp/yt_dlp/extractor/digitalconcerthall.py +++ b/plugin/yt-dlp/yt_dlp/extractor/digitalconcerthall.py @@ -1,6 +1,8 @@ from .common import InfoExtractor +from ..networking.exceptions import HTTPError from ..utils import ( ExtractorError, + parse_codecs, try_get, url_or_none, urlencode_postdata, @@ -12,6 +14,7 @@ class DigitalConcertHallIE(InfoExtractor): IE_DESC = 'DigitalConcertHall extractor' _VALID_URL = 
r'https?://(?:www\.)?digitalconcerthall\.com/(?P[a-z]+)/(?Pfilm|concert|work)/(?P[0-9]+)-?(?P[0-9]+)?' _OAUTH_URL = 'https://api.digitalconcerthall.com/v2/oauth2/token' + _USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.5 Safari/605.1.15' _ACCESS_TOKEN = None _NETRC_MACHINE = 'digitalconcerthall' _TESTS = [{ @@ -68,33 +71,42 @@ class DigitalConcertHallIE(InfoExtractor): }] def _perform_login(self, username, password): - token_response = self._download_json( + login_token = self._download_json( self._OAUTH_URL, None, 'Obtaining token', errnote='Unable to obtain token', data=urlencode_postdata({ 'affiliate': 'none', 'grant_type': 'device', 'device_vendor': 'unknown', + # device_model 'Safari' gets split streams of 4K/HEVC video and lossless/FLAC audio + 'device_model': 'unknown' if self._configuration_arg('prefer_combined_hls') else 'Safari', 'app_id': 'dch.webapp', - 'app_version': '1.0.0', + 'app_distributor': 'berlinphil', + 'app_version': '1.84.0', 'client_secret': '2ySLN+2Fwb', }), headers={ - 'Content-Type': 'application/x-www-form-urlencoded', - }) - self._ACCESS_TOKEN = token_response['access_token'] + 'Accept': 'application/json', + 'Content-Type': 'application/x-www-form-urlencoded;charset=UTF-8', + 'User-Agent': self._USER_AGENT, + })['access_token'] try: - self._download_json( + login_response = self._download_json( self._OAUTH_URL, None, note='Logging in', errnote='Unable to login', data=urlencode_postdata({ 'grant_type': 'password', 'username': username, 'password': password, }), headers={ - 'Content-Type': 'application/x-www-form-urlencoded', + 'Accept': 'application/json', + 'Content-Type': 'application/x-www-form-urlencoded;charset=UTF-8', 'Referer': 'https://www.digitalconcerthall.com', - 'Authorization': f'Bearer {self._ACCESS_TOKEN}', + 'Authorization': f'Bearer {login_token}', + 'User-Agent': self._USER_AGENT, }) - except ExtractorError: - self.raise_login_required(msg='Login info 
incorrect') + except ExtractorError as error: + if isinstance(error.cause, HTTPError) and error.cause.status == 401: + raise ExtractorError('Invalid username or password', expected=True) + raise + self._ACCESS_TOKEN = login_response['access_token'] def _real_initialize(self): if not self._ACCESS_TOKEN: @@ -108,11 +120,15 @@ def _entries(self, items, language, type_, **kwargs): 'Accept': 'application/json', 'Authorization': f'Bearer {self._ACCESS_TOKEN}', 'Accept-Language': language, + 'User-Agent': self._USER_AGENT, }) formats = [] for m3u8_url in traverse_obj(stream_info, ('channel', ..., 'stream', ..., 'url', {url_or_none})): - formats.extend(self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', fatal=False)) + formats.extend(self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', m3u8_id='hls', fatal=False)) + for fmt in formats: + if fmt.get('format_note') and fmt.get('vcodec') == 'none': + fmt.update(parse_codecs(fmt['format_note'])) yield { 'id': video_id, @@ -140,13 +156,15 @@ def _real_extract(self, url): f'https://api.digitalconcerthall.com/v2/{api_type}/{video_id}', video_id, headers={ 'Accept': 'application/json', 'Accept-Language': language, + 'User-Agent': self._USER_AGENT, + 'Authorization': f'Bearer {self._ACCESS_TOKEN}', }) - album_artists = traverse_obj(vid_info, ('_links', 'artist', ..., 'name')) videos = [vid_info] if type_ == 'film' else traverse_obj(vid_info, ('_embedded', ..., ...)) if type_ == 'work': videos = [videos[int(part) - 1]] + album_artists = traverse_obj(vid_info, ('_links', 'artist', ..., 'name', {str})) thumbnail = traverse_obj(vid_info, ( 'image', ..., {self._proto_relative_url}, {url_or_none}, {lambda x: x.format(width=0, height=0)}, any)) # NB: 0x0 is the original size diff --git a/plugin/yt-dlp/yt_dlp/extractor/discovery.py b/plugin/yt-dlp/yt_dlp/extractor/discovery.py deleted file mode 100644 index b98279d..0000000 --- a/plugin/yt-dlp/yt_dlp/extractor/discovery.py +++ /dev/null @@ -1,115 +0,0 @@ -import random -import string 
-import urllib.parse - -from .discoverygo import DiscoveryGoBaseIE -from ..networking.exceptions import HTTPError -from ..utils import ExtractorError - - -class DiscoveryIE(DiscoveryGoBaseIE): - _VALID_URL = r'''(?x)https?:// - (?P - go\.discovery| - www\. - (?: - investigationdiscovery| - discoverylife| - animalplanet| - ahctv| - destinationamerica| - sciencechannel| - tlc - )| - watch\. - (?: - hgtv| - foodnetwork| - travelchannel| - diynetwork| - cookingchanneltv| - motortrend - ) - )\.com/tv-shows/(?P[^/]+)/(?:video|full-episode)s/(?P[^./?#]+)''' - _TESTS = [{ - 'url': 'https://go.discovery.com/tv-shows/cash-cab/videos/riding-with-matthew-perry', - 'info_dict': { - 'id': '5a2f35ce6b66d17a5026e29e', - 'ext': 'mp4', - 'title': 'Riding with Matthew Perry', - 'description': 'md5:a34333153e79bc4526019a5129e7f878', - 'duration': 84, - }, - 'params': { - 'skip_download': True, # requires ffmpeg - }, - }, { - 'url': 'https://www.investigationdiscovery.com/tv-shows/final-vision/full-episodes/final-vision', - 'only_matching': True, - }, { - 'url': 'https://go.discovery.com/tv-shows/alaskan-bush-people/videos/follow-your-own-road', - 'only_matching': True, - }, { - # using `show_slug` is important to get the correct video data - 'url': 'https://www.sciencechannel.com/tv-shows/mythbusters-on-science/full-episodes/christmas-special', - 'only_matching': True, - }] - _GEO_COUNTRIES = ['US'] - _GEO_BYPASS = False - _API_BASE_URL = 'https://api.discovery.com/v1/' - - def _real_extract(self, url): - site, show_slug, display_id = self._match_valid_url(url).groups() - - access_token = None - cookies = self._get_cookies(url) - - # prefer Affiliate Auth Token over Anonymous Auth Token - auth_storage_cookie = cookies.get('eosAf') or cookies.get('eosAn') - if auth_storage_cookie and auth_storage_cookie.value: - auth_storage = self._parse_json(urllib.parse.unquote( - urllib.parse.unquote(auth_storage_cookie.value)), - display_id, fatal=False) or {} - access_token = 
auth_storage.get('a') or auth_storage.get('access_token') - - if not access_token: - access_token = self._download_json( - f'https://{site}.com/anonymous', display_id, - 'Downloading token JSON metadata', query={ - 'authRel': 'authorization', - 'client_id': '3020a40c2356a645b4b4', - 'nonce': ''.join(random.choices(string.ascii_letters, k=32)), - 'redirectUri': 'https://www.discovery.com/', - })['access_token'] - - headers = self.geo_verification_headers() - headers['Authorization'] = 'Bearer ' + access_token - - try: - video = self._download_json( - self._API_BASE_URL + 'content/videos', - display_id, 'Downloading content JSON metadata', - headers=headers, query={ - 'embed': 'show.name', - 'fields': 'authenticated,description.detailed,duration,episodeNumber,id,name,parental.rating,season.number,show,tags', - 'slug': display_id, - 'show_slug': show_slug, - })[0] - video_id = video['id'] - stream = self._download_json( - self._API_BASE_URL + 'streaming/video/' + video_id, - display_id, 'Downloading streaming JSON metadata', headers=headers) - except ExtractorError as e: - if isinstance(e.cause, HTTPError) and e.cause.status in (401, 403): - e_description = self._parse_json( - e.cause.response.read().decode(), display_id)['description'] - if 'resource not available for country' in e_description: - self.raise_geo_restricted(countries=self._GEO_COUNTRIES) - if 'Authorized Networks' in e_description: - raise ExtractorError( - 'This video is only available via cable service provider subscription that' - ' is not currently supported. 
You may want to use --cookies.', expected=True) - raise ExtractorError(e_description) - raise - - return self._extract_video_info(video, stream, display_id) diff --git a/plugin/yt-dlp/yt_dlp/extractor/discoverygo.py b/plugin/yt-dlp/yt_dlp/extractor/discoverygo.py deleted file mode 100644 index 9649485..0000000 --- a/plugin/yt-dlp/yt_dlp/extractor/discoverygo.py +++ /dev/null @@ -1,171 +0,0 @@ -import re - -from .common import InfoExtractor -from ..utils import ( - ExtractorError, - determine_ext, - extract_attributes, - int_or_none, - parse_age_limit, - remove_end, - unescapeHTML, - url_or_none, -) - - -class DiscoveryGoBaseIE(InfoExtractor): - _VALID_URL_TEMPLATE = r'''(?x)https?://(?:www\.)?(?: - discovery| - investigationdiscovery| - discoverylife| - animalplanet| - ahctv| - destinationamerica| - sciencechannel| - tlc| - velocitychannel - )go\.com/%s(?P[^/?#&]+)''' - - def _extract_video_info(self, video, stream, display_id): - title = video['name'] - - if not stream: - if video.get('authenticated') is True: - raise ExtractorError( - 'This video is only available via cable service provider subscription that' - ' is not currently supported. 
You may want to use --cookies.', expected=True) - else: - raise ExtractorError('Unable to find stream') - STREAM_URL_SUFFIX = 'streamUrl' - formats = [] - for stream_kind in ('', 'hds'): - suffix = STREAM_URL_SUFFIX.capitalize() if stream_kind else STREAM_URL_SUFFIX - stream_url = stream.get(f'{stream_kind}{suffix}') - if not stream_url: - continue - if stream_kind == '': - formats.extend(self._extract_m3u8_formats( - stream_url, display_id, 'mp4', entry_protocol='m3u8_native', - m3u8_id='hls', fatal=False)) - elif stream_kind == 'hds': - formats.extend(self._extract_f4m_formats( - stream_url, display_id, f4m_id=stream_kind, fatal=False)) - - video_id = video.get('id') or display_id - description = video.get('description', {}).get('detailed') - duration = int_or_none(video.get('duration')) - - series = video.get('show', {}).get('name') - season_number = int_or_none(video.get('season', {}).get('number')) - episode_number = int_or_none(video.get('episodeNumber')) - - tags = video.get('tags') - age_limit = parse_age_limit(video.get('parental', {}).get('rating')) - - subtitles = {} - captions = stream.get('captions') - if isinstance(captions, list): - for caption in captions: - subtitle_url = url_or_none(caption.get('fileUrl')) - if not subtitle_url or not subtitle_url.startswith('http'): - continue - lang = caption.get('fileLang', 'en') - ext = determine_ext(subtitle_url) - subtitles.setdefault(lang, []).append({ - 'url': subtitle_url, - 'ext': 'ttml' if ext == 'xml' else ext, - }) - - return { - 'id': video_id, - 'display_id': display_id, - 'title': title, - 'description': description, - 'duration': duration, - 'series': series, - 'season_number': season_number, - 'episode_number': episode_number, - 'tags': tags, - 'age_limit': age_limit, - 'formats': formats, - 'subtitles': subtitles, - } - - -class DiscoveryGoIE(DiscoveryGoBaseIE): - _VALID_URL = DiscoveryGoBaseIE._VALID_URL_TEMPLATE % r'(?:[^/]+/)+' - _GEO_COUNTRIES = ['US'] - _TEST = { - 'url': 
'https://www.discoverygo.com/bering-sea-gold/reaper-madness/', - 'info_dict': { - 'id': '58c167d86b66d12f2addeb01', - 'ext': 'mp4', - 'title': 'Reaper Madness', - 'description': 'md5:09f2c625c99afb8946ed4fb7865f6e78', - 'duration': 2519, - 'series': 'Bering Sea Gold', - 'season_number': 8, - 'episode_number': 6, - 'age_limit': 14, - }, - } - - def _real_extract(self, url): - display_id = self._match_id(url) - - webpage = self._download_webpage(url, display_id) - - container = extract_attributes( - self._search_regex( - r'(]+class=["\']video-player-container[^>]+>)', - webpage, 'video container')) - - video = self._parse_json( - container.get('data-video') or container.get('data-json'), - display_id) - - stream = video.get('stream') - - return self._extract_video_info(video, stream, display_id) - - -class DiscoveryGoPlaylistIE(DiscoveryGoBaseIE): - _VALID_URL = DiscoveryGoBaseIE._VALID_URL_TEMPLATE % '' - _TEST = { - 'url': 'https://www.discoverygo.com/bering-sea-gold/', - 'info_dict': { - 'id': 'bering-sea-gold', - 'title': 'Bering Sea Gold', - 'description': 'md5:cc5c6489835949043c0cc3ad66c2fa0e', - }, - 'playlist_mincount': 6, - } - - @classmethod - def suitable(cls, url): - return False if DiscoveryGoIE.suitable(url) else super().suitable(url) - - def _real_extract(self, url): - display_id = self._match_id(url) - - webpage = self._download_webpage(url, display_id) - - entries = [] - for mobj in re.finditer(r'data-json=(["\'])(?P{.+?})\1', webpage): - data = self._parse_json( - mobj.group('json'), display_id, - transform_source=unescapeHTML, fatal=False) - if not isinstance(data, dict) or data.get('type') != 'episode': - continue - episode_url = data.get('socialUrl') - if not episode_url: - continue - entries.append(self.url_result( - episode_url, ie=DiscoveryGoIE.ie_key(), - video_id=data.get('id'))) - - return self.playlist_result( - entries, display_id, - remove_end(self._og_search_title( - webpage, fatal=False), ' | Discovery GO'), - 
self._og_search_description(webpage)) diff --git a/plugin/yt-dlp/yt_dlp/extractor/dplay.py b/plugin/yt-dlp/yt_dlp/extractor/dplay.py index 48eae10..8d77072 100644 --- a/plugin/yt-dlp/yt_dlp/extractor/dplay.py +++ b/plugin/yt-dlp/yt_dlp/extractor/dplay.py @@ -346,8 +346,16 @@ def _real_extract(self, url): class DiscoveryPlusBaseIE(DPlayBaseIE): + """Subclasses must set _PRODUCT, _DISCO_API_PARAMS""" + + _DISCO_CLIENT_VER = '27.43.0' + def _update_disco_api_headers(self, headers, disco_base, display_id, realm): - headers['x-disco-client'] = f'WEB:UNKNOWN:{self._PRODUCT}:25.2.6' + headers.update({ + 'x-disco-params': f'realm={realm},siteLookupKey={self._PRODUCT}', + 'x-disco-client': f'WEB:UNKNOWN:{self._PRODUCT}:{self._DISCO_CLIENT_VER}', + 'Authorization': self._get_auth(disco_base, display_id, realm), + }) def _download_video_playback_info(self, disco_base, video_id, headers): return self._download_json( @@ -368,6 +376,26 @@ def _real_extract(self, url): class GoDiscoveryIE(DiscoveryPlusBaseIE): _VALID_URL = r'https?://(?:go\.)?discovery\.com/video' + DPlayBaseIE._PATH_REGEX _TESTS = [{ + 'url': 'https://go.discovery.com/video/in-the-eye-of-the-storm-discovery-atve-us/trapped-in-a-twister', + 'info_dict': { + 'id': '5352642', + 'display_id': 'in-the-eye-of-the-storm-discovery-atve-us/trapped-in-a-twister', + 'ext': 'mp4', + 'title': 'Trapped in a Twister', + 'description': 'Twisters destroy Midwest towns, trapping spotters in the eye of the storm.', + 'episode_number': 1, + 'episode': 'Episode 1', + 'season_number': 1, + 'season': 'Season 1', + 'series': 'In The Eye Of The Storm', + 'duration': 2490.237, + 'upload_date': '20240715', + 'timestamp': 1721008800, + 'tags': [], + 'creators': ['Discovery'], + 'thumbnail': 'https://us1-prod-images.disco-api.com/2024/07/10/5e39637d-cabf-3ab3-8e9a-f4e9d37bc036.jpeg', + }, + }, { 'url': 'https://go.discovery.com/video/dirty-jobs-discovery-atve-us/rodbuster-galvanizer', 'info_dict': { 'id': '4164906', @@ -395,6 +423,26 @@ 
class GoDiscoveryIE(DiscoveryPlusBaseIE): class TravelChannelIE(DiscoveryPlusBaseIE): _VALID_URL = r'https?://(?:watch\.)?travelchannel\.com/video' + DPlayBaseIE._PATH_REGEX _TESTS = [{ + 'url': 'https://watch.travelchannel.com/video/the-dead-files-travel-channel/protect-the-children', + 'info_dict': { + 'id': '4710177', + 'display_id': 'the-dead-files-travel-channel/protect-the-children', + 'ext': 'mp4', + 'title': 'Protect the Children', + 'description': 'An evil presence threatens an Ohio woman\'s children and marriage.', + 'season_number': 14, + 'season': 'Season 14', + 'episode_number': 10, + 'episode': 'Episode 10', + 'series': 'The Dead Files', + 'duration': 2550.481, + 'timestamp': 1664510400, + 'upload_date': '20220930', + 'tags': [], + 'creators': ['Travel Channel'], + 'thumbnail': 'https://us1-prod-images.disco-api.com/2022/03/17/5e45eace-de5d-343a-9293-f400a2aa77d5.jpeg', + }, + }, { 'url': 'https://watch.travelchannel.com/video/ghost-adventures-travel-channel/ghost-train-of-ely', 'info_dict': { 'id': '2220256', @@ -422,6 +470,26 @@ class TravelChannelIE(DiscoveryPlusBaseIE): class CookingChannelIE(DiscoveryPlusBaseIE): _VALID_URL = r'https?://(?:watch\.)?cookingchanneltv\.com/video' + DPlayBaseIE._PATH_REGEX _TESTS = [{ + 'url': 'https://watch.cookingchanneltv.com/video/bobbys-triple-threat-food-network-atve-us/titans-vs-marcus-samuelsson', + 'info_dict': { + 'id': '5350005', + 'ext': 'mp4', + 'display_id': 'bobbys-triple-threat-food-network-atve-us/titans-vs-marcus-samuelsson', + 'title': 'Titans vs Marcus Samuelsson', + 'description': 'Marcus Samuelsson throws his legendary global tricks at the Titans.', + 'episode_number': 1, + 'episode': 'Episode 1', + 'season_number': 3, + 'season': 'Season 3', + 'series': 'Bobby\'s Triple Threat', + 'duration': 2520.851, + 'upload_date': '20240710', + 'timestamp': 1720573200, + 'tags': [], + 'creators': ['Food Network'], + 'thumbnail': 
'https://us1-prod-images.disco-api.com/2024/07/04/529cd095-27ec-35c5-84e9-90ebd3e5d2da.jpeg', + }, + }, { 'url': 'https://watch.cookingchanneltv.com/video/carnival-eats-cooking-channel/the-postman-always-brings-rice-2348634', 'info_dict': { 'id': '2348634', @@ -449,6 +517,22 @@ class CookingChannelIE(DiscoveryPlusBaseIE): class HGTVUsaIE(DiscoveryPlusBaseIE): _VALID_URL = r'https?://(?:watch\.)?hgtv\.com/video' + DPlayBaseIE._PATH_REGEX _TESTS = [{ + 'url': 'https://watch.hgtv.com/video/flip-or-flop-the-final-flip-hgtv-atve-us/flip-or-flop-the-final-flip', + 'info_dict': { + 'id': '5025585', + 'display_id': 'flip-or-flop-the-final-flip-hgtv-atve-us/flip-or-flop-the-final-flip', + 'ext': 'mp4', + 'title': 'Flip or Flop: The Final Flip', + 'description': 'Tarek and Christina are going their separate ways after one last flip!', + 'series': 'Flip or Flop: The Final Flip', + 'duration': 2580.644, + 'upload_date': '20231101', + 'timestamp': 1698811200, + 'tags': [], + 'creators': ['HGTV'], + 'thumbnail': 'https://us1-prod-images.disco-api.com/2022/11/27/455caa6c-1462-3f14-b63d-a026d7a5e6d3.jpeg', + }, + }, { 'url': 'https://watch.hgtv.com/video/home-inspector-joe-hgtv-atve-us/this-mold-house', 'info_dict': { 'id': '4289736', @@ -476,6 +560,26 @@ class HGTVUsaIE(DiscoveryPlusBaseIE): class FoodNetworkIE(DiscoveryPlusBaseIE): _VALID_URL = r'https?://(?:watch\.)?foodnetwork\.com/video' + DPlayBaseIE._PATH_REGEX _TESTS = [{ + 'url': 'https://watch.foodnetwork.com/video/guys-grocery-games-food-network/wild-in-the-aisles', + 'info_dict': { + 'id': '2152549', + 'display_id': 'guys-grocery-games-food-network/wild-in-the-aisles', + 'ext': 'mp4', + 'title': 'Wild in the Aisles', + 'description': 'The chefs make spaghetti and meatballs with "Out of Stock" ingredients.', + 'season_number': 1, + 'season': 'Season 1', + 'episode_number': 1, + 'episode': 'Episode 1', + 'series': 'Guy\'s Grocery Games', + 'tags': [], + 'creators': ['Food Network'], + 'duration': 2520.651, + 
'upload_date': '20230623', + 'timestamp': 1687492800, + 'thumbnail': 'https://us1-prod-images.disco-api.com/2022/06/15/37fb5333-cad2-3dbb-af7c-c20ec77c89c6.jpeg', + }, + }, { 'url': 'https://watch.foodnetwork.com/video/kids-baking-championship-food-network/float-like-a-butterfly', 'info_dict': { 'id': '4116449', @@ -503,6 +607,26 @@ class FoodNetworkIE(DiscoveryPlusBaseIE): class DestinationAmericaIE(DiscoveryPlusBaseIE): _VALID_URL = r'https?://(?:www\.)?destinationamerica\.com/video' + DPlayBaseIE._PATH_REGEX _TESTS = [{ + 'url': 'https://www.destinationamerica.com/video/bbq-pit-wars-destination-america/smoke-on-the-water', + 'info_dict': { + 'id': '2218409', + 'display_id': 'bbq-pit-wars-destination-america/smoke-on-the-water', + 'ext': 'mp4', + 'title': 'Smoke on the Water', + 'description': 'The pitmasters head to Georgia for the Smoke on the Water BBQ Festival.', + 'season_number': 2, + 'season': 'Season 2', + 'episode_number': 1, + 'episode': 'Episode 1', + 'series': 'BBQ Pit Wars', + 'tags': [], + 'creators': ['Destination America'], + 'duration': 2614.878, + 'upload_date': '20230623', + 'timestamp': 1687492800, + 'thumbnail': 'https://us1-prod-images.disco-api.com/2020/05/11/c0f8e85d-9a10-3e6f-8e43-f6faafa81ba2.jpeg', + }, + }, { 'url': 'https://www.destinationamerica.com/video/alaska-monsters-destination-america-atve-us/central-alaskas-bigfoot', 'info_dict': { 'id': '4210904', @@ -530,6 +654,26 @@ class DestinationAmericaIE(DiscoveryPlusBaseIE): class InvestigationDiscoveryIE(DiscoveryPlusBaseIE): _VALID_URL = r'https?://(?:www\.)?investigationdiscovery\.com/video' + DPlayBaseIE._PATH_REGEX _TESTS = [{ + 'url': 'https://www.investigationdiscovery.com/video/deadly-influence-the-social-media-murders-investigation-discovery-atve-us/rip-bianca', + 'info_dict': { + 'id': '5341132', + 'display_id': 'deadly-influence-the-social-media-murders-investigation-discovery-atve-us/rip-bianca', + 'ext': 'mp4', + 'title': 'RIP Bianca', + 'description': 'A teenage 
influencer discovers an online world of threat, harm and danger.', + 'season_number': 1, + 'season': 'Season 1', + 'episode_number': 3, + 'episode': 'Episode 3', + 'series': 'Deadly Influence: The Social Media Murders', + 'creators': ['Investigation Discovery'], + 'tags': [], + 'duration': 2490.888, + 'upload_date': '20240618', + 'timestamp': 1718672400, + 'thumbnail': 'https://us1-prod-images.disco-api.com/2024/06/15/b567c774-9e44-3c6c-b0ba-db860a73e812.jpeg', + }, + }, { 'url': 'https://www.investigationdiscovery.com/video/unmasked-investigation-discovery/the-killer-clown', 'info_dict': { 'id': '2139409', @@ -557,6 +701,26 @@ class InvestigationDiscoveryIE(DiscoveryPlusBaseIE): class AmHistoryChannelIE(DiscoveryPlusBaseIE): _VALID_URL = r'https?://(?:www\.)?ahctv\.com/video' + DPlayBaseIE._PATH_REGEX _TESTS = [{ + 'url': 'https://www.ahctv.com/video/blood-and-fury-americas-civil-war-ahc/battle-of-bull-run', + 'info_dict': { + 'id': '2139199', + 'display_id': 'blood-and-fury-americas-civil-war-ahc/battle-of-bull-run', + 'ext': 'mp4', + 'title': 'Battle of Bull Run', + 'description': 'Two untested armies clash in the first real battle of the Civil War.', + 'season_number': 1, + 'season': 'Season 1', + 'episode_number': 1, + 'episode': 'Episode 1', + 'series': 'Blood and Fury: America\'s Civil War', + 'duration': 2612.509, + 'upload_date': '20220923', + 'timestamp': 1663905600, + 'creators': ['AHC'], + 'tags': [], + 'thumbnail': 'https://us1-prod-images.disco-api.com/2020/05/11/4af61bd7-d705-3108-82c4-1a6e541e20fa.jpeg', + }, + }, { 'url': 'https://www.ahctv.com/video/modern-sniper-ahc/army', 'info_dict': { 'id': '2309730', @@ -584,6 +748,26 @@ class AmHistoryChannelIE(DiscoveryPlusBaseIE): class ScienceChannelIE(DiscoveryPlusBaseIE): _VALID_URL = r'https?://(?:www\.)?sciencechannel\.com/video' + DPlayBaseIE._PATH_REGEX _TESTS = [{ + 'url': 'https://www.sciencechannel.com/video/spaces-deepest-secrets-science-atve-us/mystery-of-the-dead-planets', + 'info_dict': { + 
'id': '2347335', + 'display_id': 'spaces-deepest-secrets-science-atve-us/mystery-of-the-dead-planets', + 'ext': 'mp4', + 'title': 'Mystery of the Dead Planets', + 'description': 'Astronomers unmask the truly destructive nature of the cosmos.', + 'season_number': 7, + 'season': 'Season 7', + 'episode_number': 1, + 'episode': 'Episode 1', + 'series': 'Space\'s Deepest Secrets', + 'duration': 2524.989, + 'upload_date': '20230128', + 'timestamp': 1674882000, + 'creators': ['Science'], + 'tags': [], + 'thumbnail': 'https://us1-prod-images.disco-api.com/2021/03/30/3796829d-aead-3f9a-bd8d-e49048b3cdca.jpeg', + }, + }, { 'url': 'https://www.sciencechannel.com/video/strangest-things-science-atve-us/nazi-mystery-machine', 'info_dict': { 'id': '2842849', @@ -608,36 +792,29 @@ class ScienceChannelIE(DiscoveryPlusBaseIE): } -class DIYNetworkIE(DiscoveryPlusBaseIE): - _VALID_URL = r'https?://(?:watch\.)?diynetwork\.com/video' + DPlayBaseIE._PATH_REGEX +class DiscoveryLifeIE(DiscoveryPlusBaseIE): + _VALID_URL = r'https?://(?:www\.)?discoverylife\.com/video' + DPlayBaseIE._PATH_REGEX _TESTS = [{ - 'url': 'https://watch.diynetwork.com/video/pool-kings-diy-network/bringing-beach-life-to-texas', + 'url': 'https://www.discoverylife.com/video/er-files-discovery-life-atve-us/sweet-charity', 'info_dict': { - 'id': '2309730', - 'display_id': 'pool-kings-diy-network/bringing-beach-life-to-texas', + 'id': '2347614', + 'display_id': 'er-files-discovery-life-atve-us/sweet-charity', 'ext': 'mp4', - 'title': 'Bringing Beach Life to Texas', - 'description': 'The Pool Kings give a family a day at the beach in their own backyard.', - 'season_number': 10, - 'episode_number': 2, + 'title': 'Sweet Charity', + 'description': 'The staff at Charity Hospital treat a serious foot infection.', + 'season_number': 1, + 'season': 'Season 1', + 'episode_number': 1, + 'episode': 'Episode 1', + 'series': 'ER Files', + 'duration': 2364.261, + 'upload_date': '20230721', + 'timestamp': 1689912000, + 'creators': 
['Discovery Life'], + 'tags': [], + 'thumbnail': 'https://us1-prod-images.disco-api.com/2021/03/16/4b6f0124-360b-3546-b6a4-5552db886b86.jpeg', }, - 'skip': 'Available for Premium users', }, { - 'url': 'https://watch.diynetwork.com/video/pool-kings-diy-network/bringing-beach-life-to-texas', - 'only_matching': True, - }] - - _PRODUCT = 'diy' - _DISCO_API_PARAMS = { - 'disco_host': 'us1-prod-direct.watch.diynetwork.com', - 'realm': 'go', - 'country': 'us', - } - - -class DiscoveryLifeIE(DiscoveryPlusBaseIE): - _VALID_URL = r'https?://(?:www\.)?discoverylife\.com/video' + DPlayBaseIE._PATH_REGEX - _TESTS = [{ 'url': 'https://www.discoverylife.com/video/surviving-death-discovery-life-atve-us/bodily-trauma', 'info_dict': { 'id': '2218238', @@ -665,6 +842,26 @@ class DiscoveryLifeIE(DiscoveryPlusBaseIE): class AnimalPlanetIE(DiscoveryPlusBaseIE): _VALID_URL = r'https?://(?:www\.)?animalplanet\.com/video' + DPlayBaseIE._PATH_REGEX _TESTS = [{ + 'url': 'https://www.animalplanet.com/video/mysterious-creatures-with-forrest-galante-animal-planet-atve-us/the-demon-of-peru', + 'info_dict': { + 'id': '4650835', + 'display_id': 'mysterious-creatures-with-forrest-galante-animal-planet-atve-us/the-demon-of-peru', + 'ext': 'mp4', + 'title': 'The Demon of Peru', + 'description': 'In Peru, a farming village is being terrorized by a “man-like beast.”', + 'season_number': 1, + 'season': 'Season 1', + 'episode_number': 4, + 'episode': 'Episode 4', + 'series': 'Mysterious Creatures with Forrest Galante', + 'duration': 2490.488, + 'upload_date': '20230111', + 'timestamp': 1673413200, + 'creators': ['Animal Planet'], + 'tags': [], + 'thumbnail': 'https://us1-prod-images.disco-api.com/2022/03/01/6dbaa833-9a2e-3fee-9381-c19eddf67c0c.jpeg', + }, + }, { 'url': 'https://www.animalplanet.com/video/north-woods-law-animal-planet/squirrel-showdown', 'info_dict': { 'id': '3338923', @@ -692,6 +889,26 @@ class AnimalPlanetIE(DiscoveryPlusBaseIE): class TLCIE(DiscoveryPlusBaseIE): _VALID_URL = 
r'https?://(?:go\.)?tlc\.com/video' + DPlayBaseIE._PATH_REGEX _TESTS = [{ + 'url': 'https://go.tlc.com/video/90-day-the-last-resort-tlc-atve-us/the-last-chance', + 'info_dict': { + 'id': '5186422', + 'display_id': '90-day-the-last-resort-tlc-atve-us/the-last-chance', + 'ext': 'mp4', + 'title': 'The Last Chance', + 'description': 'Infidelity shakes Kalani and Asuelu\'s world, and Angela threatens divorce.', + 'season_number': 1, + 'season': 'Season 1', + 'episode_number': 1, + 'episode': 'Episode 1', + 'series': '90 Day: The Last Resort', + 'duration': 5123.91, + 'upload_date': '20230815', + 'timestamp': 1692061200, + 'creators': ['TLC'], + 'tags': [], + 'thumbnail': 'https://us1-prod-images.disco-api.com/2023/08/08/0ee367e2-ac76-334d-bf23-dbf796696a24.jpeg', + }, + }, { 'url': 'https://go.tlc.com/video/my-600-lb-life-tlc/melissas-story-part-1', 'info_dict': { 'id': '2206540', @@ -716,93 +933,8 @@ class TLCIE(DiscoveryPlusBaseIE): } -class MotorTrendIE(DiscoveryPlusBaseIE): - _VALID_URL = r'https?://(?:watch\.)?motortrend\.com/video' + DPlayBaseIE._PATH_REGEX - _TESTS = [{ - 'url': 'https://watch.motortrend.com/video/car-issues-motortrend-atve-us/double-dakotas', - 'info_dict': { - 'id': '"4859182"', - 'display_id': 'double-dakotas', - 'ext': 'mp4', - 'title': 'Double Dakotas', - 'description': 'Tylers buy-one-get-one Dakota deal has the Wizard pulling double duty.', - 'season_number': 2, - 'episode_number': 3, - }, - 'skip': 'Available for Premium users', - }, { - 'url': 'https://watch.motortrend.com/video/car-issues-motortrend-atve-us/double-dakotas', - 'only_matching': True, - }] - - _PRODUCT = 'vel' - _DISCO_API_PARAMS = { - 'disco_host': 'us1-prod-direct.watch.motortrend.com', - 'realm': 'go', - 'country': 'us', - } - - -class MotorTrendOnDemandIE(DiscoveryPlusBaseIE): - _VALID_URL = r'https?://(?:www\.)?motortrend(?:ondemand\.com|\.com/plus)/detail' + DPlayBaseIE._PATH_REGEX - _TESTS = [{ - 'url': 
'https://www.motortrendondemand.com/detail/wheelstanding-dump-truck-stubby-bobs-comeback/37699/784', - 'info_dict': { - 'id': '37699', - 'display_id': 'wheelstanding-dump-truck-stubby-bobs-comeback/37699', - 'ext': 'mp4', - 'title': 'Wheelstanding Dump Truck! Stubby Bob’s Comeback', - 'description': 'md5:996915abe52a1c3dfc83aecea3cce8e7', - 'season_number': 5, - 'episode_number': 52, - 'episode': 'Episode 52', - 'season': 'Season 5', - 'thumbnail': r're:^https?://.+\.jpe?g$', - 'timestamp': 1388534401, - 'duration': 1887.345, - 'creator': 'Originals', - 'series': 'Roadkill', - 'upload_date': '20140101', - 'tags': [], - }, - }, { - 'url': 'https://www.motortrend.com/plus/detail/roadworthy-rescues-teaser-trailer/4922860/', - 'info_dict': { - 'id': '4922860', - 'ext': 'mp4', - 'title': 'Roadworthy Rescues | Teaser Trailer', - 'description': 'Derek Bieri helps Freiburger and Finnegan with their \'68 big-block Dart.', - 'display_id': 'roadworthy-rescues-teaser-trailer/4922860', - 'creator': 'Originals', - 'series': 'Roadworthy Rescues', - 'thumbnail': r're:^https?://.+\.jpe?g$', - 'upload_date': '20220907', - 'timestamp': 1662523200, - 'duration': 1066.356, - 'tags': [], - }, - }, { - 'url': 'https://www.motortrend.com/plus/detail/ugly-duckling/2450033/12439', - 'only_matching': True, - }] - - _PRODUCT = 'MTOD' - _DISCO_API_PARAMS = { - 'disco_host': 'us1-prod-direct.motortrendondemand.com', - 'realm': 'motortrend', - 'country': 'us', - } - - def _update_disco_api_headers(self, headers, disco_base, display_id, realm): - headers.update({ - 'x-disco-params': f'realm={realm}', - 'x-disco-client': f'WEB:UNKNOWN:{self._PRODUCT}:4.39.1-gi1', - 'Authorization': self._get_auth(disco_base, display_id, realm), - }) - - class DiscoveryPlusIE(DiscoveryPlusBaseIE): - _VALID_URL = r'https?://(?:www\.)?discoveryplus\.com/(?!it/)(?:\w{2}/)?video' + DPlayBaseIE._PATH_REGEX + _VALID_URL = r'https?://(?:www\.)?discoveryplus\.com/(?!it/)(?:(?P[a-z]{2})/)?video(?:/sport|/olympics)?' 
+ DPlayBaseIE._PATH_REGEX _TESTS = [{ 'url': 'https://www.discoveryplus.com/video/property-brothers-forever-home/food-and-family', 'info_dict': { @@ -823,14 +955,45 @@ class DiscoveryPlusIE(DiscoveryPlusBaseIE): }, { 'url': 'https://discoveryplus.com/ca/video/bering-sea-gold-discovery-ca/goldslingers', 'only_matching': True, + }, { + 'url': 'https://www.discoveryplus.com/gb/video/sport/eurosport-1-british-eurosport-1-british-sport/6-hours-of-spa-review', + 'only_matching': True, + }, { + 'url': 'https://www.discoveryplus.com/gb/video/olympics/dplus-sport-dplus-sport-sport/rugby-sevens-australia-samoa', + 'only_matching': True, }] - _PRODUCT = 'dplus_us' - _DISCO_API_PARAMS = { - 'disco_host': 'us1-prod-direct.discoveryplus.com', - 'realm': 'go', - 'country': 'us', - } + _PRODUCT = None + _DISCO_API_PARAMS = None + + def _update_disco_api_headers(self, headers, disco_base, display_id, realm): + headers.update({ + 'x-disco-params': f'realm={realm},siteLookupKey={self._PRODUCT}', + 'x-disco-client': f'WEB:UNKNOWN:dplus_us:{self._DISCO_CLIENT_VER}', + 'Authorization': self._get_auth(disco_base, display_id, realm), + }) + + def _real_extract(self, url): + video_id, country = self._match_valid_url(url).group('id', 'country') + if not country: + country = 'us' + + self._PRODUCT = f'dplus_{country}' + + if country in ('br', 'ca', 'us'): + self._DISCO_API_PARAMS = { + 'disco_host': 'us1-prod-direct.discoveryplus.com', + 'realm': 'go', + 'country': country, + } + else: + self._DISCO_API_PARAMS = { + 'disco_host': 'eu1-prod-direct.discoveryplus.com', + 'realm': 'dplay', + 'country': country, + } + + return self._get_disco_api_info(url, video_id, **self._DISCO_API_PARAMS) class DiscoveryPlusIndiaIE(DiscoveryPlusBaseIE): @@ -984,16 +1147,22 @@ def _real_extract(self, url): class DiscoveryPlusItalyIE(DiscoveryPlusBaseIE): - _VALID_URL = r'https?://(?:www\.)?discoveryplus\.com/it/video' + DPlayBaseIE._PATH_REGEX + _VALID_URL = 
r'https?://(?:www\.)?discoveryplus\.com/it/video(?:/sport|/olympics)?' + DPlayBaseIE._PATH_REGEX _TESTS = [{ 'url': 'https://www.discoveryplus.com/it/video/i-signori-della-neve/stagione-2-episodio-1-i-preparativi', 'only_matching': True, }, { 'url': 'https://www.discoveryplus.com/it/video/super-benny/trailer', 'only_matching': True, + }, { + 'url': 'https://www.discoveryplus.com/it/video/olympics/dplus-sport-dplus-sport-sport/water-polo-greece-italy', + 'only_matching': True, + }, { + 'url': 'https://www.discoveryplus.com/it/video/sport/dplus-sport-dplus-sport-sport/lisa-vittozzi-allinferno-e-ritorno', + 'only_matching': True, }] - _PRODUCT = 'dplus_us' + _PRODUCT = 'dplus_it' _DISCO_API_PARAMS = { 'disco_host': 'eu1-prod-direct.discoveryplus.com', 'realm': 'dplay', @@ -1002,8 +1171,8 @@ class DiscoveryPlusItalyIE(DiscoveryPlusBaseIE): def _update_disco_api_headers(self, headers, disco_base, display_id, realm): headers.update({ - 'x-disco-params': f'realm={realm}', - 'x-disco-client': f'WEB:UNKNOWN:{self._PRODUCT}:25.2.6', + 'x-disco-params': f'realm={realm},siteLookupKey={self._PRODUCT}', + 'x-disco-client': f'WEB:UNKNOWN:dplus_us:{self._DISCO_CLIENT_VER}', 'Authorization': self._get_auth(disco_base, display_id, realm), }) @@ -1044,39 +1213,3 @@ class DiscoveryPlusIndiaShowIE(DiscoveryPlusShowBaseIE): _SHOW_STR = 'show' _INDEX = 4 _VIDEO_IE = DiscoveryPlusIndiaIE - - -class GlobalCyclingNetworkPlusIE(DiscoveryPlusBaseIE): - _VALID_URL = r'https?://plus\.globalcyclingnetwork\.com/watch/(?P\d+)' - _TESTS = [{ - 'url': 'https://plus.globalcyclingnetwork.com/watch/1397691', - 'info_dict': { - 'id': '1397691', - 'ext': 'mp4', - 'title': 'The Athertons: Mountain Biking\'s Fastest Family', - 'description': 'md5:75a81937fcd8b989eec6083a709cd837', - 'thumbnail': 'https://us1-prod-images.disco-api.com/2021/03/04/eb9e3026-4849-3001-8281-9356466f0557.png', - 'series': 'gcn', - 'creator': 'Gcn', - 'upload_date': '20210309', - 'timestamp': 1615248000, - 'duration': 2531.0, - 
'tags': [], - }, - 'skip': 'Subscription required', - 'params': {'skip_download': 'm3u8'}, - }] - - _PRODUCT = 'web' - _DISCO_API_PARAMS = { - 'disco_host': 'disco-api-prod.globalcyclingnetwork.com', - 'realm': 'gcn', - 'country': 'us', - } - - def _update_disco_api_headers(self, headers, disco_base, display_id, realm): - headers.update({ - 'x-disco-params': f'realm={realm}', - 'x-disco-client': f'WEB:UNKNOWN:{self._PRODUCT}:27.3.2', - 'Authorization': self._get_auth(disco_base, display_id, realm), - }) diff --git a/plugin/yt-dlp/yt_dlp/extractor/epidemicsound.py b/plugin/yt-dlp/yt_dlp/extractor/epidemicsound.py index 0d81b11..75b0f05 100644 --- a/plugin/yt-dlp/yt_dlp/extractor/epidemicsound.py +++ b/plugin/yt-dlp/yt_dlp/extractor/epidemicsound.py @@ -2,6 +2,7 @@ from ..utils import ( float_or_none, int_or_none, + join_nonempty, orderedSet, parse_iso8601, parse_qs, @@ -13,7 +14,7 @@ class EpidemicSoundIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?epidemicsound\.com/track/(?P[0-9a-zA-Z]+)' + _VALID_URL = r'https?://(?:www\.)?epidemicsound\.com/(?:(?Psound-effects/tracks)|track)/(?P[0-9a-zA-Z-]+)' _TESTS = [{ 'url': 'https://www.epidemicsound.com/track/yFfQVRpSPz/', 'md5': 'd98ff2ddb49e8acab9716541cbc9dfac', @@ -47,6 +48,20 @@ class EpidemicSoundIE(InfoExtractor): 'release_timestamp': 1700535606, 'release_date': '20231121', }, + }, { + 'url': 'https://www.epidemicsound.com/sound-effects/tracks/2f02f54b-9faa-4daf-abac-1cfe9e9cef69/', + 'md5': '35d7cf05bd8b614a84f0495a05de9388', + 'info_dict': { + 'id': '208931', + 'ext': 'mp3', + 'upload_date': '20240603', + 'timestamp': 1717436529, + 'categories': ['appliance'], + 'display_id': '6b2NXLURPr', + 'duration': 1.0, + 'title': 'Oven, Grill, Door Open 01', + 'thumbnail': 'https://cdn.epidemicsound.com/curation-assets/commercial-release-cover-images/default-sfx/3000x3000.jpg', + }, }] @staticmethod @@ -77,8 +92,10 @@ def _epidemic_fmt_or_none(f): return f def _real_extract(self, url): - video_id = 
self._match_id(url) - json_data = self._download_json(f'https://www.epidemicsound.com/json/track/{video_id}', video_id) + video_id, is_sfx = self._match_valid_url(url).group('id', 'sfx') + json_data = self._download_json(join_nonempty( + 'https://www.epidemicsound.com/json/track', + is_sfx and 'kosmos-id', video_id, delim='/'), video_id) thumbnails = traverse_obj(json_data, [('imageUrl', 'cover')]) thumb_base_url = traverse_obj(json_data, ('coverArt', 'baseUrl', {url_or_none})) diff --git a/plugin/yt-dlp/yt_dlp/extractor/facebook.py b/plugin/yt-dlp/yt_dlp/extractor/facebook.py index a3ca291..6aba477 100644 --- a/plugin/yt-dlp/yt_dlp/extractor/facebook.py +++ b/plugin/yt-dlp/yt_dlp/extractor/facebook.py @@ -571,16 +571,21 @@ def process_formats(info): # Formats larger than ~500MB will return error 403 unless chunk size is regulated f.setdefault('downloader_options', {})['http_chunk_size'] = 250 << 20 - def extract_relay_data(_filter): - return self._parse_json(self._search_regex( - rf'data-sjs>({{.*?{_filter}.*?}})', - webpage, 'replay data', default='{}'), video_id, fatal=False) or {} + def yield_all_relay_data(_filter): + for relay_data in re.findall(rf'data-sjs>({{.*?{_filter}.*?}})', webpage): + yield self._parse_json(relay_data, video_id, fatal=False) or {} - def extract_relay_prefetched_data(_filter): - return traverse_obj(extract_relay_data(_filter), ( - 'require', (None, (..., ..., ..., '__bbox', 'require')), + def extract_relay_data(_filter): + return next(filter(None, yield_all_relay_data(_filter)), {}) + + def extract_relay_prefetched_data(_filter, target_keys=None): + path = 'data' + if target_keys is not None: + path = lambda k, v: k == 'data' and any(target in v for target in variadic(target_keys)) + return traverse_obj(yield_all_relay_data(_filter), ( + ..., 'require', (None, (..., ..., ..., '__bbox', 'require')), lambda _, v: any(key.startswith('RelayPrefetchedStreamCache') for key in v), - ..., ..., '__bbox', 'result', 'data', {dict}), 
get_all=False) or {} + ..., ..., '__bbox', 'result', path, {dict}), get_all=False) or {} if not video_data: server_js_data = self._parse_json(self._search_regex([ @@ -591,7 +596,8 @@ def extract_relay_prefetched_data(_filter): if not video_data: data = extract_relay_prefetched_data( - r'"(?:dash_manifest|playable_url(?:_quality_hd)?)') + r'"(?:dash_manifest|playable_url(?:_quality_hd)?)', + target_keys=('video', 'event', 'nodes', 'node', 'mediaset')) if data: entries = [] diff --git a/plugin/yt-dlp/yt_dlp/extractor/generic.py b/plugin/yt-dlp/yt_dlp/extractor/generic.py index 3b8e1e9..04cffaa 100644 --- a/plugin/yt-dlp/yt_dlp/extractor/generic.py +++ b/plugin/yt-dlp/yt_dlp/extractor/generic.py @@ -43,6 +43,7 @@ xpath_text, xpath_with_ns, ) +from ..utils._utils import _UnsafeExtensionError class GenericIE(InfoExtractor): @@ -2446,9 +2447,13 @@ def _real_extract(self, url): if not is_html(first_bytes): self.report_warning( 'URL could be a direct video link, returning it as such.') + ext = determine_ext(url) + if ext not in _UnsafeExtensionError.ALLOWED_EXTENSIONS: + ext = 'unknown_video' info_dict.update({ 'direct': True, 'url': url, + 'ext': ext, }) return info_dict diff --git a/plugin/yt-dlp/yt_dlp/extractor/kick.py b/plugin/yt-dlp/yt_dlp/extractor/kick.py index 889548f..1c1b2a1 100644 --- a/plugin/yt-dlp/yt_dlp/extractor/kick.py +++ b/plugin/yt-dlp/yt_dlp/extractor/kick.py @@ -1,9 +1,14 @@ +import functools + from .common import InfoExtractor from ..networking import HEADRequest from ..utils import ( UserNotLive, + determine_ext, float_or_none, + int_or_none, merge_dicts, + parse_iso8601, str_or_none, traverse_obj, unified_timestamp, @@ -25,104 +30,192 @@ def _real_initialize(self): def _call_api(self, path, display_id, note='Downloading API JSON', headers={}, **kwargs): return self._download_json( - f'https://kick.com/api/v1/{path}', display_id, note=note, + f'https://kick.com/api/{path}', display_id, note=note, headers=merge_dicts(headers, self._API_HEADERS), 
impersonate=True, **kwargs) class KickIE(KickBaseIE): + IE_NAME = 'kick:live' _VALID_URL = r'https?://(?:www\.)?kick\.com/(?!(?:video|categories|search|auth)(?:[/?#]|$))(?P[\w-]+)' _TESTS = [{ - 'url': 'https://kick.com/yuppy', + 'url': 'https://kick.com/buddha', 'info_dict': { - 'id': '6cde1-kickrp-joe-flemmingskick-info-heremust-knowmust-see21', + 'id': '92722911-nopixel-40', 'ext': 'mp4', 'title': str, 'description': str, - 'channel': 'yuppy', - 'channel_id': '33538', - 'uploader': 'Yuppy', - 'uploader_id': '33793', - 'upload_date': str, - 'live_status': 'is_live', 'timestamp': int, - 'thumbnail': r're:^https?://.*\.jpg', + 'thumbnail': r're:https?://.+\.jpg', 'categories': list, + 'upload_date': str, + 'channel': 'buddha', + 'channel_id': '32807', + 'uploader': 'Buddha', + 'uploader_id': '33057', + 'live_status': 'is_live', + 'concurrent_view_count': int, + 'release_timestamp': int, + 'age_limit': 18, + 'release_date': str, }, - 'skip': 'livestream', + 'params': {'skip_download': 'livestream'}, + # 'skip': 'livestream', }, { - 'url': 'https://kick.com/kmack710', + 'url': 'https://kick.com/xqc', 'only_matching': True, }] + @classmethod + def suitable(cls, url): + return False if KickClipIE.suitable(url) else super().suitable(url) + def _real_extract(self, url): channel = self._match_id(url) - response = self._call_api(f'channels/{channel}', channel) + response = self._call_api(f'v2/channels/{channel}', channel) if not traverse_obj(response, 'livestream', expected_type=dict): raise UserNotLive(video_id=channel) return { - 'id': str(traverse_obj( - response, ('livestream', ('slug', 'id')), get_all=False, default=channel)), - 'formats': self._extract_m3u8_formats( - response['playback_url'], channel, 'mp4', live=True), - 'title': traverse_obj( - response, ('livestream', ('session_title', 'slug')), get_all=False, default=''), - 'description': traverse_obj(response, ('user', 'bio')), 'channel': channel, - 'channel_id': str_or_none(traverse_obj(response, 'id', 
('livestream', 'channel_id'))), - 'uploader': traverse_obj(response, 'name', ('user', 'username')), - 'uploader_id': str_or_none(traverse_obj(response, 'user_id', ('user', 'id'))), 'is_live': True, - 'timestamp': unified_timestamp(traverse_obj(response, ('livestream', 'created_at'))), - 'thumbnail': traverse_obj( - response, ('livestream', 'thumbnail', 'url'), expected_type=url_or_none), - 'categories': traverse_obj(response, ('recent_categories', ..., 'name')), + 'formats': self._extract_m3u8_formats(response['playback_url'], channel, 'mp4', live=True), + **traverse_obj(response, { + 'id': ('livestream', 'slug', {str}), + 'title': ('livestream', 'session_title', {str}), + 'description': ('user', 'bio', {str}), + 'channel_id': (('id', ('livestream', 'channel_id')), {int}, {str_or_none}, any), + 'uploader': (('name', ('user', 'username')), {str}, any), + 'uploader_id': (('user_id', ('user', 'id')), {int}, {str_or_none}, any), + 'timestamp': ('livestream', 'created_at', {unified_timestamp}), + 'release_timestamp': ('livestream', 'start_time', {unified_timestamp}), + 'thumbnail': ('livestream', 'thumbnail', 'url', {url_or_none}), + 'categories': ('recent_categories', ..., 'name', {str}), + 'concurrent_view_count': ('livestream', 'viewer_count', {int_or_none}), + 'age_limit': ('livestream', 'is_mature', {bool}, {lambda x: 18 if x else 0}), + }), } class KickVODIE(KickBaseIE): + IE_NAME = 'kick:vod' _VALID_URL = r'https?://(?:www\.)?kick\.com/video/(?P[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12})' _TESTS = [{ - 'url': 'https://kick.com/video/58bac65b-e641-4476-a7ba-3707a35e60e3', + 'url': 'https://kick.com/video/e74614f4-5270-4319-90ad-32179f19a45c', 'md5': '3870f94153e40e7121a6e46c068b70cb', 'info_dict': { - 'id': '58bac65b-e641-4476-a7ba-3707a35e60e3', + 'id': 'e74614f4-5270-4319-90ad-32179f19a45c', 'ext': 'mp4', - 'title': '🤠REBIRTH IS BACK!!!!🤠!stake CODE JAREDFPS 🤠', - 'description': 'md5:02b0c46f9b4197fb545ab09dddb85b1d', - 'channel': 'jaredfps', - 'channel_id': 
'26608', - 'uploader': 'JaredFPS', - 'uploader_id': '26799', - 'upload_date': '20240402', - 'timestamp': 1712097108, - 'duration': 33859.0, + 'title': r're:❎ MEGA DRAMA ❎ LIVE ❎ CLICK ❎ ULTIMATE SKILLS .+', + 'description': 'THE BEST AT ABSOLUTELY EVERYTHING. THE JUICER. LEADER OF THE JUICERS.', + 'channel': 'xqc', + 'channel_id': '668', + 'uploader': 'xQc', + 'uploader_id': '676', + 'upload_date': '20240724', + 'timestamp': 1721796562, + 'duration': 18566.0, 'thumbnail': r're:^https?://.*\.jpg', - 'categories': ['Call of Duty: Warzone'], + 'view_count': int, + 'categories': ['VALORANT'], + 'age_limit': 0, }, - 'params': { - 'skip_download': 'm3u8', - }, - 'expected_warnings': [r'impersonation'], + 'params': {'skip_download': 'm3u8'}, }] def _real_extract(self, url): video_id = self._match_id(url) - response = self._call_api(f'video/{video_id}', video_id) + response = self._call_api(f'v1/video/{video_id}', video_id) return { 'id': video_id, 'formats': self._extract_m3u8_formats(response['source'], video_id, 'mp4'), - 'title': traverse_obj( - response, ('livestream', ('session_title', 'slug')), get_all=False, default=''), - 'description': traverse_obj(response, ('livestream', 'channel', 'user', 'bio')), - 'channel': traverse_obj(response, ('livestream', 'channel', 'slug')), - 'channel_id': str_or_none(traverse_obj(response, ('livestream', 'channel', 'id'))), - 'uploader': traverse_obj(response, ('livestream', 'channel', 'user', 'username')), - 'uploader_id': str_or_none(traverse_obj(response, ('livestream', 'channel', 'user_id'))), - 'timestamp': unified_timestamp(response.get('created_at')), - 'duration': float_or_none(traverse_obj(response, ('livestream', 'duration')), scale=1000), - 'thumbnail': traverse_obj( - response, ('livestream', 'thumbnail'), expected_type=url_or_none), - 'categories': traverse_obj(response, ('livestream', 'categories', ..., 'name')), + **traverse_obj(response, { + 'title': ('livestream', ('session_title', 'slug'), {str}, any), + 
'description': ('livestream', 'channel', 'user', 'bio', {str}), + 'channel': ('livestream', 'channel', 'slug', {str}), + 'channel_id': ('livestream', 'channel', 'id', {int}, {str_or_none}), + 'uploader': ('livestream', 'channel', 'user', 'username', {str}), + 'uploader_id': ('livestream', 'channel', 'user_id', {int}, {str_or_none}), + 'timestamp': ('created_at', {parse_iso8601}), + 'duration': ('livestream', 'duration', {functools.partial(float_or_none, scale=1000)}), + 'thumbnail': ('livestream', 'thumbnail', {url_or_none}), + 'categories': ('livestream', 'categories', ..., 'name', {str}), + 'view_count': ('views', {int_or_none}), + 'age_limit': ('livestream', 'is_mature', {bool}, {lambda x: 18 if x else 0}), + }), + } + + +class KickClipIE(KickBaseIE): + IE_NAME = 'kick:clips' + _VALID_URL = r'https?://(?:www\.)?kick\.com/[\w-]+/?\?(?:[^#]+&)?clip=(?Pclip_[\w-]+)' + _TESTS = [{ + 'url': 'https://kick.com/mxddy?clip=clip_01GYXVB5Y8PWAPWCWMSBCFB05X', + 'info_dict': { + 'id': 'clip_01GYXVB5Y8PWAPWCWMSBCFB05X', + 'ext': 'mp4', + 'title': 'Maddy detains Abd D:', + 'channel': 'mxddy', + 'channel_id': '133789', + 'uploader': 'AbdCreates', + 'uploader_id': '3309077', + 'thumbnail': r're:^https?://.*\.jpeg', + 'duration': 35, + 'timestamp': 1682481453, + 'upload_date': '20230426', + 'view_count': int, + 'like_count': int, + 'categories': ['VALORANT'], + 'age_limit': 18, + }, + 'params': {'skip_download': 'm3u8'}, + }, { + 'url': 'https://kick.com/destiny?clip=clip_01H9SKET879NE7N9RJRRDS98J3', + 'info_dict': { + 'id': 'clip_01H9SKET879NE7N9RJRRDS98J3', + 'title': 'W jews', + 'ext': 'mp4', + 'channel': 'destiny', + 'channel_id': '1772249', + 'uploader': 'punished_furry', + 'uploader_id': '2027722', + 'duration': 49.0, + 'upload_date': '20230908', + 'timestamp': 1694150180, + 'thumbnail': 'https://clips.kick.com/clips/j3/clip_01H9SKET879NE7N9RJRRDS98J3/thumbnail.png', + 'view_count': int, + 'like_count': int, + 'categories': ['Just Chatting'], + 'age_limit': 0, + }, + 
'params': {'skip_download': 'm3u8'}, + }] + + def _real_extract(self, url): + clip_id = self._match_id(url) + clip = self._call_api(f'v2/clips/{clip_id}/play', clip_id)['clip'] + clip_url = clip['clip_url'] + + if determine_ext(clip_url) == 'm3u8': + formats = self._extract_m3u8_formats(clip_url, clip_id, 'mp4') + else: + formats = [{'url': clip_url}] + + return { + 'id': clip_id, + 'formats': formats, + **traverse_obj(clip, { + 'title': ('title', {str}), + 'channel': ('channel', 'slug', {str}), + 'channel_id': ('channel', 'id', {int}, {str_or_none}), + 'uploader': ('creator', 'username', {str}), + 'uploader_id': ('creator', 'id', {int}, {str_or_none}), + 'thumbnail': ('thumbnail_url', {url_or_none}), + 'duration': ('duration', {float_or_none}), + 'categories': ('category', 'name', {str}, all), + 'timestamp': ('created_at', {parse_iso8601}), + 'view_count': ('views', {int_or_none}), + 'like_count': ('likes', {int_or_none}), + 'age_limit': ('is_mature', {bool}, {lambda x: 18 if x else 0}), + }), } diff --git a/plugin/yt-dlp/yt_dlp/extractor/learningonscreen.py b/plugin/yt-dlp/yt_dlp/extractor/learningonscreen.py new file mode 100644 index 0000000..dcf8314 --- /dev/null +++ b/plugin/yt-dlp/yt_dlp/extractor/learningonscreen.py @@ -0,0 +1,78 @@ +import functools +import re + +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + clean_html, + extract_attributes, + get_element_by_class, + get_element_html_by_id, + join_nonempty, + parse_duration, + unified_timestamp, +) +from ..utils.traversal import traverse_obj + + +class LearningOnScreenIE(InfoExtractor): + _VALID_URL = r'https?://learningonscreen\.ac\.uk/ondemand/index\.php/prog/(?P\w+)' + _TESTS = [{ + 'url': 'https://learningonscreen.ac.uk/ondemand/index.php/prog/005D81B2?bcast=22757013', + 'info_dict': { + 'id': '005D81B2', + 'ext': 'mp4', + 'title': 'Planet Earth', + 'duration': 3600.0, + 'timestamp': 1164567600.0, + 'upload_date': '20061126', + 'thumbnail': 
'https://stream.learningonscreen.ac.uk/trilt-cover-images/005D81B2-Planet-Earth-2006-11-26T190000Z-BBC4.jpg', + }, + }] + + def _real_initialize(self): + if not self._get_cookies('https://learningonscreen.ac.uk/').get('PHPSESSID-BOB-LIVE'): + self.raise_login_required( + 'Use --cookies for authentication. See ' + ' https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp ' + 'for how to manually pass cookies', method=None) + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + details = traverse_obj(webpage, ( + {functools.partial(get_element_html_by_id, 'programme-details')}, { + 'title': ({functools.partial(re.search, r'

([^<]+)

')}, 1, {clean_html}), + 'timestamp': ( + {functools.partial(get_element_by_class, 'broadcast-date')}, + {functools.partial(re.match, r'([^<]+)')}, 1, {unified_timestamp}), + 'duration': ( + {functools.partial(get_element_by_class, 'prog-running-time')}, + {clean_html}, {parse_duration}), + })) + + title = details.pop('title', None) or traverse_obj(webpage, ( + {functools.partial(get_element_html_by_id, 'add-to-existing-playlist')}, + {extract_attributes}, 'data-record-title', {clean_html})) + + entries = self._parse_html5_media_entries( + 'https://stream.learningonscreen.ac.uk', webpage, video_id, m3u8_id='hls', mpd_id='dash', + _headers={'Origin': 'https://learningonscreen.ac.uk', 'Referer': 'https://learningonscreen.ac.uk/'}) + if not entries: + raise ExtractorError('No video found') + + if len(entries) > 1: + duration = details.pop('duration', None) + for idx, entry in enumerate(entries, start=1): + entry.update(details) + entry['id'] = join_nonempty(video_id, idx) + entry['title'] = join_nonempty(title, idx) + return self.playlist_result(entries, video_id, title, duration=duration) + + return { + **entries[0], + **details, + 'id': video_id, + 'title': title, + } diff --git a/plugin/yt-dlp/yt_dlp/extractor/mediaklikk.py b/plugin/yt-dlp/yt_dlp/extractor/mediaklikk.py index bd1a27f..f513420 100644 --- a/plugin/yt-dlp/yt_dlp/extractor/mediaklikk.py +++ b/plugin/yt-dlp/yt_dlp/extractor/mediaklikk.py @@ -133,7 +133,9 @@ def _real_extract(self, url): r']+\bclass="article_date">([^<]+)<', webpage, 'upload date', default=None)) player_data['video'] = player_data.pop('token') - player_page = self._download_webpage('https://player.mediaklikk.hu/playernew/player.php', video_id, query=player_data) + player_page = self._download_webpage( + 'https://player.mediaklikk.hu/playernew/player.php', video_id, + query=player_data, headers={'Referer': url}) player_json = self._search_json( r'\bpl\.setup\s*\(', player_page, 'player json', video_id, end_pattern=r'\);') playlist_url = 
traverse_obj( diff --git a/plugin/yt-dlp/yt_dlp/extractor/mlb.py b/plugin/yt-dlp/yt_dlp/extractor/mlb.py index 6f67602..935bf85 100644 --- a/plugin/yt-dlp/yt_dlp/extractor/mlb.py +++ b/plugin/yt-dlp/yt_dlp/extractor/mlb.py @@ -1,16 +1,21 @@ +import json import re -import urllib.parse +import time import uuid from .common import InfoExtractor +from ..networking.exceptions import HTTPError from ..utils import ( + ExtractorError, determine_ext, int_or_none, join_nonempty, + jwt_decode_hs256, parse_duration, parse_iso8601, try_get, url_or_none, + urlencode_postdata, ) from ..utils.traversal import traverse_obj @@ -276,81 +281,225 @@ def _download_video_data(self, display_id): class MLBTVIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?mlb\.com/tv/g(?P\d{6})' _NETRC_MACHINE = 'mlb' - _TESTS = [{ 'url': 'https://www.mlb.com/tv/g661581/vee2eff5f-a7df-4c20-bdb4-7b926fa12638', 'info_dict': { 'id': '661581', 'ext': 'mp4', 'title': '2022-07-02 - St. Louis Cardinals @ Philadelphia Phillies', + 'release_date': '20220702', + 'release_timestamp': 1656792300, }, - 'params': { - 'skip_download': True, + 'params': {'skip_download': 'm3u8'}, + }, { + # makeup game: has multiple dates, need to avoid games with 'rescheduleDate' + 'url': 'https://www.mlb.com/tv/g747039/vd22541c4-5a29-45f7-822b-635ec041cf5e', + 'info_dict': { + 'id': '747039', + 'ext': 'mp4', + 'title': '2024-07-29 - Toronto Blue Jays @ Baltimore Orioles', + 'release_date': '20240729', + 'release_timestamp': 1722280200, }, + 'params': {'skip_download': 'm3u8'}, }] + _GRAPHQL_INIT_QUERY = '''\ +mutation initSession($device: InitSessionInput!, $clientType: ClientType!, $experience: ExperienceTypeInput) { + initSession(device: $device, clientType: $clientType, experience: $experience) { + deviceId + sessionId + entitlements { + code + } + location { + countryCode + regionName + zipCode + latitude + longitude + } + clientExperience + features + } + }''' + _GRAPHQL_PLAYBACK_QUERY = '''\ +mutation initPlaybackSession( + 
$adCapabilities: [AdExperienceType] + $mediaId: String! + $deviceId: String! + $sessionId: String! + $quality: PlaybackQuality + ) { + initPlaybackSession( + adCapabilities: $adCapabilities + mediaId: $mediaId + deviceId: $deviceId + sessionId: $sessionId + quality: $quality + ) { + playbackSessionId + playback { + url + token + expiration + cdn + } + } + }''' + _APP_VERSION = '7.8.2' + _device_id = None + _session_id = None _access_token = None + _token_expiry = 0 + + @property + def _api_headers(self): + if (self._token_expiry - 120) <= time.time(): + self.write_debug('Access token has expired; re-logging in') + self._perform_login(*self._get_login_info()) + return {'Authorization': f'Bearer {self._access_token}'} def _real_initialize(self): if not self._access_token: self.raise_login_required( 'All videos are only available to registered users', method='password') - def _perform_login(self, username, password): - data = f'grant_type=password&username={urllib.parse.quote(username)}&password={urllib.parse.quote(password)}&scope=openid offline_access&client_id=0oa3e1nutA1HLzAKG356' - access_token = self._download_json( - 'https://ids.mlb.com/oauth2/aus1m088yK07noBfh356/v1/token', None, - headers={ - 'User-Agent': 'okhttp/3.12.1', - 'Content-Type': 'application/x-www-form-urlencoded', - }, data=data.encode())['access_token'] + def _set_device_id(self, username): + if not self._device_id: + self._device_id = self.cache.load( + self._NETRC_MACHINE, 'device_ids', default={}).get(username) + if self._device_id: + return + self._device_id = str(uuid.uuid4()) + self.cache.store(self._NETRC_MACHINE, 'device_ids', {username: self._device_id}) - entitlement = self._download_webpage( - f'https://media-entitlement.mlb.com/api/v3/jwt?os=Android&appname=AtBat&did={uuid.uuid4()}', None, - headers={ - 'User-Agent': 'okhttp/3.12.1', - 'Authorization': f'Bearer {access_token}', - }) + def _perform_login(self, username, password): + try: + self._access_token = self._download_json( + 
'https://ids.mlb.com/oauth2/aus1m088yK07noBfh356/v1/token', None, + 'Logging in', 'Unable to log in', headers={ + 'User-Agent': 'okhttp/3.12.1', + 'Content-Type': 'application/x-www-form-urlencoded', + }, data=urlencode_postdata({ + 'grant_type': 'password', + 'username': username, + 'password': password, + 'scope': 'openid offline_access', + 'client_id': '0oa3e1nutA1HLzAKG356', + }))['access_token'] + except ExtractorError as error: + if isinstance(error.cause, HTTPError) and error.cause.status == 400: + raise ExtractorError('Invalid username or password', expected=True) + raise + + self._token_expiry = traverse_obj(self._access_token, ({jwt_decode_hs256}, 'exp', {int})) or 0 + self._set_device_id(username) + + self._session_id = self._call_api({ + 'operationName': 'initSession', + 'query': self._GRAPHQL_INIT_QUERY, + 'variables': { + 'device': { + 'appVersion': self._APP_VERSION, + 'deviceFamily': 'desktop', + 'knownDeviceId': self._device_id, + 'languagePreference': 'ENGLISH', + 'manufacturer': '', + 'model': '', + 'os': '', + 'osVersion': '', + }, + 'clientType': 'WEB', + }, + }, None, 'session ID')['data']['initSession']['sessionId'] - data = f'grant_type=urn:ietf:params:oauth:grant-type:token-exchange&subject_token={entitlement}&subject_token_type=urn:ietf:params:oauth:token-type:jwt&platform=android-tv' - self._access_token = self._download_json( - 'https://us.edge.bamgrid.com/token', None, + def _call_api(self, data, video_id, description='GraphQL JSON', fatal=True): + return self._download_json( + 'https://media-gateway.mlb.com/graphql', video_id, + f'Downloading {description}', f'Unable to download {description}', fatal=fatal, headers={ + **self._api_headers, 'Accept': 'application/json', - 'Authorization': 'Bearer bWxidHYmYW5kcm9pZCYxLjAuMA.6LZMbH2r--rbXcgEabaDdIslpo4RyZrlVfWZhsAgXIk', - 'Content-Type': 'application/x-www-form-urlencoded', - }, data=data.encode())['access_token'] + 'Content-Type': 'application/json', + 'x-client-name': 'WEB', + 
'x-client-version': self._APP_VERSION, + }, data=json.dumps(data, separators=(',', ':')).encode()) + + def _extract_formats_and_subtitles(self, broadcast, video_id): + feed = traverse_obj(broadcast, ('homeAway', {str.title})) + medium = traverse_obj(broadcast, ('type', {str})) + language = traverse_obj(broadcast, ('language', {str.lower})) + format_id = join_nonempty(feed, medium, language) + + response = self._call_api({ + 'operationName': 'initPlaybackSession', + 'query': self._GRAPHQL_PLAYBACK_QUERY, + 'variables': { + 'adCapabilities': ['GOOGLE_STANDALONE_AD_PODS'], + 'deviceId': self._device_id, + 'mediaId': broadcast['mediaId'], + 'quality': 'PLACEHOLDER', + 'sessionId': self._session_id, + }, + }, video_id, f'{format_id} broadcast JSON', fatal=False) + + playback = traverse_obj(response, ('data', 'initPlaybackSession', 'playback', {dict})) + m3u8_url = traverse_obj(playback, ('url', {url_or_none})) + token = traverse_obj(playback, ('token', {str})) + + if not (m3u8_url and token): + errors = '; '.join(traverse_obj(response, ('errors', ..., 'message', {str}))) + if 'not entitled' in errors: + raise ExtractorError(errors, expected=True) + elif errors: # Only warn when 'blacked out' since radio formats are available + self.report_warning(f'API returned errors for {format_id}: {errors}') + else: + self.report_warning(f'No formats available for {format_id} broadcast; skipping') + return [], {} + + cdn_headers = {'x-cdn-token': token} + fmts, subs = self._extract_m3u8_formats_and_subtitles( + m3u8_url.replace(f'/{token}/', '/'), video_id, 'mp4', + m3u8_id=format_id, fatal=False, headers=cdn_headers) + for fmt in fmts: + fmt['http_headers'] = cdn_headers + fmt.setdefault('format_note', join_nonempty(feed, medium, delim=' ')) + fmt.setdefault('language', language) + if fmt.get('vcodec') == 'none' and fmt['language'] == 'en': + fmt['source_preference'] = 10 + + return fmts, subs def _real_extract(self, url): video_id = self._match_id(url) - airings = 
self._download_json( - f'https://search-api-mlbtv.mlb.com/svc/search/v2/graphql/persisted/query/core/Airings?variables=%7B%22partnerProgramIds%22%3A%5B%22{video_id}%22%5D%2C%22applyEsniMediaRightsLabels%22%3Atrue%7D', - video_id)['data']['Airings'] + data = self._download_json( + 'https://statsapi.mlb.com/api/v1/schedule', video_id, query={ + 'gamePk': video_id, + 'hydrate': 'broadcasts(all),statusFlags', + }) + metadata = traverse_obj(data, ( + 'dates', ..., 'games', + lambda _, v: str(v['gamePk']) == video_id and not v.get('rescheduleDate'), any)) + + broadcasts = traverse_obj(metadata, ( + 'broadcasts', lambda _, v: v['mediaId'] and v['mediaState']['mediaStateCode'] != 'MEDIA_OFF')) formats, subtitles = [], {} - for airing in traverse_obj(airings, lambda _, v: v['playbackUrls'][0]['href']): - format_id = join_nonempty('feedType', 'feedLanguage', from_dict=airing) - m3u8_url = traverse_obj(self._download_json( - airing['playbackUrls'][0]['href'].format(scenario='browser~csai'), video_id, - note=f'Downloading {format_id} stream info JSON', - errnote=f'Failed to download {format_id} stream info, skipping', - fatal=False, headers={ - 'Authorization': self._access_token, - 'Accept': 'application/vnd.media-service+json; version=2', - }), ('stream', 'complete', {url_or_none})) - if not m3u8_url: - continue - f, s = self._extract_m3u8_formats_and_subtitles( - m3u8_url, video_id, 'mp4', m3u8_id=format_id, fatal=False) - formats.extend(f) - self._merge_subtitles(s, target=subtitles) + for broadcast in broadcasts: + fmts, subs = self._extract_formats_and_subtitles(broadcast, video_id) + formats.extend(fmts) + self._merge_subtitles(subs, target=subtitles) return { 'id': video_id, - 'title': traverse_obj(airings, (..., 'titles', 0, 'episodeName'), get_all=False), - 'is_live': traverse_obj(airings, (..., 'mediaConfig', 'productType'), get_all=False) == 'LIVE', + 'title': join_nonempty( + traverse_obj(metadata, ('officialDate', {str})), + traverse_obj(metadata, ('teams', 
('away', 'home'), 'team', 'name', {str}, all, {' @ '.join})), + delim=' - '), + 'is_live': traverse_obj(broadcasts, (..., 'mediaState', 'mediaStateCode', {str}, any)) == 'MEDIA_ON', + 'release_timestamp': traverse_obj(metadata, ('gameDate', {parse_iso8601})), 'formats': formats, 'subtitles': subtitles, - 'http_headers': {'Authorization': f'Bearer {self._access_token}'}, } diff --git a/plugin/yt-dlp/yt_dlp/extractor/niconico.py b/plugin/yt-dlp/yt_dlp/extractor/niconico.py index 9d7b010..179e7a9 100644 --- a/plugin/yt-dlp/yt_dlp/extractor/niconico.py +++ b/plugin/yt-dlp/yt_dlp/extractor/niconico.py @@ -40,7 +40,6 @@ class NiconicoIE(InfoExtractor): _TESTS = [{ 'url': 'http://www.nicovideo.jp/watch/sm22312215', - 'md5': 'd1a75c0823e2f629128c43e1212760f9', 'info_dict': { 'id': 'sm22312215', 'ext': 'mp4', @@ -56,8 +55,8 @@ class NiconicoIE(InfoExtractor): 'comment_count': int, 'genres': ['未設定'], 'tags': [], - 'expected_protocol': str, }, + 'params': {'skip_download': 'm3u8'}, }, { # File downloaded with and without credentials are different, so omit # the md5 field @@ -77,8 +76,8 @@ class NiconicoIE(InfoExtractor): 'view_count': int, 'genres': ['音楽・サウンド'], 'tags': ['Translation_Request', 'Kagamine_Rin', 'Rin_Original'], - 'expected_protocol': str, }, + 'params': {'skip_download': 'm3u8'}, }, { # 'video exists but is marked as "deleted" # md5 is unstable @@ -112,7 +111,6 @@ class NiconicoIE(InfoExtractor): }, { # video not available via `getflv`; "old" HTML5 video 'url': 'http://www.nicovideo.jp/watch/sm1151009', - 'md5': 'f95a3d259172667b293530cc2e41ebda', 'info_dict': { 'id': 'sm1151009', 'ext': 'mp4', @@ -128,11 +126,10 @@ class NiconicoIE(InfoExtractor): 'comment_count': int, 'genres': ['ゲーム'], 'tags': [], - 'expected_protocol': str, }, + 'params': {'skip_download': 'm3u8'}, }, { # "New" HTML5 video - # md5 is unstable 'url': 'http://www.nicovideo.jp/watch/sm31464864', 'info_dict': { 'id': 'sm31464864', @@ -149,12 +146,11 @@ class NiconicoIE(InfoExtractor): 
'comment_count': int, 'genres': ['アニメ'], 'tags': [], - 'expected_protocol': str, }, + 'params': {'skip_download': 'm3u8'}, }, { # Video without owner 'url': 'http://www.nicovideo.jp/watch/sm18238488', - 'md5': 'd265680a1f92bdcbbd2a507fc9e78a9e', 'info_dict': { 'id': 'sm18238488', 'ext': 'mp4', @@ -168,8 +164,8 @@ class NiconicoIE(InfoExtractor): 'comment_count': int, 'genres': ['エンターテイメント'], 'tags': [], - 'expected_protocol': str, }, + 'params': {'skip_download': 'm3u8'}, }, { 'url': 'http://sp.nicovideo.jp/watch/sm28964488?ss_pos=1&cp_in=wt_tg', 'only_matching': True, @@ -458,9 +454,11 @@ def _real_extract(self, url): if video_id.startswith('so'): video_id = self._match_id(handle.url) - api_data = self._parse_json(self._html_search_regex( - 'data-api-data="([^"]+)"', webpage, - 'API data', default='{}'), video_id) + api_data = traverse_obj( + self._parse_json(self._html_search_meta('server-response', webpage) or '', video_id), + ('data', 'response', {dict})) + if not api_data: + raise ExtractorError('Server response data not found') except ExtractorError as e: try: api_data = self._download_json( diff --git a/plugin/yt-dlp/yt_dlp/extractor/olympics.py b/plugin/yt-dlp/yt_dlp/extractor/olympics.py index becf052..bbf83e5 100644 --- a/plugin/yt-dlp/yt_dlp/extractor/olympics.py +++ b/plugin/yt-dlp/yt_dlp/extractor/olympics.py @@ -1,9 +1,19 @@ from .common import InfoExtractor -from ..utils import int_or_none, try_get +from ..networking.exceptions import HTTPError +from ..utils import ( + ExtractorError, + int_or_none, + parse_iso8601, + parse_qs, + try_get, + update_url, + url_or_none, +) +from ..utils.traversal import traverse_obj class OlympicsReplayIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?olympics\.com(?:/tokyo-2020)?/[a-z]{2}/(?:replay|video)/(?P[^/#&?]+)' + _VALID_URL = r'https?://(?:www\.)?olympics\.com/[a-z]{2}/(?:paris-2024/)?(?:replay|videos?|original-series/episode)/(?P[\w-]+)' _TESTS = [{ 'url': 
'https://olympics.com/fr/video/men-s-109kg-group-a-weightlifting-tokyo-2020-replays', 'info_dict': { @@ -11,26 +21,105 @@ class OlympicsReplayIE(InfoExtractor): 'ext': 'mp4', 'title': '+109kg (H) Groupe A - Haltérophilie | Replay de Tokyo 2020', 'upload_date': '20210801', - 'timestamp': 1627783200, + 'timestamp': 1627797600, 'description': 'md5:c66af4a5bc7429dbcc43d15845ff03b3', - 'uploader': 'International Olympic Committee', + 'thumbnail': 'https://img.olympics.com/images/image/private/t_1-1_1280/primary/nua4o7zwyaznoaejpbk2', + 'duration': 7017.0, }, - 'params': { - 'skip_download': True, + }, { + 'url': 'https://olympics.com/en/original-series/episode/b-boys-and-b-girls-take-the-spotlight-breaking-life-road-to-paris-2024', + 'info_dict': { + 'id': '32633650-c5ee-4280-8b94-fb6defb6a9b5', + 'ext': 'mp4', + 'title': 'B-girl Nicka - Breaking Life, Road to Paris 2024 | Episode 1', + 'upload_date': '20240517', + 'timestamp': 1715948200, + 'description': 'md5:f63d728a41270ec628f6ac33ce471bb1', + 'thumbnail': 'https://img.olympics.com/images/image/private/t_1-1_1280/primary/a3j96l7j6so3vyfijby1', + 'duration': 1321.0, + }, + }, { + 'url': 'https://olympics.com/en/paris-2024/videos/men-s-preliminaries-gbr-esp-ned-rsa-hockey-olympic-games-paris-2024', + 'info_dict': { + 'id': '3d96db23-8eee-4b7c-8ef5-488a0361026c', + 'ext': 'mp4', + 'title': 'Men\'s Preliminaries GBR-ESP & NED-RSA | Hockey | Olympic Games Paris 2024', + 'upload_date': '20240727', + 'timestamp': 1722066600, }, + 'skip': 'Geo-restricted to RU, BR, BT, NP, TM, BD, TL', }, { - 'url': 'https://olympics.com/tokyo-2020/en/replay/bd242924-4b22-49a5-a846-f1d4c809250d/mens-bronze-medal-match-hun-esp', - 'only_matching': True, + 'url': 'https://olympics.com/en/paris-2024/videos/dnp-suni-lee-i-have-goals-and-i-have-expectations-for-myself-but-i-also-am-trying-to-give-myself-grace', + 'info_dict': { + 'id': 'a42f37ab-8a74-41d0-a7d9-af27b7b02a90', + 'ext': 'mp4', + 'title': 'md5:c7cfbc9918636a98e66400a812e4d407', + 
'upload_date': '20240729', + 'timestamp': 1722288600, + }, }] + _GEO_BYPASS = False + + def _extract_from_nextjs_data(self, webpage, video_id): + data = traverse_obj(self._search_nextjs_data(webpage, video_id, default={}), ( + 'props', 'pageProps', 'page', 'items', + lambda _, v: v['name'] == 'videoPlaylist', 'data', 'currentVideo', {dict}, any)) + if not data: + return None + + geo_countries = traverse_obj(data, ('countries', ..., {str})) + if traverse_obj(data, ('geoRestrictedVideo', {bool})): + self.raise_geo_restricted(countries=geo_countries) + + is_live = traverse_obj(data, ('streamingStatus', {str})) == 'LIVE' + m3u8_url = traverse_obj(data, ('videoUrl', {url_or_none})) or data['streamUrl'] + tokenized_url = self._tokenize_url(m3u8_url, data['jwtToken'], is_live, video_id) + + try: + formats, subtitles = self._extract_m3u8_formats_and_subtitles( + tokenized_url, video_id, 'mp4', m3u8_id='hls') + except ExtractorError as e: + if isinstance(e.cause, HTTPError) and 'georestricted' in e.cause.msg: + self.raise_geo_restricted(countries=geo_countries) + raise + + return { + 'formats': formats, + 'subtitles': subtitles, + 'is_live': is_live, + **traverse_obj(data, { + 'id': ('videoID', {str}), + 'title': ('title', {str}), + 'timestamp': ('contentDate', {parse_iso8601}), + }), + } + + def _tokenize_url(self, url, token, is_live, video_id): + return self._download_json( + 'https://metering.olympics.com/tokengenerator', video_id, + 'Downloading tokenized m3u8 url', query={ + **parse_qs(url), + 'url': update_url(url, query=None), + 'service-id': 'live' if is_live else 'vod', + 'user-auth': token, + })['data']['url'] + + def _legacy_tokenize_url(self, url, video_id): + return self._download_json( + 'https://olympics.com/tokenGenerator', video_id, + 'Downloading legacy tokenized m3u8 url', query={'url': url}) def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) + + if info := 
self._extract_from_nextjs_data(webpage, video_id): + return info + title = self._html_search_meta(('title', 'og:title', 'twitter:title'), webpage) - uuid = self._html_search_meta('episode_uid', webpage) + video_uuid = self._html_search_meta('episode_uid', webpage) m3u8_url = self._html_search_meta('video_url', webpage) - json_ld = self._search_json_ld(webpage, uuid) + json_ld = self._search_json_ld(webpage, video_uuid) thumbnails_list = json_ld.get('image') if not thumbnails_list: thumbnails_list = self._html_search_regex( @@ -48,12 +137,12 @@ def _real_extract(self, url): 'width': width, 'height': int_or_none(try_get(width, lambda x: x * height_a / width_a)), }) - m3u8_url = self._download_json( - f'https://olympics.com/tokenGenerator?url={m3u8_url}', uuid, note='Downloading m3u8 url') - formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, uuid, 'mp4', m3u8_id='hls') + + formats, subtitles = self._extract_m3u8_formats_and_subtitles( + self._legacy_tokenize_url(m3u8_url, video_uuid), video_uuid, 'mp4', m3u8_id='hls') return { - 'id': uuid, + 'id': video_uuid, 'title': title, 'thumbnails': thumbnails, 'formats': formats, diff --git a/plugin/yt-dlp/yt_dlp/extractor/picarto.py b/plugin/yt-dlp/yt_dlp/extractor/picarto.py index 726fe41..72e89c3 100644 --- a/plugin/yt-dlp/yt_dlp/extractor/picarto.py +++ b/plugin/yt-dlp/yt_dlp/extractor/picarto.py @@ -5,6 +5,7 @@ ExtractorError, str_or_none, traverse_obj, + update_url, ) @@ -43,15 +44,16 @@ def _real_extract(self, url): url } }''' % (channel_id, channel_id), # noqa: UP031 - })['data'] + }, headers={'Accept': '*/*', 'Content-Type': 'application/json'})['data'] metadata = data['channel'] if metadata.get('online') == 0: raise ExtractorError('Stream is offline', expected=True) title = metadata['title'] - cdn_data = self._download_json( - data['getLoadBalancerUrl']['url'] + '/stream/json_' + metadata['stream_name'] + '.js', + cdn_data = self._download_json(''.join(( + 
update_url(data['getLoadBalancerUrl']['url'], scheme='https'), + '/stream/json_', metadata['stream_name'], '.js')), channel_id, 'Downloading load balancing info') formats = [] @@ -99,10 +101,10 @@ class PicartoVodIE(InfoExtractor): }, 'skip': 'The VOD does not exist', }, { - 'url': 'https://picarto.tv/ArtofZod/videos/772650', - 'md5': '00067a0889f1f6869cc512e3e79c521b', + 'url': 'https://picarto.tv/ArtofZod/videos/771008', + 'md5': 'abef5322f2700d967720c4c6754b2a34', 'info_dict': { - 'id': '772650', + 'id': '771008', 'ext': 'mp4', 'title': 'Art of Zod - Drawing and Painting', 'thumbnail': r're:^https?://.*\.jpg', @@ -131,7 +133,7 @@ def _real_extract(self, url): }} }} }}''', - })['data']['video'] + }, headers={'Accept': '*/*', 'Content-Type': 'application/json'})['data']['video'] file_name = data['file_name'] netloc = urllib.parse.urlparse(data['video_recording_image_url']).netloc diff --git a/plugin/yt-dlp/yt_dlp/extractor/soundcloud.py b/plugin/yt-dlp/yt_dlp/extractor/soundcloud.py index afb512d..4f8d964 100644 --- a/plugin/yt-dlp/yt_dlp/extractor/soundcloud.py +++ b/plugin/yt-dlp/yt_dlp/extractor/soundcloud.py @@ -871,7 +871,7 @@ class SoundcloudUserPermalinkIE(SoundcloudPagedPlaylistBaseIE): 'id': '30909869', 'title': 'neilcic', }, - 'playlist_mincount': 23, + 'playlist_mincount': 22, }] def _real_extract(self, url): @@ -880,7 +880,7 @@ def _real_extract(self, url): self._resolv_url(url), user_id, 'Downloading user info', headers=self._HEADERS) return self._extract_playlist( - f'{self._API_V2_BASE}stream/users/{user["id"]}', str(user['id']), user.get('username')) + f'{self._API_V2_BASE}users/{user["id"]}/tracks', str(user['id']), user.get('username')) class SoundcloudTrackStationIE(SoundcloudPagedPlaylistBaseIE): diff --git a/plugin/yt-dlp/yt_dlp/extractor/tiktok.py b/plugin/yt-dlp/yt_dlp/extractor/tiktok.py index c3505b1..9d823a3 100644 --- a/plugin/yt-dlp/yt_dlp/extractor/tiktok.py +++ b/plugin/yt-dlp/yt_dlp/extractor/tiktok.py @@ -23,7 +23,6 @@ mimetype2ext, 
parse_qs, qualities, - remove_start, srt_subtitles_timecode, str_or_none, traverse_obj, @@ -254,7 +253,16 @@ def _extract_web_data_and_status(self, url, video_id, fatal=True): def _get_subtitles(self, aweme_detail, aweme_id, user_name): # TODO: Extract text positioning info + + EXT_MAP = { # From lowest to highest preference + 'creator_caption': 'json', + 'srt': 'srt', + 'webvtt': 'vtt', + } + preference = qualities(tuple(EXT_MAP.values())) + subtitles = {} + # aweme/detail endpoint subs captions_info = traverse_obj( aweme_detail, ('interaction_stickers', ..., 'auto_video_caption_info', 'auto_captions', ...), expected_type=dict) @@ -278,8 +286,8 @@ def _get_subtitles(self, aweme_detail, aweme_id, user_name): if not caption.get('url'): continue subtitles.setdefault(caption.get('lang') or 'en', []).append({ - 'ext': remove_start(caption.get('caption_format'), 'web'), 'url': caption['url'], + 'ext': EXT_MAP.get(caption.get('Format')), }) # webpage subs if not subtitles: @@ -288,9 +296,14 @@ def _get_subtitles(self, aweme_detail, aweme_id, user_name): self._create_url(user_name, aweme_id), aweme_id, fatal=False) for caption in traverse_obj(aweme_detail, ('video', 'subtitleInfos', lambda _, v: v['Url'])): subtitles.setdefault(caption.get('LanguageCodeName') or 'en', []).append({ - 'ext': remove_start(caption.get('Format'), 'web'), 'url': caption['Url'], + 'ext': EXT_MAP.get(caption.get('Format')), }) + + # Deprioritize creator_caption json since it can't be embedded or used by media players + for lang, subs_list in subtitles.items(): + subtitles[lang] = sorted(subs_list, key=lambda x: preference(x['ext'])) + return subtitles def _parse_url_key(self, url_key): @@ -1458,9 +1471,11 @@ def _real_extract(self, url): if webpage: data = self._get_sigi_state(webpage, uploader or room_id) - room_id = (traverse_obj(data, ('UserModule', 'users', ..., 'roomId', {str_or_none}), get_all=False) - or self._search_regex(r'snssdk\d*://live\?room_id=(\d+)', webpage, 'room ID', 
default=None) - or room_id) + room_id = ( + traverse_obj(data, (( + ('LiveRoom', 'liveRoomUserInfo', 'user'), + ('UserModule', 'users', ...)), 'roomId', {str}, any)) + or self._search_regex(r'snssdk\d*://live\?room_id=(\d+)', webpage, 'room ID', default=room_id)) uploader = uploader or traverse_obj( data, ('LiveRoom', 'liveRoomUserInfo', 'user', 'uniqueId'), ('UserModule', 'users', ..., 'uniqueId'), get_all=False, expected_type=str) diff --git a/plugin/yt-dlp/yt_dlp/extractor/tv5mondeplus.py b/plugin/yt-dlp/yt_dlp/extractor/tv5mondeplus.py index 52ff230..953eb77 100644 --- a/plugin/yt-dlp/yt_dlp/extractor/tv5mondeplus.py +++ b/plugin/yt-dlp/yt_dlp/extractor/tv5mondeplus.py @@ -96,7 +96,7 @@ def _extract_subtitles(data_captions): def _real_extract(self, url): display_id = self._match_id(url) - webpage = self._download_webpage(url, display_id) + webpage = self._download_webpage(url, display_id, impersonate=True) if ">Ce programme n'est malheureusement pas disponible pour votre zone géographique.<" in webpage: self.raise_geo_restricted(countries=['FR']) @@ -122,8 +122,9 @@ def process_video_files(v): if not token: continue deferred_json = self._download_json( - f'https://api.tv5monde.com/player/asset/{d_param}/resolve?condenseKS=true', display_id, - note='Downloading deferred info', headers={'Authorization': f'Bearer {token}'}, fatal=False) + f'https://api.tv5monde.com/player/asset/{d_param}/resolve?condenseKS=true', + display_id, 'Downloading deferred info', fatal=False, impersonate=True, + headers={'Authorization': f'Bearer {token}'}) v_url = traverse_obj(deferred_json, (0, 'url', {url_or_none})) if not v_url: continue diff --git a/plugin/yt-dlp/yt_dlp/extractor/tva.py b/plugin/yt-dlp/yt_dlp/extractor/tva.py index e3e1055..d702640 100644 --- a/plugin/yt-dlp/yt_dlp/extractor/tva.py +++ b/plugin/yt-dlp/yt_dlp/extractor/tva.py @@ -1,60 +1,29 @@ import functools import re +from .brightcove import BrightcoveNewIE from .common import InfoExtractor from ..utils import 
float_or_none, int_or_none, smuggle_url, strip_or_none from ..utils.traversal import traverse_obj class TVAIE(InfoExtractor): - _VALID_URL = r'https?://videos?\.tva\.ca/details/_(?P\d+)' + IE_NAME = 'tvaplus' + IE_DESC = 'TVA+' + _VALID_URL = r'https?://(?:www\.)?tvaplus\.ca/(?:[^/?#]+/)*[\w-]+-(?P\d+)(?:$|[#?])' _TESTS = [{ - 'url': 'https://videos.tva.ca/details/_5596811470001', - 'info_dict': { - 'id': '5596811470001', - 'ext': 'mp4', - 'title': 'Un extrait de l\'épisode du dimanche 8 octobre 2017 !', - 'uploader_id': '5481942443001', - 'upload_date': '20171003', - 'timestamp': 1507064617, - }, - 'params': { - # m3u8 download - 'skip_download': True, - }, - 'skip': 'HTTP Error 404: Not Found', - }, { - 'url': 'https://video.tva.ca/details/_5596811470001', - 'only_matching': True, - }] - BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/5481942443001/default_default/index.html?videoId=%s' - - def _real_extract(self, url): - video_id = self._match_id(url) - - return { - '_type': 'url_transparent', - 'id': video_id, - 'url': smuggle_url(self.BRIGHTCOVE_URL_TEMPLATE % video_id, {'geo_countries': ['CA']}), - 'ie_key': 'BrightcoveNew', - } - - -class QubIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?qub\.ca/(?:[^/]+/)*[0-9a-z-]+-(?P\d+)' - _TESTS = [{ - 'url': 'https://www.qub.ca/tvaplus/tva/alerte-amber/saison-1/episode-01-1000036619', + 'url': 'https://www.tvaplus.ca/tva/alerte-amber/saison-1/episode-01-1000036619', 'md5': '949490fd0e7aee11d0543777611fbd53', 'info_dict': { 'id': '6084352463001', 'ext': 'mp4', - 'title': 'Ép 01. Mon dernier jour', + 'title': 'Mon dernier jour', 'uploader_id': '5481942443001', 'upload_date': '20190907', 'timestamp': 1567899756, 'description': 'md5:9c0d7fbb90939420c651fd977df90145', 'thumbnail': r're:https://.+\.jpg', - 'episode': 'Ép 01. 
Mon dernier jour', + 'episode': 'Mon dernier jour', 'episode_number': 1, 'tags': ['alerte amber', 'alerte amber saison 1', 'surdemande'], 'duration': 2625.963, @@ -64,23 +33,36 @@ class QubIE(InfoExtractor): 'channel': 'TVA', }, }, { - 'url': 'https://www.qub.ca/tele/video/lcn-ca-vous-regarde-rev-30s-ap369664-1009357943', - 'only_matching': True, + 'url': 'https://www.tvaplus.ca/tva/le-baiser-du-barbu/le-baiser-du-barbu-886644190', + 'info_dict': { + 'id': '6354448043112', + 'ext': 'mp4', + 'title': 'Le Baiser du barbu', + 'uploader_id': '5481942443001', + 'upload_date': '20240606', + 'timestamp': 1717694023, + 'description': 'md5:025b1219086c1cbf4bc27e4e034e8b57', + 'thumbnail': r're:https://.+\.jpg', + 'episode': 'Le Baiser du barbu', + 'tags': ['fullepisode', 'films'], + 'duration': 6053.504, + 'series': 'Le Baiser du barbu', + 'channel': 'TVA', + }, }] - # reference_id also works with old account_id(5481942443001) - # BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/5813221784001/default_default/index.html?videoId=ref:%s' + _BC_URL_TMPL = 'https://players.brightcove.net/5481942443001/default_default/index.html?videoId={}' def _real_extract(self, url): entity_id = self._match_id(url) webpage = self._download_webpage(url, entity_id) - entity = self._search_nextjs_data(webpage, entity_id)['props']['initialProps']['pageProps']['fallbackData'] + entity = self._search_nextjs_data(webpage, entity_id)['props']['pageProps']['staticEntity'] video_id = entity['videoId'] episode = strip_or_none(entity.get('name')) return { '_type': 'url_transparent', - 'url': f'https://videos.tva.ca/details/_{video_id}', - 'ie_key': TVAIE.ie_key(), + 'url': smuggle_url(self._BC_URL_TMPL.format(video_id), {'geo_countries': ['CA']}), + 'ie_key': BrightcoveNewIE.ie_key(), 'id': video_id, 'title': episode, 'episode': episode, diff --git a/plugin/yt-dlp/yt_dlp/extractor/tver.py b/plugin/yt-dlp/yt_dlp/extractor/tver.py index 8105db4..c13832c 100644 --- 
a/plugin/yt-dlp/yt_dlp/extractor/tver.py +++ b/plugin/yt-dlp/yt_dlp/extractor/tver.py @@ -10,7 +10,7 @@ class TVerIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?tver\.jp/(?:(?Plp|corner|series|episodes?|feature|tokyo2020/video)/)+(?P[a-zA-Z0-9]+)' + _VALID_URL = r'https?://(?:www\.)?tver\.jp/(?:(?Plp|corner|series|episodes?|feature|tokyo2020/video|olympic/paris2024/video)/)+(?P[a-zA-Z0-9]+)' _TESTS = [{ 'skip': 'videos are only available for 7 days', 'url': 'https://tver.jp/episodes/ep83nf3w4p', @@ -23,6 +23,20 @@ class TVerIE(InfoExtractor): 'channel': 'テレビ朝日', }, 'add_ie': ['BrightcoveNew'], + }, { + 'url': 'https://tver.jp/olympic/paris2024/video/6359578055112/', + 'info_dict': { + 'id': '6359578055112', + 'ext': 'mp4', + 'title': '堀米雄斗 金メダルで五輪連覇!「みんなの応援が最後に乗れたカギ」', + 'timestamp': 1722279928, + 'upload_date': '20240729', + 'tags': ['20240729', 'japanese', 'japanmedal', 'paris'], + 'uploader_id': '4774017240001', + 'thumbnail': r're:https?://[^/?#]+boltdns\.net/[^?#]+/1920x1080/match/image\.jpg', + 'duration': 670.571, + }, + 'params': {'skip_download': 'm3u8'}, }, { 'url': 'https://tver.jp/corner/f0103888', 'only_matching': True, @@ -47,7 +61,15 @@ def _real_initialize(self): def _real_extract(self, url): video_id, video_type = self._match_valid_url(url).group('id', 'type') - if video_type not in {'series', 'episodes'}: + + if video_type == 'olympic/paris2024/video': + # Player ID is taken from .content.brightcove.E200.pro.pc.account_id: + # https://tver.jp/olympic/paris2024/req/api/hook?q=https%3A%2F%2Folympic-assets.tver.jp%2Fweb-static%2Fjson%2Fconfig.json&d= + return self.url_result(smuggle_url( + self.BRIGHTCOVE_URL_TEMPLATE % ('4774017240001', video_id), + {'geo_countries': ['JP']}), 'BrightcoveNew') + + elif video_type not in {'series', 'episodes'}: webpage = self._download_webpage(url, video_id, note='Resolving to new URL') video_id = self._match_id(self._search_regex( (r'canonical"\s*href="(https?://tver\.jp/[^"]+)"', 
r'&link=(https?://tver\.jp/[^?&]+)[?&]'), diff --git a/plugin/yt-dlp/yt_dlp/extractor/unsupported.py b/plugin/yt-dlp/yt_dlp/extractor/unsupported.py index 1e2d118..8b7ec1d 100644 --- a/plugin/yt-dlp/yt_dlp/extractor/unsupported.py +++ b/plugin/yt-dlp/yt_dlp/extractor/unsupported.py @@ -49,6 +49,7 @@ class KnownDRMIE(UnsupportedInfoExtractor): r'amazon\.(?:\w{2}\.)?\w+/gp/video', r'music\.amazon\.(?:\w{2}\.)?\w+', r'(?:watch|front)\.njpwworld\.com', + r'qub\.ca/vrai', ) _TESTS = [{ @@ -149,6 +150,9 @@ class KnownDRMIE(UnsupportedInfoExtractor): }, { 'url': 'https://front.njpwworld.com/p/s_series_00563_16_bs', 'only_matching': True, + }, { + 'url': 'https://www.qub.ca/vrai/l-effet-bocuse-d-or/saison-1/l-effet-bocuse-d-or-saison-1-bande-annonce-1098225063', + 'only_matching': True, }] def _real_extract(self, url): diff --git a/plugin/yt-dlp/yt_dlp/extractor/vimeo.py b/plugin/yt-dlp/yt_dlp/extractor/vimeo.py index 18eb084..a20cf4b 100644 --- a/plugin/yt-dlp/yt_dlp/extractor/vimeo.py +++ b/plugin/yt-dlp/yt_dlp/extractor/vimeo.py @@ -212,16 +212,6 @@ def _parse_config(self, config, video_id): owner = video_data.get('owner') or {} video_uploader_url = owner.get('url') - duration = int_or_none(video_data.get('duration')) - chapter_data = try_get(config, lambda x: x['embed']['chapters']) or [] - chapters = [{ - 'title': current_chapter.get('title'), - 'start_time': current_chapter.get('timecode'), - 'end_time': next_chapter.get('timecode'), - } for current_chapter, next_chapter in zip(chapter_data, chapter_data[1:] + [{'timecode': duration}])] - if chapters and chapters[0]['start_time']: # Chapters may not start from 0 - chapters[:0] = [{'title': '', 'start_time': 0, 'end_time': chapters[0]['start_time']}] - return { 'id': str_or_none(video_data.get('id')) or video_id, 'title': video_title, @@ -229,8 +219,12 @@ def _parse_config(self, config, video_id): 'uploader_id': video_uploader_url.split('/')[-1] if video_uploader_url else None, 'uploader_url': video_uploader_url, 
'thumbnails': thumbnails, - 'duration': duration, - 'chapters': chapters or None, + 'duration': int_or_none(video_data.get('duration')), + 'chapters': sorted(traverse_obj(config, ( + 'embed', 'chapters', lambda _, v: int(v['timecode']) is not None, { + 'title': ('title', {str}), + 'start_time': ('timecode', {int_or_none}), + })), key=lambda c: c['start_time']) or None, 'formats': formats, 'subtitles': subtitles, 'live_status': live_status, @@ -708,6 +702,39 @@ class VimeoIE(VimeoBaseInfoExtractor): 'skip_download': True, }, }, + { + # chapters must be sorted, see: https://github.com/yt-dlp/yt-dlp/issues/5308 + 'url': 'https://player.vimeo.com/video/756714419', + 'info_dict': { + 'id': '756714419', + 'ext': 'mp4', + 'title': 'Dr Arielle Schwartz - Therapeutic yoga for optimum sleep', + 'uploader': 'Alex Howard', + 'uploader_id': 'user54729178', + 'uploader_url': 'https://vimeo.com/user54729178', + 'thumbnail': r're:https://i\.vimeocdn\.com/video/1520099929-[\da-f]+-d_1280', + 'duration': 2636, + 'chapters': [ + {'start_time': 0, 'end_time': 10, 'title': ''}, + {'start_time': 10, 'end_time': 106, 'title': 'Welcoming Dr Arielle Schwartz'}, + {'start_time': 106, 'end_time': 305, 'title': 'What is therapeutic yoga?'}, + {'start_time': 305, 'end_time': 594, 'title': 'Vagal toning practices'}, + {'start_time': 594, 'end_time': 888, 'title': 'Trauma and difficulty letting go'}, + {'start_time': 888, 'end_time': 1059, 'title': "Dr Schwartz' insomnia experience"}, + {'start_time': 1059, 'end_time': 1471, 'title': 'A strategy for helping sleep issues'}, + {'start_time': 1471, 'end_time': 1667, 'title': 'Yoga nidra'}, + {'start_time': 1667, 'end_time': 2121, 'title': 'Wisdom in stillness'}, + {'start_time': 2121, 'end_time': 2386, 'title': 'What helps us be more able to let go?'}, + {'start_time': 2386, 'end_time': 2510, 'title': 'Practical tips to help ourselves'}, + {'start_time': 2510, 'end_time': 2636, 'title': 'Where to find out more'}, + ], + }, + 'params': { + 
'http_headers': {'Referer': 'https://sleepsuperconference.com'}, + 'skip_download': 'm3u8', + }, + 'expected_warnings': ['Failed to parse XML: not well-formed'], + }, { # user playlist alias -> https://vimeo.com/258705797 'url': 'https://vimeo.com/user26785108/newspiritualguide', @@ -1240,7 +1267,7 @@ class VimeoGroupsIE(VimeoChannelIE): # XXX: Do not subclass from concrete IE class VimeoReviewIE(VimeoBaseInfoExtractor): IE_NAME = 'vimeo:review' IE_DESC = 'Review pages on vimeo' - _VALID_URL = r'(?Phttps://vimeo\.com/[^/]+/review/(?P[^/]+)/[0-9a-f]{10})' + _VALID_URL = r'https?://vimeo\.com/(?P[^/?#]+)/review/(?P\d+)/(?P[\da-f]{10})' _TESTS = [{ 'url': 'https://vimeo.com/user21297594/review/75524534/3c257a1b5d', 'md5': 'c507a72f780cacc12b2248bb4006d253', @@ -1286,26 +1313,22 @@ class VimeoReviewIE(VimeoBaseInfoExtractor): }] def _real_extract(self, url): - page_url, video_id = self._match_valid_url(url).groups() - data = self._download_json( - page_url.replace('/review/', '/review/data/'), video_id) + user, video_id, review_hash = self._match_valid_url(url).group('user', 'id', 'hash') + data_url = f'https://vimeo.com/{user}/review/data/{video_id}/{review_hash}' + data = self._download_json(data_url, video_id) if data.get('isLocked') is True: video_password = self._get_video_password() viewer = self._download_json( 'https://vimeo.com/_rv/viewer', video_id) - webpage = self._verify_video_password(video_id, video_password, viewer['xsrft']) - clip_page_config = self._parse_json(self._search_regex( - r'window\.vimeo\.clip_page_config\s*=\s*({.+?});', - webpage, 'clip page config'), video_id) - config_url = clip_page_config['player']['config_url'] - clip_data = clip_page_config.get('clip') or {} - else: - clip_data = data['clipData'] - config_url = clip_data['configUrl'] + self._verify_video_password(video_id, video_password, viewer['xsrft']) + data = self._download_json(data_url, video_id) + clip_data = data['clipData'] + config_url = clip_data['configUrl'] config = 
self._download_json(config_url, video_id) info_dict = self._parse_config(config, video_id) source_format = self._extract_original_format( - page_url + '/action', video_id) + f'https://vimeo.com/{user}/review/{video_id}/{review_hash}/action', video_id, + unlisted_hash=traverse_obj(config_url, ({parse_qs}, 'h', -1))) if source_format: info_dict['formats'].append(source_format) info_dict['description'] = clean_html(clip_data.get('description')) diff --git a/plugin/yt-dlp/yt_dlp/extractor/youku.py b/plugin/yt-dlp/yt_dlp/extractor/youku.py index fa6b053..3bdfa6c 100644 --- a/plugin/yt-dlp/yt_dlp/extractor/youku.py +++ b/plugin/yt-dlp/yt_dlp/extractor/youku.py @@ -136,7 +136,7 @@ def _real_extract(self, url): # request basic data basic_data_params = { 'vid': video_id, - 'ccode': '0524', + 'ccode': '0564', 'client_ip': '192.168.1.1', 'utid': cna, 'client_ts': time.time() / 1000, diff --git a/plugin/yt-dlp/yt_dlp/extractor/youtube.py b/plugin/yt-dlp/yt_dlp/extractor/youtube.py index 1c0a70d..224c9b9 100644 --- a/plugin/yt-dlp/yt_dlp/extractor/youtube.py +++ b/plugin/yt-dlp/yt_dlp/extractor/youtube.py @@ -72,133 +72,169 @@ # any clients starting with _ cannot be explicitly requested by the user INNERTUBE_CLIENTS = { 'web': { - 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8', 'INNERTUBE_CONTEXT': { 'client': { 'clientName': 'WEB', - 'clientVersion': '2.20220801.00.00', + 'clientVersion': '2.20240726.00.00', + }, + }, + 'INNERTUBE_CONTEXT_CLIENT_NAME': 1, + }, + # Safari UA returns pre-merged video+audio 144p/240p/360p/720p/1080p HLS formats + 'web_safari': { + 'INNERTUBE_CONTEXT': { + 'client': { + 'clientName': 'WEB', + 'clientVersion': '2.20240726.00.00', + 'userAgent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.5 Safari/605.1.15,gzip(gfe)', }, }, 'INNERTUBE_CONTEXT_CLIENT_NAME': 1, }, 'web_embedded': { - 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8', 'INNERTUBE_CONTEXT': { 'client': 
{ 'clientName': 'WEB_EMBEDDED_PLAYER', - 'clientVersion': '1.20220731.00.00', + 'clientVersion': '1.20240723.01.00', }, }, 'INNERTUBE_CONTEXT_CLIENT_NAME': 56, }, 'web_music': { - 'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30', 'INNERTUBE_HOST': 'music.youtube.com', 'INNERTUBE_CONTEXT': { 'client': { 'clientName': 'WEB_REMIX', - 'clientVersion': '1.20220727.01.00', + 'clientVersion': '1.20240724.00.00', }, }, 'INNERTUBE_CONTEXT_CLIENT_NAME': 67, }, 'web_creator': { - 'INNERTUBE_API_KEY': 'AIzaSyBUPetSUmoZL-OhlxA7wSac5XinrygCqMo', 'INNERTUBE_CONTEXT': { 'client': { 'clientName': 'WEB_CREATOR', - 'clientVersion': '1.20220726.00.00', + 'clientVersion': '1.20240723.03.00', }, }, 'INNERTUBE_CONTEXT_CLIENT_NAME': 62, }, 'android': { - 'INNERTUBE_API_KEY': 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w', 'INNERTUBE_CONTEXT': { 'client': { 'clientName': 'ANDROID', - 'clientVersion': '19.09.37', + 'clientVersion': '19.29.37', 'androidSdkVersion': 30, - 'userAgent': 'com.google.android.youtube/19.09.37 (Linux; U; Android 11) gzip', + 'userAgent': 'com.google.android.youtube/19.29.37 (Linux; U; Android 11) gzip', + 'osName': 'Android', + 'osVersion': '11', }, }, 'INNERTUBE_CONTEXT_CLIENT_NAME': 3, 'REQUIRE_JS_PLAYER': False, }, - 'android_embedded': { - 'INNERTUBE_API_KEY': 'AIzaSyCjc_pVEDi4qsv5MtC2dMXzpIaDoRFLsxw', + 'android_music': { 'INNERTUBE_CONTEXT': { 'client': { - 'clientName': 'ANDROID_EMBEDDED_PLAYER', - 'clientVersion': '19.09.37', + 'clientName': 'ANDROID_MUSIC', + 'clientVersion': '7.11.50', 'androidSdkVersion': 30, - 'userAgent': 'com.google.android.youtube/19.09.37 (Linux; U; Android 11) gzip', + 'userAgent': 'com.google.android.apps.youtube.music/7.11.50 (Linux; U; Android 11) gzip', + 'osName': 'Android', + 'osVersion': '11', }, }, - 'INNERTUBE_CONTEXT_CLIENT_NAME': 55, + 'INNERTUBE_CONTEXT_CLIENT_NAME': 21, 'REQUIRE_JS_PLAYER': False, }, - 'android_music': { - 'INNERTUBE_API_KEY': 'AIzaSyAOghZGza2MQSZkY_zfZ370N-PUdXEo8AI', + 'android_creator': { 
'INNERTUBE_CONTEXT': { 'client': { - 'clientName': 'ANDROID_MUSIC', - 'clientVersion': '6.42.52', + 'clientName': 'ANDROID_CREATOR', + 'clientVersion': '24.30.100', 'androidSdkVersion': 30, - 'userAgent': 'com.google.android.apps.youtube.music/6.42.52 (Linux; U; Android 11) gzip', + 'userAgent': 'com.google.android.apps.youtube.creator/24.30.100 (Linux; U; Android 11) gzip', + 'osName': 'Android', + 'osVersion': '11', }, }, - 'INNERTUBE_CONTEXT_CLIENT_NAME': 21, + 'INNERTUBE_CONTEXT_CLIENT_NAME': 14, 'REQUIRE_JS_PLAYER': False, }, - 'android_creator': { - 'INNERTUBE_API_KEY': 'AIzaSyD_qjV8zaaUMehtLkrKFgVeSX_Iqbtyws8', + # YouTube Kids videos aren't returned on this client for some reason + 'android_vr': { 'INNERTUBE_CONTEXT': { 'client': { - 'clientName': 'ANDROID_CREATOR', - 'clientVersion': '22.30.100', + 'clientName': 'ANDROID_VR', + 'clientVersion': '1.57.29', + 'deviceMake': 'Oculus', + 'deviceModel': 'Quest 3', + 'androidSdkVersion': 32, + 'userAgent': 'com.google.android.apps.youtube.vr.oculus/1.57.29 (Linux; U; Android 12L; eureka-user Build/SQ3A.220605.009.A1) gzip', + 'osName': 'Android', + 'osVersion': '12L', + }, + }, + 'INNERTUBE_CONTEXT_CLIENT_NAME': 28, + 'REQUIRE_JS_PLAYER': False, + }, + 'android_testsuite': { + 'INNERTUBE_CONTEXT': { + 'client': { + 'clientName': 'ANDROID_TESTSUITE', + 'clientVersion': '1.9', 'androidSdkVersion': 30, - 'userAgent': 'com.google.android.apps.youtube.creator/22.30.100 (Linux; U; Android 11) gzip', + 'userAgent': 'com.google.android.youtube/1.9 (Linux; U; Android 11) gzip', + 'osName': 'Android', + 'osVersion': '11', }, }, - 'INNERTUBE_CONTEXT_CLIENT_NAME': 14, + 'INNERTUBE_CONTEXT_CLIENT_NAME': 30, 'REQUIRE_JS_PLAYER': False, + 'PLAYER_PARAMS': '2AMB', }, - # iOS clients have HLS live streams. Setting device model to get 60fps formats. 
- # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558 - 'ios': { - 'INNERTUBE_API_KEY': 'AIzaSyB-63vPrdThhKuerbB2N_l7Kwwcxj6yUAc', + # This client only has legacy formats and storyboards + 'android_producer': { 'INNERTUBE_CONTEXT': { 'client': { - 'clientName': 'IOS', - 'clientVersion': '19.09.3', - 'deviceModel': 'iPhone14,3', - 'userAgent': 'com.google.ios.youtube/19.09.3 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)', + 'clientName': 'ANDROID_PRODUCER', + 'clientVersion': '0.111.1', + 'androidSdkVersion': 30, + 'userAgent': 'com.google.android.apps.youtube.producer/0.111.1 (Linux; U; Android 11) gzip', + 'osName': 'Android', + 'osVersion': '11', }, }, - 'INNERTUBE_CONTEXT_CLIENT_NAME': 5, + 'INNERTUBE_CONTEXT_CLIENT_NAME': 91, 'REQUIRE_JS_PLAYER': False, }, - 'ios_embedded': { + # iOS clients have HLS live streams. Setting device model to get 60fps formats. + # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558 + 'ios': { 'INNERTUBE_CONTEXT': { 'client': { - 'clientName': 'IOS_MESSAGES_EXTENSION', - 'clientVersion': '19.09.3', - 'deviceModel': 'iPhone14,3', - 'userAgent': 'com.google.ios.youtube/19.09.3 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)', + 'clientName': 'IOS', + 'clientVersion': '19.29.1', + 'deviceMake': 'Apple', + 'deviceModel': 'iPhone16,2', + 'userAgent': 'com.google.ios.youtube/19.29.1 (iPhone16,2; U; CPU iOS 17_5_1 like Mac OS X;)', + 'osName': 'iPhone', + 'osVersion': '17.5.1.21F90', }, }, - 'INNERTUBE_CONTEXT_CLIENT_NAME': 66, + 'INNERTUBE_CONTEXT_CLIENT_NAME': 5, 'REQUIRE_JS_PLAYER': False, }, 'ios_music': { - 'INNERTUBE_API_KEY': 'AIzaSyBAETezhkwP0ZWA02RsqT1zu78Fpt0bC_s', 'INNERTUBE_CONTEXT': { 'client': { 'clientName': 'IOS_MUSIC', - 'clientVersion': '6.33.3', - 'deviceModel': 'iPhone14,3', - 'userAgent': 'com.google.ios.youtubemusic/6.33.3 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)', + 'clientVersion': '7.08.2', + 'deviceMake': 'Apple', + 'deviceModel': 'iPhone16,2', 
+ 'userAgent': 'com.google.ios.youtubemusic/7.08.2 (iPhone16,2; U; CPU iOS 17_5_1 like Mac OS X;)', + 'osName': 'iPhone', + 'osVersion': '17.5.1.21F90', }, }, 'INNERTUBE_CONTEXT_CLIENT_NAME': 26, @@ -208,9 +244,12 @@ 'INNERTUBE_CONTEXT': { 'client': { 'clientName': 'IOS_CREATOR', - 'clientVersion': '22.33.101', - 'deviceModel': 'iPhone14,3', - 'userAgent': 'com.google.ios.ytcreator/22.33.101 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)', + 'clientVersion': '24.30.100', + 'deviceMake': 'Apple', + 'deviceModel': 'iPhone16,2', + 'userAgent': 'com.google.ios.ytcreator/24.30.100 (iPhone16,2; U; CPU iOS 17_5_1 like Mac OS X;)', + 'osName': 'iPhone', + 'osVersion': '17.5.1.21F90', }, }, 'INNERTUBE_CONTEXT_CLIENT_NAME': 15, @@ -219,19 +258,26 @@ # mweb has 'ultralow' formats # See: https://github.com/yt-dlp/yt-dlp/pull/557 'mweb': { - 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8', 'INNERTUBE_CONTEXT': { 'client': { 'clientName': 'MWEB', - 'clientVersion': '2.20220801.00.00', + 'clientVersion': '2.20240726.01.00', }, }, 'INNERTUBE_CONTEXT_CLIENT_NAME': 2, }, + 'tv': { + 'INNERTUBE_CONTEXT': { + 'client': { + 'clientName': 'TVHTML5', + 'clientVersion': '7.20240724.13.00', + }, + }, + 'INNERTUBE_CONTEXT_CLIENT_NAME': 7, + }, # This client can access age restricted videos (unless the uploader has disabled the 'allow embedding' option) # See: https://github.com/zerodytrash/YouTube-Internal-Clients 'tv_embedded': { - 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8', 'INNERTUBE_CONTEXT': { 'client': { 'clientName': 'TVHTML5_SIMPLY_EMBEDDED_PLAYER', @@ -249,6 +295,7 @@ }, }, 'INNERTUBE_CONTEXT_CLIENT_NAME': 95, + 'REQUIRE_JS_PLAYER': False, }, } @@ -262,7 +309,7 @@ def _split_innertube_client(client_name): def short_client_name(client_name): - main, *parts = _split_innertube_client(client_name)[0].replace('embedscreen', 'e_s').split('_') + main, *parts = _split_innertube_client(client_name)[0].split('_') return join_nonempty(main[:4], ''.join(x[0] for 
x in parts)).upper() @@ -270,27 +317,22 @@ def build_innertube_clients(): THIRD_PARTY = { 'embedUrl': 'https://www.youtube.com/', # Can be any valid URL } - BASE_CLIENTS = ('ios', 'android', 'web', 'tv', 'mweb') + BASE_CLIENTS = ('ios', 'web', 'tv', 'mweb', 'android') priority = qualities(BASE_CLIENTS[::-1]) for client, ytcfg in tuple(INNERTUBE_CLIENTS.items()): - ytcfg.setdefault('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8') ytcfg.setdefault('INNERTUBE_HOST', 'www.youtube.com') ytcfg.setdefault('REQUIRE_JS_PLAYER', True) + ytcfg.setdefault('PLAYER_PARAMS', None) ytcfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en') _, base_client, variant = _split_innertube_client(client) ytcfg['priority'] = 10 * priority(base_client) - if not variant: - INNERTUBE_CLIENTS[f'{client}_embedscreen'] = embedscreen = copy.deepcopy(ytcfg) - embedscreen['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED' - embedscreen['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY - embedscreen['priority'] -= 3 - elif variant == 'embedded': + if variant == 'embedded': ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY ytcfg['priority'] -= 2 - else: + elif variant: ytcfg['priority'] -= 3 @@ -566,9 +608,6 @@ def _select_api_hostname(self, req_api_hostname, default_client=None): return (self._configuration_arg('innertube_host', [''], ie_key=YoutubeIE.ie_key())[0] or req_api_hostname or self._get_innertube_host(default_client or 'web')) - def _extract_api_key(self, ytcfg=None, default_client='web'): - return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], str, default_client) - def _extract_context(self, ytcfg=None, default_client='web'): context = get_first( (ytcfg, self._get_default_ytcfg(default_client)), 'INNERTUBE_CONTEXT', expected_type=dict) @@ -614,13 +653,15 @@ def _call_api(self, ep, query, video_id, fatal=True, headers=None, real_headers.update({'content-type': 'application/json'}) if headers: real_headers.update(headers) - api_key = 
(self._configuration_arg('innertube_key', [''], ie_key=YoutubeIE.ie_key(), casesense=True)[0] - or api_key or self._extract_api_key(default_client=default_client)) return self._download_json( f'https://{self._select_api_hostname(api_hostname, default_client)}/youtubei/v1/{ep}', video_id=video_id, fatal=fatal, note=note, errnote=errnote, data=json.dumps(data).encode('utf8'), headers=real_headers, - query={'key': api_key, 'prettyPrint': 'false'}) + query=filter_dict({ + 'key': self._configuration_arg( + 'innertube_key', [api_key], ie_key=YoutubeIE.ie_key(), casesense=True)[0], + 'prettyPrint': 'false', + }, cndn=lambda _, v: v)) def extract_yt_initial_data(self, item_id, webpage, fatal=True): return self._search_json(self._YT_INITIAL_DATA_RE, webpage, 'yt initial data', item_id, fatal=fatal) @@ -972,7 +1013,6 @@ def _extract_response(self, item_id, query, note='Downloading API JSON', headers ep=ep, fatal=True, headers=headers, video_id=item_id, query=query, note=note, context=self._extract_context(ytcfg, default_client), - api_key=self._extract_api_key(ytcfg, default_client), api_hostname=api_hostname, default_client=default_client) except ExtractorError as e: if not isinstance(e.cause, network_exceptions): @@ -1294,6 +1334,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor): '401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'}, } _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt') + _POTOKEN_EXPERIMENTS = ('51217476', '51217102') + _BROKEN_CLIENTS = { + short_client_name(client): client + for client in ('android', 'android_creator', 'android_music') + } _GEO_BYPASS = False @@ -3128,12 +3173,42 @@ def _decrypt_nsig(self, s, video_id, player_url): self.write_debug(f'Decrypted nsig {s} => {ret}') return ret - def _extract_n_function_name(self, jscode): + def _extract_n_function_name(self, jscode, player_url=None): + # Examples (with placeholders nfunc, narray, idx): + # * .get("n"))&&(b=nfunc(b) + # * 
.get("n"))&&(b=narray[idx](b) + # * b=String.fromCharCode(110),c=a.get(b))&&c=narray[idx](c) + # * a.D&&(b="nn"[+a.D],c=a.get(b))&&(c=narray[idx](c),a.set(b,c),narray.length||nfunc("") + # * a.D&&(PL(a),b=a.j.n||null)&&(b=narray[0](b),a.set("n",b),narray.length||nfunc("") + # * a.D&&(b="nn"[+a.D],vL(a),c=a.j[b]||null)&&(c=narray[idx](c),a.set(b,c),narray.length||nfunc("") funcname, idx = self._search_regex( - r'''(?x)(?:\.get\("n"\)\)&&\(b=|b=String\.fromCharCode\(110\),c=a\.get\(b\)\)&&\(c=) - (?P<nfunc>[a-zA-Z0-9$]+)(?:\[(?P<idx>\d+)\])?\([a-zA-Z0-9]\)''', - jscode, 'Initial JS player n function name', group=('nfunc', 'idx')) - if not idx: + r'''(?x) + (?: + \.get\("n"\)\)&&\(b=| + (?: + b=String\.fromCharCode\(110\)| + (?P<str_idx>[a-zA-Z0-9_$.]+)&&\(b="nn"\[\+(?P=str_idx)\] + ) + (?: + ,[a-zA-Z0-9_$]+\(a\))?,c=a\. + (?: + get\(b\)| + [a-zA-Z0-9_$]+\[b\]\|\|null + )\)&&\(c=| + \b(?P<var>[a-zA-Z0-9_$]+)= + )(?P<nfunc>[a-zA-Z0-9_$]+)(?:\[(?P<idx>\d+)\])?\([a-zA-Z]\) + (?(var),[a-zA-Z0-9_$]+\.set\("n"\,(?P=var)\),(?P=nfunc)\.length)''', + jscode, 'n function name', group=('nfunc', 'idx'), default=(None, None)) + if not funcname: + self.report_warning(join_nonempty( + 'Falling back to generic n function search', + player_url and f' player = {player_url}', delim='\n')) + return self._search_regex( + r'''(?xs) + ;\s*(?P<name>[a-zA-Z0-9_$]+)\s*=\s*function\([a-zA-Z0-9_$]+\) + \s*\{(?:(?!};).)+?["']enhanced_except_''', + jscode, 'Initial JS player n function name', group='name') + elif not idx: return funcname return json.loads(js_to_json(self._search_regex( @@ -3142,14 +3217,14 @@ def _extract_n_function_name(self, jscode): def _extract_n_function_code(self, video_id, player_url): player_id = self._extract_player_info(player_url) - func_code = self.cache.load('youtube-nsig', player_id, min_ver='2022.09.1') + func_code = self.cache.load('youtube-nsig', player_id, min_ver='2024.07.09') jscode = func_code or self._load_player(video_id, player_url) jsi = JSInterpreter(jscode) if func_code: return jsi, player_id, 
func_code - func_name = self._extract_n_function_name(jscode) + func_name = self._extract_n_function_name(jscode, player_url=player_url) func_code = jsi.extract_function_code(func_name) @@ -3653,9 +3728,10 @@ def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, 'videoId': video_id, } - pp_arg = self._configuration_arg('player_params', [None], casesense=True)[0] - if pp_arg: - yt_query['params'] = pp_arg + default_pp = traverse_obj( + INNERTUBE_CLIENTS, (_split_innertube_client(client)[0], 'PLAYER_PARAMS', {str})) + if player_params := self._configuration_arg('player_params', [default_pp], casesense=True)[0]: + yt_query['params'] = player_params yt_query.update(self._generate_player_context(sts)) return self._extract_response( @@ -3667,8 +3743,8 @@ def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, def _get_requested_clients(self, url, smuggled_data): requested_clients = [] - android_clients = [] - default = ['ios', 'web'] + broken_clients = [] + default = ['ios', 'web_creator'] allowed_clients = sorted( (client for client in INNERTUBE_CLIENTS if client[:1] != '_'), key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True) @@ -3679,18 +3755,21 @@ def _get_requested_clients(self, url, smuggled_data): requested_clients.extend(allowed_clients) elif client not in allowed_clients: self.report_warning(f'Skipping unsupported client {client}') - elif client.startswith('android'): - android_clients.append(client) + elif client in self._BROKEN_CLIENTS.values(): + broken_clients.append(client) else: requested_clients.append(client) - # Force deprioritization of broken Android clients for format de-duplication - requested_clients.extend(android_clients) + # Force deprioritization of _BROKEN_CLIENTS for format de-duplication + requested_clients.extend(broken_clients) if not requested_clients: requested_clients = default if smuggled_data.get('is_music_url') or self.is_music_url(url): - 
requested_clients.extend( - f'{client}_music' for client in requested_clients if f'{client}_music' in INNERTUBE_CLIENTS) + for requested_client in requested_clients: + _, base_client, variant = _split_innertube_client(requested_client) + music_client = f'{base_client}_music' + if variant != 'music' and music_client in INNERTUBE_CLIENTS: + requested_clients.append(music_client) return orderedSet(requested_clients) @@ -3701,8 +3780,15 @@ def _invalid_player_response(self, pr, video_id): return pr_id def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg, smuggled_data): - initial_pr = None + initial_pr = ignore_initial_response = None if webpage: + if 'web' in clients: + experiments = traverse_obj(master_ytcfg, ( + 'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'serializedExperimentIds', {lambda x: x.split(',')}, ...)) + if all(x in experiments for x in self._POTOKEN_EXPERIMENTS): + self.report_warning( + 'Webpage contains broken formats (poToken experiment detected). Ignoring initial player response') + ignore_initial_response = True initial_pr = self._search_json( self._YT_INITIAL_PLAYER_RESPONSE_RE, webpage, 'initial player response', video_id, fatal=False) @@ -3732,8 +3818,10 @@ def append_client(*client_names): skipped_clients = {} while clients: client, base_client, variant = _split_innertube_client(clients.pop()) - player_ytcfg = master_ytcfg if client == 'web' else {} - if 'configs' not in self._configuration_arg('player_skip') and client != 'web': + player_ytcfg = {} + if client == 'web': + player_ytcfg = self._get_default_ytcfg() if ignore_initial_response else master_ytcfg + elif 'configs' not in self._configuration_arg('player_skip'): player_ytcfg = self._download_ytcfg(client, video_id) or player_ytcfg player_url = player_url or self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage) @@ -3746,11 +3834,22 @@ def append_client(*client_names): player_url = self._download_player_url(video_id) tried_iframe_fallback = True - try: - pr = 
initial_pr if client == 'web' and initial_pr else self._extract_player_response( - client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, player_url if require_js_player else None, initial_pr, smuggled_data) - except ExtractorError as e: - self.report_warning(e) + pr = initial_pr if client == 'web' and not ignore_initial_response else None + for retry in self.RetryManager(fatal=False): + try: + pr = pr or self._extract_player_response( + client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, + player_url if require_js_player else None, initial_pr, smuggled_data) + except ExtractorError as e: + self.report_warning(e) + break + experiments = traverse_obj(pr, ( + 'responseContext', 'serviceTrackingParams', lambda _, v: v['service'] == 'GFEEDBACK', + 'params', lambda _, v: v['key'] == 'e', 'value', {lambda x: x.split(',')}, ...)) + if all(x in experiments for x in self._POTOKEN_EXPERIMENTS): + pr = None + retry.error = ExtractorError('API returned broken formats (poToken experiment detected)', expected=True) + if not pr: continue if pr_id := self._invalid_player_response(pr, video_id): @@ -3764,14 +3863,27 @@ def append_client(*client_names): f[STREAMING_DATA_CLIENT_NAME] = name prs.append(pr) - # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in - if variant == 'embedded' and self._is_unplayable(pr) and self.is_authenticated: - append_client(f'{base_client}_creator') - elif self._is_agegated(pr): - if variant == 'tv_embedded': - append_client(f'{base_client}_embedded') - elif not variant: - append_client(f'tv_embedded.{base_client}', f'{base_client}_embedded') + # tv_embedded can work around age-gate and age-verification IF the video is embeddable + if self._is_agegated(pr) and variant != 'tv_embedded': + append_client(f'tv_embedded.{base_client}') + + # Unauthenticated users will only get tv_embedded client formats if age-gated + if self._is_agegated(pr) and not self.is_authenticated: + self.to_screen( + f'{video_id}: This video is 
age-restricted; some formats may be missing ' + f'without authentication. {self._login_hint()}', only_once=True) + + # EU countries require age-verification for accounts to access age-restricted videos + # If account is not age-verified, _is_agegated() will be truthy for non-embedded clients + # If embedding is disabled for the video, _is_unplayable() will be truthy for tv_embedded + embedding_is_disabled = variant == 'tv_embedded' and self._is_unplayable(pr) + if self.is_authenticated and (self._is_agegated(pr) or embedding_is_disabled): + self.to_screen( + f'{video_id}: This video is age-restricted and YouTube is requiring ' + 'account age-verification; some formats may be missing', only_once=True) + # web_creator and mediaconnect can work around the age-verification requirement + # _producer, _testsuite, & _vr variants can also work around age-verification + append_client('web_creator', 'mediaconnect') if skipped_clients: self.report_warning( @@ -3907,13 +4019,13 @@ def build_fragments(f): f'{video_id}: Some formats are possibly damaged. They will be deprioritized', only_once=True) client_name = fmt.get(STREAMING_DATA_CLIENT_NAME) - # Android client formats are broken due to integrity check enforcement + # _BROKEN_CLIENTS return videoplayback URLs that expire after 30 seconds # Ref: https://github.com/yt-dlp/yt-dlp/issues/9554 - is_broken = client_name and client_name.startswith(short_client_name('android')) + is_broken = client_name in self._BROKEN_CLIENTS if is_broken: self.report_warning( - f'{video_id}: Android client formats are broken and may yield HTTP Error 403. ' - 'They will be deprioritized', only_once=True) + f'{video_id}: {self._BROKEN_CLIENTS[client_name]} client formats are broken ' + 'and may yield HTTP Error 403. 
They will be deprioritized', only_once=True) name = fmt.get('qualityLabel') or quality.replace('audio_quality_', '') or '' fps = int_or_none(fmt.get('fps')) or 0 diff --git a/plugin/yt-dlp/yt_dlp/jsinterp.py b/plugin/yt-dlp/yt_dlp/jsinterp.py index 851d4dc..ba059ba 100644 --- a/plugin/yt-dlp/yt_dlp/jsinterp.py +++ b/plugin/yt-dlp/yt_dlp/jsinterp.py @@ -709,9 +709,9 @@ def eval_method(): obj.reverse() return obj elif member == 'slice': - assertion(isinstance(obj, list), 'must be applied on a list') - assertion(len(argvals) == 1, 'takes exactly one argument') - return obj[argvals[0]:] + assertion(isinstance(obj, (list, str)), 'must be applied on a list or string') + assertion(len(argvals) <= 2, 'takes between 0 and 2 arguments') + return obj[slice(*argvals, None)] elif member == 'splice': assertion(isinstance(obj, list), 'must be applied on a list') assertion(argvals, 'takes one or more arguments') diff --git a/plugin/yt-dlp/yt_dlp/networking/_curlcffi.py b/plugin/yt-dlp/yt_dlp/networking/_curlcffi.py index b1f0fb8..e8a67b7 100644 --- a/plugin/yt-dlp/yt_dlp/networking/_curlcffi.py +++ b/plugin/yt-dlp/yt_dlp/networking/_curlcffi.py @@ -2,6 +2,7 @@ import io import math +import re import urllib.parse from ._helper import InstanceStoreMixin, select_proxy @@ -27,11 +28,12 @@ if curl_cffi is None: raise ImportError('curl_cffi is not installed') -curl_cffi_version = tuple(int_or_none(x, default=0) for x in curl_cffi.__version__.split('.')) -if curl_cffi_version != (0, 5, 10): +curl_cffi_version = tuple(map(int, re.split(r'[^\d]+', curl_cffi.__version__)[:3])) + +if curl_cffi_version != (0, 5, 10) and not ((0, 7, 0) <= curl_cffi_version < (0, 8, 0)): curl_cffi._yt_dlp__version = f'{curl_cffi.__version__} (unsupported)' - raise ImportError('Only curl_cffi 0.5.10 is supported') + raise ImportError('Only curl_cffi versions 0.5.10, 0.7.X are supported') import curl_cffi.requests from curl_cffi.const import CurlECode, CurlOpt @@ -110,6 +112,13 @@ class 
CurlCFFIRH(ImpersonateRequestHandler, InstanceStoreMixin): _SUPPORTED_FEATURES = (Features.NO_PROXY, Features.ALL_PROXY) _SUPPORTED_PROXY_SCHEMES = ('http', 'https', 'socks4', 'socks4a', 'socks5', 'socks5h') _SUPPORTED_IMPERSONATE_TARGET_MAP = { + **({ + ImpersonateTarget('chrome', '124', 'macos', '14'): curl_cffi.requests.BrowserType.chrome124, + ImpersonateTarget('chrome', '123', 'macos', '14'): curl_cffi.requests.BrowserType.chrome123, + ImpersonateTarget('chrome', '120', 'macos', '14'): curl_cffi.requests.BrowserType.chrome120, + ImpersonateTarget('chrome', '119', 'macos', '14'): curl_cffi.requests.BrowserType.chrome119, + ImpersonateTarget('chrome', '116', 'windows', '10'): curl_cffi.requests.BrowserType.chrome116, + } if curl_cffi_version >= (0, 7, 0) else {}), ImpersonateTarget('chrome', '110', 'windows', '10'): curl_cffi.requests.BrowserType.chrome110, ImpersonateTarget('chrome', '107', 'windows', '10'): curl_cffi.requests.BrowserType.chrome107, ImpersonateTarget('chrome', '104', 'windows', '10'): curl_cffi.requests.BrowserType.chrome104, @@ -118,9 +127,15 @@ class CurlCFFIRH(ImpersonateRequestHandler, InstanceStoreMixin): ImpersonateTarget('chrome', '99', 'windows', '10'): curl_cffi.requests.BrowserType.chrome99, ImpersonateTarget('edge', '101', 'windows', '10'): curl_cffi.requests.BrowserType.edge101, ImpersonateTarget('edge', '99', 'windows', '10'): curl_cffi.requests.BrowserType.edge99, + **({ + ImpersonateTarget('safari', '17.0', 'macos', '14'): curl_cffi.requests.BrowserType.safari17_0, + } if curl_cffi_version >= (0, 7, 0) else {}), ImpersonateTarget('safari', '15.5', 'macos', '12'): curl_cffi.requests.BrowserType.safari15_5, ImpersonateTarget('safari', '15.3', 'macos', '11'): curl_cffi.requests.BrowserType.safari15_3, ImpersonateTarget('chrome', '99', 'android', '12'): curl_cffi.requests.BrowserType.chrome99_android, + **({ + ImpersonateTarget('safari', '17.2', 'ios', '17.2'): curl_cffi.requests.BrowserType.safari17_2_ios, + } if curl_cffi_version 
>= (0, 7, 0) else {}), } def _create_instance(self, cookiejar=None): @@ -131,6 +146,9 @@ def _check_extensions(self, extensions): extensions.pop('impersonate', None) extensions.pop('cookiejar', None) extensions.pop('timeout', None) + # CurlCFFIRH ignores legacy ssl options currently. + # Impersonation generally uses a looser SSL configuration than urllib/requests. + extensions.pop('legacy_ssl', None) def send(self, request: Request) -> Response: target = self._get_request_target(request) @@ -187,7 +205,7 @@ def _send(self, request: Request): timeout = self._calculate_timeout(request) # set CURLOPT_LOW_SPEED_LIMIT and CURLOPT_LOW_SPEED_TIME to act as a read timeout. [1] - # curl_cffi does not currently do this. [2] + # This is required only for 0.5.10 [2] # Note: CURLOPT_LOW_SPEED_TIME is in seconds, so we need to round up to the nearest second. [3] # [1] https://unix.stackexchange.com/a/305311 # [2] https://github.com/yifeikong/curl_cffi/issues/156 @@ -203,7 +221,7 @@ def _send(self, request: Request): data=request.data, verify=self.verify, max_redirects=5, - timeout=timeout, + timeout=(timeout, timeout), impersonate=self._SUPPORTED_IMPERSONATE_TARGET_MAP.get( self._get_request_target(request)), interface=self.source_address, @@ -222,7 +240,7 @@ def _send(self, request: Request): elif ( e.code == CurlECode.PROXY - or (e.code == CurlECode.RECV_ERROR and 'Received HTTP code 407 from proxy after CONNECT' in str(e)) + or (e.code == CurlECode.RECV_ERROR and 'CONNECT' in str(e)) ): raise ProxyError(cause=e) from e else: diff --git a/plugin/yt-dlp/yt_dlp/networking/_requests.py b/plugin/yt-dlp/yt_dlp/networking/_requests.py index 86850c1..7de95ab 100644 --- a/plugin/yt-dlp/yt_dlp/networking/_requests.py +++ b/plugin/yt-dlp/yt_dlp/networking/_requests.py @@ -295,11 +295,12 @@ def _check_extensions(self, extensions): super()._check_extensions(extensions) extensions.pop('cookiejar', None) extensions.pop('timeout', None) + extensions.pop('legacy_ssl', None) - def 
_create_instance(self, cookiejar): + def _create_instance(self, cookiejar, legacy_ssl_support=None): session = RequestsSession() http_adapter = RequestsHTTPAdapter( - ssl_context=self._make_sslcontext(), + ssl_context=self._make_sslcontext(legacy_ssl_support=legacy_ssl_support), source_address=self.source_address, max_retries=urllib3.util.retry.Retry(False), ) @@ -318,7 +319,10 @@ def _send(self, request): max_redirects_exceeded = False - session = self._get_instance(cookiejar=self._get_cookiejar(request)) + session = self._get_instance( + cookiejar=self._get_cookiejar(request), + legacy_ssl_support=request.extensions.get('legacy_ssl'), + ) try: requests_res = session.request( diff --git a/plugin/yt-dlp/yt_dlp/networking/_urllib.py b/plugin/yt-dlp/yt_dlp/networking/_urllib.py index 6299582..510bb2a 100644 --- a/plugin/yt-dlp/yt_dlp/networking/_urllib.py +++ b/plugin/yt-dlp/yt_dlp/networking/_urllib.py @@ -348,14 +348,15 @@ def _check_extensions(self, extensions): super()._check_extensions(extensions) extensions.pop('cookiejar', None) extensions.pop('timeout', None) + extensions.pop('legacy_ssl', None) - def _create_instance(self, proxies, cookiejar): + def _create_instance(self, proxies, cookiejar, legacy_ssl_support=None): opener = urllib.request.OpenerDirector() handlers = [ ProxyHandler(proxies), HTTPHandler( debuglevel=int(bool(self.verbose)), - context=self._make_sslcontext(), + context=self._make_sslcontext(legacy_ssl_support=legacy_ssl_support), source_address=self.source_address), HTTPCookieProcessor(cookiejar), DataHandler(), @@ -391,6 +392,7 @@ def _send(self, request): opener = self._get_instance( proxies=self._get_proxies(request), cookiejar=self._get_cookiejar(request), + legacy_ssl_support=request.extensions.get('legacy_ssl'), ) try: res = opener.open(urllib_req, timeout=self._calculate_timeout(request)) diff --git a/plugin/yt-dlp/yt_dlp/networking/_websockets.py b/plugin/yt-dlp/yt_dlp/networking/_websockets.py index 15db4fe..492af11 100644 --- 
a/plugin/yt-dlp/yt_dlp/networking/_websockets.py +++ b/plugin/yt-dlp/yt_dlp/networking/_websockets.py @@ -118,6 +118,7 @@ def _check_extensions(self, extensions): super()._check_extensions(extensions) extensions.pop('timeout', None) extensions.pop('cookiejar', None) + extensions.pop('legacy_ssl', None) def close(self): # Remove the logging handler that contains a reference to our logger @@ -154,13 +155,14 @@ def _send(self, request): address=(wsuri.host, wsuri.port), **create_conn_kwargs, ) + ssl_ctx = self._make_sslcontext(legacy_ssl_support=request.extensions.get('legacy_ssl')) conn = websockets.sync.client.connect( sock=sock, uri=request.url, additional_headers=headers, open_timeout=timeout, user_agent_header=None, - ssl_context=self._make_sslcontext() if wsuri.secure else None, + ssl_context=ssl_ctx if wsuri.secure else None, close_timeout=0, # not ideal, but prevents yt-dlp hanging ) return WebsocketsResponseAdapter(conn, url=request.url) diff --git a/plugin/yt-dlp/yt_dlp/networking/common.py b/plugin/yt-dlp/yt_dlp/networking/common.py index a6db167..e8951c7 100644 --- a/plugin/yt-dlp/yt_dlp/networking/common.py +++ b/plugin/yt-dlp/yt_dlp/networking/common.py @@ -205,6 +205,7 @@ class RequestHandler(abc.ABC): The following extensions are defined for RequestHandler: - `cookiejar`: Cookiejar to use for this request. - `timeout`: socket timeout to use for this request. + - `legacy_ssl`: Enable legacy SSL options for this request. See legacy_ssl_support. 
To enable these, add extensions.pop('<extension>', None) to _check_extensions Apart from the url protocol, proxies dict may contain the following keys: @@ -247,10 +248,10 @@ def __init__( self.legacy_ssl_support = legacy_ssl_support super().__init__() - def _make_sslcontext(self): + def _make_sslcontext(self, legacy_ssl_support=None): return make_ssl_context( verify=self.verify, - legacy_support=self.legacy_ssl_support, + legacy_support=legacy_ssl_support if legacy_ssl_support is not None else self.legacy_ssl_support, use_certifi=not self.prefer_system_certs, **self._client_cert, ) @@ -262,7 +263,8 @@ def _calculate_timeout(self, request): return float(request.extensions.get('timeout') or self.timeout) def _get_cookiejar(self, request): - return request.extensions.get('cookiejar') or self.cookiejar + cookiejar = request.extensions.get('cookiejar') + return self.cookiejar if cookiejar is None else cookiejar def _get_proxies(self, request): return (request.proxies or self.proxies).copy() @@ -314,6 +316,7 @@ def _check_extensions(self, extensions): """Check extensions for unsupported extensions. Subclasses should extend this.""" assert isinstance(extensions.get('cookiejar'), (YoutubeDLCookieJar, NoneType)) assert isinstance(extensions.get('timeout'), (float, int, NoneType)) + assert isinstance(extensions.get('legacy_ssl'), (bool, NoneType)) def _validate(self, request): self._check_url_scheme(request) diff --git a/plugin/yt-dlp/yt_dlp/options.py b/plugin/yt-dlp/yt_dlp/options.py index 76db06c..ffe2463 100644 --- a/plugin/yt-dlp/yt_dlp/options.py +++ b/plugin/yt-dlp/yt_dlp/options.py @@ -462,6 +462,7 @@ def _alias_callback(option, opt_str, value, parser, opts, nargs): 'the STREAM (stdout or stderr) to apply the setting to. ' 'Can be one of "always", "auto" (default), "never", or ' '"no_color" (use non color terminal sequences). ' + 'Use "auto-tty" or "no_color-tty" to decide based on terminal support only. 
' 'Can be used multiple times')) general.add_option( '--compat-options', diff --git a/plugin/yt-dlp/yt_dlp/update.py b/plugin/yt-dlp/yt_dlp/update.py index 8c6790d..72ae290 100644 --- a/plugin/yt-dlp/yt_dlp/update.py +++ b/plugin/yt-dlp/yt_dlp/update.py @@ -310,6 +310,7 @@ def _download_update_spec(self, source_tags): if isinstance(error, HTTPError) and error.status == 404: continue self._report_network_error(f'fetch update spec: {error}') + return None self._report_error( f'The requested tag {self.requested_tag} does not exist for {self.requested_repo}', True) @@ -557,9 +558,10 @@ def _report_permission_error(self, file): def _report_network_error(self, action, delim=';', tag=None): if not tag: tag = self.requested_tag + path = tag if tag == 'latest' else f'tag/{tag}' self._report_error( - f'Unable to {action}{delim} visit https://github.com/{self.requested_repo}/releases/' - + tag if tag == 'latest' else f'tag/{tag}', True) + f'Unable to {action}{delim} visit ' + f'https://github.com/{self.requested_repo}/releases/{path}', True) # XXX: Everything below this line in this class is deprecated / for compat only @property diff --git a/plugin/yt-dlp/yt_dlp/utils/_utils.py b/plugin/yt-dlp/yt_dlp/utils/_utils.py index b582b7d..0d3e707 100644 --- a/plugin/yt-dlp/yt_dlp/utils/_utils.py +++ b/plugin/yt-dlp/yt_dlp/utils/_utils.py @@ -1217,7 +1217,7 @@ def unified_timestamp(date_str, day_first=True): return None date_str = re.sub(r'\s+', ' ', re.sub( - r'(?i)[,|]|(mon|tues?|wed(nes)?|thu(rs)?|fri|sat(ur)?)(day)?', '', date_str)) + r'(?i)[,|]|(mon|tues?|wed(nes)?|thu(rs)?|fri|sat(ur)?|sun)(day)?', '', date_str)) pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0 timezone, date_str = extract_timezone(date_str) @@ -2984,6 +2984,7 @@ def parse_codecs(codecs_str): str.strip, codecs_str.strip().strip(',').split(',')))) vcodec, acodec, scodec, hdr = None, None, None, None for full_codec in split_codecs: + full_codec = re.sub(r'^([^.]+)', lambda m: m.group(1).lower(), full_codec) 
parts = re.sub(r'0+(?=\d)', '', full_codec).split('.') if parts[0] in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v', 'hvc1', 'av1', 'theora', 'dvh1', 'dvhe'): @@ -5110,6 +5111,7 @@ class _UnsafeExtensionError(Exception): 'gif', 'heic', 'ico', + 'image', 'jng', 'jpeg', 'jxl', diff --git a/plugin/yt-dlp/yt_dlp/version.py b/plugin/yt-dlp/yt_dlp/version.py index 31de564..6633a11 100644 --- a/plugin/yt-dlp/yt_dlp/version.py +++ b/plugin/yt-dlp/yt_dlp/version.py @@ -1,8 +1,8 @@ # Autogenerated by devscripts/update-version.py -__version__ = '2024.07.09' +__version__ = '2024.08.06' -RELEASE_GIT_HEAD = '7ead7332af69422cee931aec3faa277288e9e212' +RELEASE_GIT_HEAD = '4d9231208332d4c32364b8cd814bff8b20232cae' VARIANT = None @@ -12,4 +12,4 @@ ORIGIN = 'yt-dlp/yt-dlp' -_pkg_version = '2024.07.09' +_pkg_version = '2024.08.06'