From 61d7f7d15e1f9be1bd69fda715085bec035f1a61 Mon Sep 17 00:00:00 2001 From: Ben Rog-Wilhelm Date: Sat, 17 Apr 2021 23:11:38 -0500 Subject: [PATCH 1/2] Add support for an uncommon typo in a GDC Vault page --- youtube_dl/extractor/gdcvault.py | 14 ++++++++++++++ youtube_dl/extractor/kaltura.py | 2 +- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/gdcvault.py b/youtube_dl/extractor/gdcvault.py index 2f555c1d40c..1a284be39d1 100644 --- a/youtube_dl/extractor/gdcvault.py +++ b/youtube_dl/extractor/gdcvault.py @@ -102,6 +102,20 @@ class GDCVaultIE(InfoExtractor): 'format': 'mp4-408', }, }, + { + 'url': 'https://www.gdcvault.com/play/1025699', + 'info_dict': { + 'id': '0_zagynv0a', + 'ext': 'mp4', + 'title': 'Tech Toolbox', + 'upload_date': '20190408', + 'uploader_id': 'joe@blazestreaming.com', + 'timestamp': 1554764629, + }, + 'params': { + 'skip_download': True, + }, + }, ] def _login(self, webpage_url, display_id): diff --git a/youtube_dl/extractor/kaltura.py b/youtube_dl/extractor/kaltura.py index 49d13460df7..bf41e11c9ef 100644 --- a/youtube_dl/extractor/kaltura.py +++ b/youtube_dl/extractor/kaltura.py @@ -145,7 +145,7 @@ def _extract_urls(webpage): ''', webpage) or re.finditer( r'''(?xs) - <(?:iframe[^>]+src|meta[^>]+\bcontent)=(?P<q1>["']) + <(?:iframe[^>]+src|meta[^>]+\bcontent)=(?P<q1>["'])\s* (?:https?:)?//(?:(?:www|cdnapi(?:sec)?)\.)?kaltura\.com/(?:(?!(?P=q1)).)*\b(?:p|partner_id)/(?P<partner_id>\d+) (?:(?!(?P=q1)).)* [?&;]entry_id=(?P<id>(?:(?!(?P=q1))[^&])+) From 60f40d7197b21467b5a98eb3427d62f27475c50d Mon Sep 17 00:00:00 2001 From: Sergey M Date: Wed, 5 May 2021 02:13:14 +0700 Subject: [PATCH 2/2] Update gdcvault.py --- youtube_dl/extractor/gdcvault.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/gdcvault.py b/youtube_dl/extractor/gdcvault.py index 1a284be39d1..5ad40ee234e 100644 --- a/youtube_dl/extractor/gdcvault.py +++ b/youtube_dl/extractor/gdcvault.py @@ -103,6 +103,7 @@ class GDCVaultIE(InfoExtractor): }, }, { + 
# Kaltura embed, whitespace between quote and embedded URL in iframe's src 'url': 'https://www.gdcvault.com/play/1025699', 'info_dict': { 'id': '0_zagynv0a',