From 2f4b57594673035a59d72f7667588da848820034 Mon Sep 17 00:00:00 2001
From: HobbyistDev <105957301+HobbyistDev@users.noreply.github.com>
Date: Sat, 3 Feb 2024 05:56:29 +0900
Subject: [PATCH 01/10] [ie/zetland] Add extractor (#9116)
Closes #9024
Authored by: HobbyistDev
---
yt_dlp/extractor/_extractors.py | 1 +
yt_dlp/extractor/zetland.py | 71 +++++++++++++++++++++++++++++++++
2 files changed, 72 insertions(+)
create mode 100644 yt_dlp/extractor/zetland.py
diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py
index 4c86040990aa..7726fe359728 100644
--- a/yt_dlp/extractor/_extractors.py
+++ b/yt_dlp/extractor/_extractors.py
@@ -2496,6 +2496,7 @@
Zee5SeriesIE,
)
from .zeenews import ZeeNewsIE
+from .zetland import ZetlandDKArticleIE
from .zhihu import ZhihuIE
from .zingmp3 import (
ZingMp3IE,
diff --git a/yt_dlp/extractor/zetland.py b/yt_dlp/extractor/zetland.py
new file mode 100644
index 000000000000..055a643b3ce5
--- /dev/null
+++ b/yt_dlp/extractor/zetland.py
@@ -0,0 +1,71 @@
+from .common import InfoExtractor
+from ..utils import merge_dicts, unified_timestamp, url_or_none
+from ..utils.traversal import traverse_obj
+
+
+class ZetlandDKArticleIE(InfoExtractor):
+ _VALID_URL = r'https?://www\.zetland\.dk/\w+/(?P<id>(?P<story_id>\w{8})-(?P<uploader_id>\w{8})-(?:\w{5}))'
+ _TESTS = [{
+ 'url': 'https://www.zetland.dk/historie/sO9aq2MY-a81VP3BY-66e69?utm_source=instagram&utm_medium=linkibio&utm_campaign=artikel',
+ 'info_dict': {
+ 'id': 'sO9aq2MY-a81VP3BY-66e69',
+ 'ext': 'mp3',
+ 'modified_date': '20240118',
+ 'title': 'Afsnit 1: “Det føltes som en kidnapning.” ',
+ 'upload_date': '20240116',
+ 'uploader_id': 'a81VP3BY',
+ 'modified_timestamp': 1705568739,
+ 'release_timestamp': 1705377592,
+ 'uploader_url': 'https://www.zetland.dk/skribent/a81VP3BY',
+ 'uploader': 'Helle Fuusager',
+ 'release_date': '20240116',
+ 'thumbnail': r're:https://zetland\.imgix\.net/2aafe500-b14e-11ee-bf83-65d5e1283a57/Zetland_Image_1\.jpg',
+ 'description': 'md5:9619d426772c133f5abb26db27f26a01',
+ 'timestamp': 1705377592,
+ 'series_id': '62d54630-e87b-4ab1-a255-8de58dbe1b14',
+ }
+
+ }]
+
+ def _real_extract(self, url):
+ display_id, uploader_id = self._match_valid_url(url).group('id', 'uploader_id')
+ webpage = self._download_webpage(url, display_id)
+
+ next_js_data = self._search_nextjs_data(webpage, display_id)['props']['pageProps']
+ story_data = traverse_obj(next_js_data, ('initialState', 'consume', 'story', 'story'))
+
+ formats = []
+ for audio_url in traverse_obj(story_data, ('story_content', 'meta', 'audioFiles', ..., {url_or_none})):
+ formats.append({
+ 'url': audio_url,
+ 'vcodec': 'none',
+ })
+
+ return merge_dicts({
+ 'id': display_id,
+ 'formats': formats,
+ 'uploader_id': uploader_id
+ }, traverse_obj(story_data, {
+ 'title': ((('story_content', 'content', 'title'), 'title'), {str}),
+ 'uploader': ('sharer', 'name'),
+ 'uploader_id': ('sharer', 'sharer_id'),
+ 'description': ('story_content', 'content', 'socialDescription'),
+ 'series_id': ('story_content', 'meta', 'seriesId'),
+ 'release_timestamp': ('published_at', {unified_timestamp}),
+ 'modified_timestamp': ('revised_at', {unified_timestamp}),
+ }, get_all=False), traverse_obj(next_js_data, ('metaInfo', {
+ 'title': ((('meta', 'title'), ('ld', 'headline'), ('og', 'og:title'), ('og', 'twitter:title')), {str}),
+ 'description': ((('meta', 'description'), ('ld', 'description'), ('og', 'og:description'), ('og', 'twitter:description')), {str}),
+ 'uploader': ((('meta', 'author'), ('ld', 'author', 'name')), {str}),
+ 'uploader_url': ('ld', 'author', 'url', {url_or_none}),
+ 'thumbnail': ((('ld', 'image'), ('og', 'og:image'), ('og', 'twitter:image')), {url_or_none}),
+ 'modified_timestamp': ('ld', 'dateModified', {unified_timestamp}),
+ 'release_timestamp': ('ld', 'datePublished', {unified_timestamp}),
+ 'timestamp': ('ld', 'dateCreated', {unified_timestamp}),
+ }), get_all=False), {
+ 'title': self._html_search_meta(['title', 'og:title', 'twitter:title'], webpage),
+ 'description': self._html_search_meta(['description', 'og:description', 'twitter:description'], webpage),
+ 'thumbnail': self._html_search_meta(['og:image', 'twitter:image'], webpage),
+ 'uploader': self._html_search_meta(['author'], webpage),
+ 'release_timestamp': unified_timestamp(self._html_search_meta(['article:published_time'], webpage)),
+ }, self._search_json_ld(webpage, display_id, fatal=False))
From a0d50aabc5462aee302bd3f2663d3a3554875789 Mon Sep 17 00:00:00 2001
From: HobbyistDev <105957301+HobbyistDev@users.noreply.github.com>
Date: Sat, 3 Feb 2024 05:57:53 +0900
Subject: [PATCH 02/10] [ie/orf:on] Add extractor (#9113)
Closes #8903
Authored by: HobbyistDev
---
yt_dlp/extractor/_extractors.py | 1 +
yt_dlp/extractor/orf.py | 64 +++++++++++++++++++++++++++++++++
2 files changed, 65 insertions(+)
diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py
index 7726fe359728..04318a716da5 100644
--- a/yt_dlp/extractor/_extractors.py
+++ b/yt_dlp/extractor/_extractors.py
@@ -1394,6 +1394,7 @@
from .orf import (
ORFTVthekIE,
ORFFM4StoryIE,
+ ORFONIE,
ORFRadioIE,
ORFPodcastIE,
ORFIPTVIE,
diff --git a/yt_dlp/extractor/orf.py b/yt_dlp/extractor/orf.py
index 9a48ae1b3e49..1b2a79a625a3 100644
--- a/yt_dlp/extractor/orf.py
+++ b/yt_dlp/extractor/orf.py
@@ -1,3 +1,4 @@
+import base64
import functools
import re
@@ -565,3 +566,66 @@ def _real_extract(self, url):
})
return self.playlist_result(entries)
+
+
+class ORFONIE(InfoExtractor):
+ IE_NAME = 'orf:on'
+ _VALID_URL = r'https?://on\.orf\.at/video/(?P<id>\d{8})/(?P<slug>[\w-]+)'
+ _TESTS = [{
+ 'url': 'https://on.orf.at/video/14210000/school-of-champions-48',
+ 'info_dict': {
+ 'id': '14210000',
+ 'ext': 'mp4',
+ 'duration': 2651.08,
+ 'thumbnail': 'https://api-tvthek.orf.at/assets/segments/0167/98/thumb_16697671_segments_highlight_teaser.jpeg',
+ 'title': 'School of Champions (4/8)',
+ 'description': 'md5:d09ad279fc2e8502611e7648484b6afd',
+ 'media_type': 'episode',
+ 'timestamp': 1706472362,
+ 'upload_date': '20240128',
+ }
+ }]
+
+ def _extract_video(self, video_id, display_id):
+ encrypted_id = base64.b64encode(f'3dSlfek03nsLKdj4Jsd{video_id}'.encode()).decode()
+ api_json = self._download_json(
+ f'https://api-tvthek.orf.at/api/v4.3/public/episode/encrypted/{encrypted_id}', display_id)
+
+ formats, subtitles = [], {}
+ for manifest_type in traverse_obj(api_json, ('sources', {dict.keys}, ...)):
+ for manifest_url in traverse_obj(api_json, ('sources', manifest_type, ..., 'src', {url_or_none})):
+ if manifest_type == 'hls':
+ fmts, subs = self._extract_m3u8_formats_and_subtitles(
+ manifest_url, display_id, fatal=False, m3u8_id='hls')
+ elif manifest_type == 'dash':
+ fmts, subs = self._extract_mpd_formats_and_subtitles(
+ manifest_url, display_id, fatal=False, mpd_id='dash')
+ else:
+ continue
+ formats.extend(fmts)
+ self._merge_subtitles(subs, target=subtitles)
+
+ return {
+ 'id': video_id,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ **traverse_obj(api_json, {
+ 'duration': ('duration_second', {float_or_none}),
+ 'title': (('title', 'headline'), {str}),
+ 'description': (('description', 'teaser_text'), {str}),
+ 'media_type': ('video_type', {str}),
+ }, get_all=False),
+ }
+
+ def _real_extract(self, url):
+ video_id, display_id = self._match_valid_url(url).group('id', 'slug')
+ webpage = self._download_webpage(url, display_id)
+
+ return {
+ 'id': video_id,
+ 'title': self._html_search_meta(['og:title', 'twitter:title'], webpage, default=None),
+ 'description': self._html_search_meta(
+ ['description', 'og:description', 'twitter:description'], webpage, default=None),
+ **self._search_json_ld(webpage, display_id, fatal=False),
+ **self._extract_video(video_id, display_id),
+ }
From ffa017cfc5973b265c92248546fcf5020dc43eaf Mon Sep 17 00:00:00 2001
From: c-basalt <117849907+c-basalt@users.noreply.github.com>
Date: Fri, 2 Feb 2024 16:08:29 -0500
Subject: [PATCH 03/10] [ie/BiliBiliSearch] Set cookie to fix extraction
(#9119)
Closes #5083
Authored by: c-basalt
---
yt_dlp/extractor/bilibili.py | 30 ++++++++++++++++++++++++++++++
1 file changed, 30 insertions(+)
diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py
index cd7df69ef028..4ed9e2af7299 100644
--- a/yt_dlp/extractor/bilibili.py
+++ b/yt_dlp/extractor/bilibili.py
@@ -7,6 +7,7 @@
import re
import time
import urllib.parse
+import uuid
from .common import InfoExtractor, SearchInfoExtractor
from ..dependencies import Cryptodome
@@ -1464,8 +1465,37 @@ class BiliBiliSearchIE(SearchInfoExtractor):
IE_DESC = 'Bilibili video search'
_MAX_RESULTS = 100000
_SEARCH_KEY = 'bilisearch'
+ _TESTS = [{
+ 'url': 'bilisearch3:靡烟 出道一年,我怎么还在等你单推的女人睡觉后开播啊',
+ 'playlist_count': 3,
+ 'info_dict': {
+ 'id': '靡烟 出道一年,我怎么还在等你单推的女人睡觉后开播啊',
+ 'title': '靡烟 出道一年,我怎么还在等你单推的女人睡觉后开播啊',
+ },
+ 'playlist': [{
+ 'info_dict': {
+ 'id': 'BV1n44y1Q7sc',
+ 'ext': 'mp4',
+ 'title': '“出道一年,我怎么还在等你单推的女人睡觉后开播啊?”【一分钟了解靡烟miya】',
+ 'timestamp': 1669889987,
+ 'upload_date': '20221201',
+ 'description': 'md5:43343c0973defff527b5a4b403b4abf9',
+ 'tags': list,
+ 'uploader': '靡烟miya',
+ 'duration': 123.156,
+ 'uploader_id': '1958703906',
+ 'comment_count': int,
+ 'view_count': int,
+ 'like_count': int,
+ 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
+ '_old_archive_ids': ['bilibili 988222410_part1'],
+ },
+ }],
+ }]
def _search_results(self, query):
+ if not self._get_cookies('https://api.bilibili.com').get('buvid3'):
+ self._set_cookie('.bilibili.com', 'buvid3', f'{uuid.uuid4()}infoc')
for page_num in itertools.count(1):
videos = self._download_json(
'https://api.bilibili.com/x/web-interface/search/type', query,
From 8e765755f7f4909e1b535e61b7376b2d66e1ba6a Mon Sep 17 00:00:00 2001
From: bashonly <88596187+bashonly@users.noreply.github.com>
Date: Fri, 2 Feb 2024 15:15:04 -0600
Subject: [PATCH 04/10] [ie/vimeo] Fix API headers (#9125)
Closes #9124
Authored by: bashonly
---
yt_dlp/extractor/vimeo.py | 9 ++++++---
1 file changed, 6 insertions(+), 3 deletions(-)
diff --git a/yt_dlp/extractor/vimeo.py b/yt_dlp/extractor/vimeo.py
index e5e8144bb10b..208e11184905 100644
--- a/yt_dlp/extractor/vimeo.py
+++ b/yt_dlp/extractor/vimeo.py
@@ -269,7 +269,7 @@ def _extract_original_format(self, url, video_id, unlisted_hash=None):
'https://vimeo.com/_rv/viewer', video_id, note='Downloading jwt token', fatal=False) or {}
if not jwt_response.get('jwt'):
return
- headers = {'Authorization': 'jwt %s' % jwt_response['jwt']}
+ headers = {'Authorization': 'jwt %s' % jwt_response['jwt'], 'Accept': 'application/json'}
original_response = self._download_json(
f'https://api.vimeo.com/videos/{video_id}', video_id,
headers=headers, fatal=False, expected_status=(403, 404)) or {}
@@ -751,6 +751,7 @@ def _extract_from_api(self, video_id, unlisted_hash=None):
video = self._download_json(
api_url, video_id, headers={
'Authorization': 'jwt ' + token,
+ 'Accept': 'application/json',
}, query={
'fields': 'config_url,created_time,description,license,metadata.connections.comments.total,metadata.connections.likes.total,release_time,stats.plays',
})
@@ -785,7 +786,7 @@ def _try_album_password(self, url):
jwt = viewer['jwt']
album = self._download_json(
'https://api.vimeo.com/albums/' + album_id,
- album_id, headers={'Authorization': 'jwt ' + jwt},
+ album_id, headers={'Authorization': 'jwt ' + jwt, 'Accept': 'application/json'},
query={'fields': 'description,name,privacy'})
if try_get(album, lambda x: x['privacy']['view']) == 'password':
password = self.get_param('videopassword')
@@ -1147,10 +1148,12 @@ def _fetch_page(self, album_id, authorization, hashed_pass, page):
'https://api.vimeo.com/albums/%s/videos' % album_id,
album_id, 'Downloading page %d' % api_page, query=query, headers={
'Authorization': 'jwt ' + authorization,
+ 'Accept': 'application/json',
})['data']
except ExtractorError as e:
if isinstance(e.cause, HTTPError) and e.cause.status == 400:
return
+ raise
for video in videos:
link = video.get('link')
if not link:
@@ -1171,7 +1174,7 @@ def _real_extract(self, url):
jwt = viewer['jwt']
album = self._download_json(
'https://api.vimeo.com/albums/' + album_id,
- album_id, headers={'Authorization': 'jwt ' + jwt},
+ album_id, headers={'Authorization': 'jwt ' + jwt, 'Accept': 'application/json'},
query={'fields': 'description,name,privacy'})
hashed_pass = None
if try_get(album, lambda x: x['privacy']['view']) == 'password':
From 4253e3b7f483127bd812bdac02466f4a5b47ff34 Mon Sep 17 00:00:00 2001
From: sepro <4618135+seproDev@users.noreply.github.com>
Date: Sat, 3 Feb 2024 15:59:43 +0100
Subject: [PATCH 05/10] [ie/CCMA] Extract 1080p DASH formats (#9130)
Closes #5755
Authored by: seproDev
---
yt_dlp/extractor/ccma.py | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/yt_dlp/extractor/ccma.py b/yt_dlp/extractor/ccma.py
index 88ff82f6e6b2..ab840f3016be 100644
--- a/yt_dlp/extractor/ccma.py
+++ b/yt_dlp/extractor/ccma.py
@@ -1,6 +1,7 @@
from .common import InfoExtractor
from ..utils import (
clean_html,
+ determine_ext,
int_or_none,
parse_duration,
parse_resolution,
@@ -60,6 +61,7 @@ def _real_extract(self, url):
'http://dinamics.ccma.cat/pvideo/media.jsp', media_id, query={
'media': media_type,
'idint': media_id,
+ 'format': 'dm',
})
formats = []
@@ -69,6 +71,10 @@ def _real_extract(self, url):
format_url = url_or_none(format_.get('file'))
if not format_url:
continue
+ if determine_ext(format_url) == 'mpd':
+ formats.extend(self._extract_mpd_formats(
+ format_url, media_id, mpd_id='dash', fatal=False))
+ continue
label = format_.get('label')
f = parse_resolution(label)
f.update({
From e3ce2b385ec1f03fac9d4210c57fda77134495fc Mon Sep 17 00:00:00 2001
From: YoshichikaAAA <154937389+YoshichikaAAA@users.noreply.github.com>
Date: Sun, 4 Feb 2024 03:44:17 +0900
Subject: [PATCH 06/10] [ie/radiko] Extract more metadata (#9115)
Authored by: YoshichikaAAA
---
yt_dlp/extractor/radiko.py | 11 +++++++++++
1 file changed, 11 insertions(+)
diff --git a/yt_dlp/extractor/radiko.py b/yt_dlp/extractor/radiko.py
index c363d9ba5f7b..2b6405999520 100644
--- a/yt_dlp/extractor/radiko.py
+++ b/yt_dlp/extractor/radiko.py
@@ -1,5 +1,6 @@
import base64
import random
+import re
import urllib.parse
from .common import InfoExtractor
@@ -11,6 +12,7 @@
unified_timestamp,
update_url_query,
)
+from ..utils.traversal import traverse_obj
class RadikoBaseIE(InfoExtractor):
@@ -159,6 +161,12 @@ def _extract_formats(self, video_id, station, is_onair, ft, cursor, auth_token,
return formats
+ def _extract_performers(self, prog):
+ performers = traverse_obj(prog, (
+ 'pfm/text()', ..., {lambda x: re.split(r'[//、 ,,]', x)}, ..., {str.strip}))
+ # TODO: change 'artist' fields to 'artists' and return traversal list instead of str
+ return ', '.join(performers) or None
+
class RadikoIE(RadikoBaseIE):
_VALID_URL = r'https?://(?:www\.)?radiko\.jp/#!/ts/(?P<station>[A-Z0-9-]+)/(?P<id>\d+)'
@@ -186,10 +194,12 @@ def _real_extract(self, url):
return {
'id': video_id,
'title': try_call(lambda: prog.find('title').text),
+ 'artist': self._extract_performers(prog),
'description': clean_html(try_call(lambda: prog.find('info').text)),
'uploader': try_call(lambda: station_program.find('.//name').text),
'uploader_id': station,
'timestamp': vid_int,
+ 'duration': try_call(lambda: unified_timestamp(radio_end, False) - unified_timestamp(radio_begin, False)),
'is_live': True,
'formats': self._extract_formats(
video_id=video_id, station=station, is_onair=False,
@@ -243,6 +253,7 @@ def _real_extract(self, url):
return {
'id': station,
'title': title,
+ 'artist': self._extract_performers(prog),
'description': description,
'uploader': station_name,
'uploader_id': station,
From 96d0f8c1cb8aec250c5614bfde6b5fb95f10819b Mon Sep 17 00:00:00 2001
From: Michal
Date: Mon, 5 Feb 2024 00:25:13 +0100
Subject: [PATCH 07/10] [ie/eporner] Extract AV1 formats (#9028)
Authored by: michal-repo
---
yt_dlp/extractor/eporner.py | 13 ++++++++++++-
1 file changed, 12 insertions(+), 1 deletion(-)
diff --git a/yt_dlp/extractor/eporner.py b/yt_dlp/extractor/eporner.py
index aee2dee58176..b18a76c7c177 100644
--- a/yt_dlp/extractor/eporner.py
+++ b/yt_dlp/extractor/eporner.py
@@ -1,8 +1,10 @@
from .common import InfoExtractor
from ..utils import (
- encode_base_n,
ExtractorError,
+ encode_base_n,
+ get_elements_by_class,
int_or_none,
+ join_nonempty,
merge_dicts,
parse_duration,
str_to_int,
@@ -81,6 +83,7 @@ def calc_hash(s):
sources = video['sources']
formats = []
+ has_av1 = bool(get_elements_by_class('download-av1', webpage))
for kind, formats_dict in sources.items():
if not isinstance(formats_dict, dict):
continue
@@ -106,6 +109,14 @@ def calc_hash(s):
'height': height,
'fps': fps,
})
+ if has_av1:
+ formats.append({
+ 'url': src.replace('.mp4', '-av1.mp4'),
+ 'format_id': join_nonempty('av1', format_id),
+ 'height': height,
+ 'fps': fps,
+ 'vcodec': 'av1',
+ })
json_ld = self._search_json_ld(webpage, display_id, default={})
From e439693f729daf6fb15457baea1bca10ef5da34d Mon Sep 17 00:00:00 2001
From: c-basalt <117849907+c-basalt@users.noreply.github.com>
Date: Sun, 4 Feb 2024 18:28:45 -0500
Subject: [PATCH 08/10] [ie/bilibili] Support `--no-playlist` (#9139)
Addresses #8499
Authored by: c-basalt
---
yt_dlp/extractor/bilibili.py | 25 +++++++++++++++++++++++++
1 file changed, 25 insertions(+)
diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py
index 4ed9e2af7299..c138bde3a507 100644
--- a/yt_dlp/extractor/bilibili.py
+++ b/yt_dlp/extractor/bilibili.py
@@ -1305,6 +1305,26 @@ class BilibiliPlaylistIE(BilibiliSpaceListBaseIE):
'upload_date': '20211127',
},
'playlist_mincount': 513,
+ }, {
+ 'url': 'https://www.bilibili.com/list/1958703906?sid=547718&oid=687146339&bvid=BV1DU4y1r7tz',
+ 'info_dict': {
+ 'id': 'BV1DU4y1r7tz',
+ 'ext': 'mp4',
+ 'title': '【直播回放】8.20晚9:30 3d发布喵 2022年8月20日21点场',
+ 'upload_date': '20220820',
+ 'description': '',
+ 'timestamp': 1661016330,
+ 'uploader_id': '1958703906',
+ 'uploader': '靡烟miya',
+ 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
+ 'duration': 9552.903,
+ 'tags': list,
+ 'comment_count': int,
+ 'view_count': int,
+ 'like_count': int,
+ '_old_archive_ids': ['bilibili 687146339_part1'],
+ },
+ 'params': {'noplaylist': True},
}, {
'url': 'https://www.bilibili.com/medialist/play/1958703906?business=space_series&business_id=547718&desc=1',
'info_dict': {
@@ -1356,6 +1376,11 @@ def _extract_medialist(self, query, list_id):
def _real_extract(self, url):
list_id = self._match_id(url)
+
+ bvid = traverse_obj(parse_qs(url), ('bvid', 0))
+ if not self._yes_playlist(list_id, bvid):
+ return self.url_result(f'https://www.bilibili.com/video/{bvid}', BiliBiliIE)
+
webpage = self._download_webpage(url, list_id)
initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', list_id)
if traverse_obj(initial_state, ('error', 'code', {int_or_none})) != 200:
From 07256b9fee23960799024b95d5972abc7174aa81 Mon Sep 17 00:00:00 2001
From: SirElderling <148036781+SirElderling@users.noreply.github.com>
Date: Mon, 5 Feb 2024 00:35:52 +0000
Subject: [PATCH 09/10] [ie/nytimes] Overhaul extractors (#9075)
Closes #2899, Closes #8605
Authored by: SirElderling
---
yt_dlp/extractor/_extractors.py | 1 +
yt_dlp/extractor/nytimes.py | 450 +++++++++++++++++++++-----------
2 files changed, 303 insertions(+), 148 deletions(-)
diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py
index 04318a716da5..36335286c3a0 100644
--- a/yt_dlp/extractor/_extractors.py
+++ b/yt_dlp/extractor/_extractors.py
@@ -1352,6 +1352,7 @@
NYTimesIE,
NYTimesArticleIE,
NYTimesCookingIE,
+ NYTimesCookingRecipeIE,
)
from .nuvid import NuvidIE
from .nzherald import NZHeraldIE
diff --git a/yt_dlp/extractor/nytimes.py b/yt_dlp/extractor/nytimes.py
index 2e21edbb4120..354eb02c3485 100644
--- a/yt_dlp/extractor/nytimes.py
+++ b/yt_dlp/extractor/nytimes.py
@@ -1,50 +1,92 @@
-import hmac
-import hashlib
-import base64
+import json
+import uuid
from .common import InfoExtractor
from ..utils import (
+ ExtractorError,
+ clean_html,
determine_ext,
+ extract_attributes,
float_or_none,
+ get_elements_html_by_class,
int_or_none,
- js_to_json,
+ merge_dicts,
mimetype2ext,
parse_iso8601,
+ remove_end,
remove_start,
+ str_or_none,
+ traverse_obj,
+ url_or_none,
)
class NYTimesBaseIE(InfoExtractor):
- _SECRET = b'pX(2MbU2);4N{7J8)>YwKRJ+/pQ3JkiU2Q^V>mFYv6g6gYvt6v'
-
- def _extract_video_from_id(self, video_id):
- # Authorization generation algorithm is reverse engineered from `signer` in
- # http://graphics8.nytimes.com/video/vhs/vhs-2.x.min.js
- path = '/svc/video/api/v3/video/' + video_id
- hm = hmac.new(self._SECRET, (path + ':vhs').encode(), hashlib.sha512).hexdigest()
- video_data = self._download_json('http://www.nytimes.com' + path, video_id, 'Downloading video JSON', headers={
- 'Authorization': 'NYTV ' + base64.b64encode(hm.encode()).decode(),
- 'X-NYTV': 'vhs',
- }, fatal=False)
- if not video_data:
- video_data = self._download_json(
- 'http://www.nytimes.com/svc/video/api/v2/video/' + video_id,
- video_id, 'Downloading video JSON')
-
- title = video_data['headline']
-
- def get_file_size(file_size):
- if isinstance(file_size, int):
- return file_size
- elif isinstance(file_size, dict):
- return int(file_size.get('value', 0))
- else:
- return None
-
+ _DNS_NAMESPACE = uuid.UUID('36dd619a-56dc-595b-9e09-37f4152c7b5d')
+ _TOKEN = 'MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAuNIzKBOFB77aT/jN/FQ+/QVKWq5V1ka1AYmCR9hstz1pGNPH5ajOU9gAqta0T89iPnhjwla+3oec/Z3kGjxbpv6miQXufHFq3u2RC6HyU458cLat5kVPSOQCe3VVB5NRpOlRuwKHqn0txfxnwSSj8mqzstR997d3gKB//RO9zE16y3PoWlDQXkASngNJEWvL19iob/xwAkfEWCjyRILWFY0JYX3AvLMSbq7wsqOCE5srJpo7rRU32zsByhsp1D5W9OYqqwDmflsgCEQy2vqTsJjrJohuNg+urMXNNZ7Y3naMoqttsGDrWVxtPBafKMI8pM2ReNZBbGQsQXRzQNo7+QIDAQAB'
+ _GRAPHQL_API = 'https://samizdat-graphql.nytimes.com/graphql/v2'
+ _GRAPHQL_QUERY = '''query VideoQuery($id: String!) {
+ video(id: $id) {
+ ... on Video {
+ bylines {
+ renderedRepresentation
+ }
+ duration
+ promotionalHeadline
+ promotionalMedia {
+ ... on Image {
+ crops {
+ name
+ renditions {
+ name
+ width
+ height
+ url
+ }
+ }
+ }
+ }
+ renditions {
+ type
+ width
+ height
+ url
+ bitrate
+ }
+ summary
+ }
+ }
+}'''
+
+ def _call_api(self, media_id):
+ # reference: `id-to-uri.js`
+ video_uuid = uuid.uuid5(self._DNS_NAMESPACE, 'video')
+ media_uuid = uuid.uuid5(video_uuid, media_id)
+
+ return traverse_obj(self._download_json(
+ self._GRAPHQL_API, media_id, 'Downloading JSON from GraphQL API', data=json.dumps({
+ 'query': self._GRAPHQL_QUERY,
+ 'variables': {'id': f'nyt://video/{media_uuid}'},
+ }, separators=(',', ':')).encode(), headers={
+ 'Content-Type': 'application/json',
+ 'Nyt-App-Type': 'vhs',
+ 'Nyt-App-Version': 'v3.52.21',
+ 'Nyt-Token': self._TOKEN,
+ 'Origin': 'https://nytimes.com',
+ }, fatal=False), ('data', 'video', {dict})) or {}
+
+ def _extract_thumbnails(self, thumbs):
+ return traverse_obj(thumbs, (lambda _, v: url_or_none(v['url']), {
+ 'url': 'url',
+ 'width': ('width', {int_or_none}),
+ 'height': ('height', {int_or_none}),
+ }), default=None)
+
+ def _extract_formats_and_subtitles(self, video_id, content_media_json):
urls = []
formats = []
subtitles = {}
- for video in video_data.get('renditions', []):
+ for video in traverse_obj(content_media_json, ('renditions', ..., {dict})):
video_url = video.get('url')
format_id = video.get('type')
if not video_url or format_id == 'thumbs' or video_url in urls:
@@ -56,11 +98,9 @@ def get_file_size(file_size):
video_url, video_id, 'mp4', 'm3u8_native',
m3u8_id=format_id or 'hls', fatal=False)
formats.extend(m3u8_fmts)
- subtitles = self._merge_subtitles(subtitles, m3u8_subs)
+ self._merge_subtitles(m3u8_subs, target=subtitles)
elif ext == 'mpd':
- continue
- # formats.extend(self._extract_mpd_formats(
- # video_url, video_id, format_id or 'dash', fatal=False))
+ continue # all mpd urls give 404 errors
else:
formats.append({
'url': video_url,
@@ -68,55 +108,49 @@ def get_file_size(file_size):
'vcodec': video.get('videoencoding') or video.get('video_codec'),
'width': int_or_none(video.get('width')),
'height': int_or_none(video.get('height')),
- 'filesize': get_file_size(video.get('file_size') or video.get('fileSize')),
+ 'filesize': traverse_obj(video, (
+ ('file_size', 'fileSize'), (None, ('value')), {int_or_none}), get_all=False),
'tbr': int_or_none(video.get('bitrate'), 1000) or None,
'ext': ext,
})
- thumbnails = []
- for image in video_data.get('images', []):
- image_url = image.get('url')
- if not image_url:
- continue
- thumbnails.append({
- 'url': 'http://www.nytimes.com/' + image_url,
- 'width': int_or_none(image.get('width')),
- 'height': int_or_none(image.get('height')),
- })
+ return formats, subtitles
- publication_date = video_data.get('publication_date')
- timestamp = parse_iso8601(publication_date[:-8]) if publication_date else None
+ def _extract_video(self, media_id):
+ data = self._call_api(media_id)
+ formats, subtitles = self._extract_formats_and_subtitles(media_id, data)
return {
- 'id': video_id,
- 'title': title,
- 'description': video_data.get('summary'),
- 'timestamp': timestamp,
- 'uploader': video_data.get('byline'),
- 'duration': float_or_none(video_data.get('duration'), 1000),
+ 'id': media_id,
+ 'title': data.get('promotionalHeadline'),
+ 'description': data.get('summary'),
+ 'duration': float_or_none(data.get('duration'), scale=1000),
+ 'creator': ', '.join(traverse_obj(data, ( # TODO: change to 'creators'
+ 'bylines', ..., 'renderedRepresentation', {lambda x: remove_start(x, 'By ')}))),
'formats': formats,
'subtitles': subtitles,
- 'thumbnails': thumbnails,
+ 'thumbnails': self._extract_thumbnails(
+ traverse_obj(data, ('promotionalMedia', 'crops', ..., 'renditions', ...))),
}
class NYTimesIE(NYTimesBaseIE):
_VALID_URL = r'https?://(?:(?:www\.)?nytimes\.com/video/(?:[^/]+/)+?|graphics8\.nytimes\.com/bcvideo/\d+(?:\.\d+)?/iframe/embed\.html\?videoId=)(?P<id>\d+)'
_EMBED_REGEX = [r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//graphics8\.nytimes\.com/bcvideo/[^/]+/iframe/embed\.html.+?)\1>']
', webpage, 'author', default=None),
+ }
-class NYTimesCookingIE(NYTimesBaseIE):
- _VALID_URL = r'https?://cooking\.nytimes\.com/(?:guid|recip)es/(?P<id>\d+)'
+
+class NYTimesCookingRecipeIE(InfoExtractor):
+ _VALID_URL = r'https?://cooking\.nytimes\.com/recipes/(?P<id>\d+)'
_TESTS = [{
'url': 'https://cooking.nytimes.com/recipes/1017817-cranberry-curd-tart',
- 'md5': 'dab81fa2eaeb3f9ed47498bdcfcdc1d3',
+ 'md5': '579e83bbe8e61e9de67f80edba8a78a8',
'info_dict': {
- 'id': '100000004756089',
- 'ext': 'mov',
- 'timestamp': 1479383008,
- 'uploader': 'By SHAW LASH, ADAM SAEWITZ and JAMES HERRON',
- 'title': 'Cranberry Tart',
- 'upload_date': '20161117',
- 'description': 'If you are a fan of lemon curd or the classic French tarte au citron, you will love this cranberry version.',
+ 'id': '1017817',
+ 'ext': 'mp4',
+ 'title': 'Cranberry Curd Tart',
+ 'description': 'md5:ad77a3fc321db636256d4343c5742152',
+ 'timestamp': 1447804800,
+ 'upload_date': '20151118',
+ 'creator': 'David Tanis',
+ 'thumbnail': r're:https?://\w+\.nyt.com/images/.*\.jpg',
},
}, {
- 'url': 'https://cooking.nytimes.com/guides/13-how-to-cook-a-turkey',
- 'md5': '4b2e8c70530a89b8d905a2b572316eb8',
+ 'url': 'https://cooking.nytimes.com/recipes/1024781-neapolitan-checkerboard-cookies',
+ 'md5': '58df35998241dcf0620e99e646331b42',
'info_dict': {
- 'id': '100000003951728',
- 'ext': 'mov',
- 'timestamp': 1445509539,
- 'description': 'Turkey guide',
- 'upload_date': '20151022',
- 'title': 'Turkey',
- }
+ 'id': '1024781',
+ 'ext': 'mp4',
+ 'title': 'Neapolitan Checkerboard Cookies',
+ 'description': 'md5:ba12394c585ababea951cb6d2fcc6631',
+ 'timestamp': 1701302400,
+ 'upload_date': '20231130',
+ 'creator': 'Sue Li',
+ 'thumbnail': r're:https?://\w+\.nyt.com/images/.*\.jpg',
+ },
+ }, {
+ 'url': 'https://cooking.nytimes.com/recipes/1019516-overnight-oats',
+ 'md5': '2fe7965a3adc899913b8e25ada360823',
+ 'info_dict': {
+ 'id': '1019516',
+ 'ext': 'mp4',
+ 'timestamp': 1546387200,
+ 'description': 'md5:8856ce10239161bd2596ac335b9f9bfb',
+ 'upload_date': '20190102',
+ 'title': 'Overnight Oats',
+ 'creator': 'Genevieve Ko',
+ 'thumbnail': r're:https?://\w+\.nyt.com/images/.*\.jpg',
+ },
}]
def _real_extract(self, url):
page_id = self._match_id(url)
-
webpage = self._download_webpage(url, page_id)
+ recipe_data = self._search_nextjs_data(webpage, page_id)['props']['pageProps']['recipe']
- video_id = self._search_regex(
- r'data-video-id=["\'](\d+)', webpage, 'video id')
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(
+ recipe_data['videoSrc'], page_id, 'mp4', m3u8_id='hls')
- return self._extract_video_from_id(video_id)
+ return {
+ **traverse_obj(recipe_data, {
+ 'id': ('id', {str_or_none}),
+ 'title': ('title', {str}),
+ 'description': ('topnote', {clean_html}),
+ 'timestamp': ('publishedAt', {int_or_none}),
+ 'creator': ('contentAttribution', 'cardByline', {str}),
+ }),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'thumbnails': [{'url': thumb_url} for thumb_url in traverse_obj(
+ recipe_data, ('image', 'crops', 'recipe', ..., {url_or_none}))],
+ }
From acaf806c15f0a802ba286c23af02a10cf4bd4731 Mon Sep 17 00:00:00 2001
From: DmitryScaletta
Date: Mon, 5 Feb 2024 05:17:39 +0300
Subject: [PATCH 10/10] [ie/nuum] Add extractors (#8868)
Authored by: DmitryScaletta, seproDev
Co-authored-by: sepro <4618135+seproDev@users.noreply.github.com>
---
yt_dlp/extractor/_extractors.py | 10 +-
yt_dlp/extractor/nuum.py | 199 ++++++++++++++++++++++++++++++++
yt_dlp/extractor/wasdtv.py | 159 -------------------------
3 files changed, 204 insertions(+), 164 deletions(-)
create mode 100644 yt_dlp/extractor/nuum.py
delete mode 100644 yt_dlp/extractor/wasdtv.py
diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py
index 36335286c3a0..e7dd34c77bda 100644
--- a/yt_dlp/extractor/_extractors.py
+++ b/yt_dlp/extractor/_extractors.py
@@ -1354,6 +1354,11 @@
NYTimesCookingIE,
NYTimesCookingRecipeIE,
)
+from .nuum import (
+ NuumLiveIE,
+ NuumTabIE,
+ NuumMediaIE,
+)
from .nuvid import NuvidIE
from .nzherald import NZHeraldIE
from .nzonscreen import NZOnScreenIE
@@ -2315,11 +2320,6 @@
WashingtonPostIE,
WashingtonPostArticleIE,
)
-from .wasdtv import (
- WASDTVStreamIE,
- WASDTVRecordIE,
- WASDTVClipIE,
-)
from .wat import WatIE
from .wdr import (
WDRIE,
diff --git a/yt_dlp/extractor/nuum.py b/yt_dlp/extractor/nuum.py
new file mode 100644
index 000000000000..3db663ded0dd
--- /dev/null
+++ b/yt_dlp/extractor/nuum.py
@@ -0,0 +1,199 @@
+import functools
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ OnDemandPagedList,
+ UserNotLive,
+ filter_dict,
+ int_or_none,
+ parse_iso8601,
+ str_or_none,
+ url_or_none,
+)
+from ..utils.traversal import traverse_obj
+
+
+class NuumBaseIE(InfoExtractor):
+ def _call_api(self, path, video_id, description, query={}):
+ response = self._download_json(
+ f'https://nuum.ru/api/v2/{path}', video_id, query=query,
+ note=f'Downloading {description} metadata',
+ errnote=f'Unable to download {description} metadata')
+ if error := response.get('error'):
+ raise ExtractorError(f'API returned error: {error!r}')
+ return response['result']
+
+ def _get_channel_info(self, channel_name):
+ return self._call_api(
+ 'broadcasts/public', video_id=channel_name, description='channel',
+ query={
+ 'with_extra': 'true',
+ 'channel_name': channel_name,
+ 'with_deleted': 'true',
+ })
+
+ def _parse_video_data(self, container, extract_formats=True):
+ stream = traverse_obj(container, ('media_container_streams', 0, {dict})) or {}
+ media = traverse_obj(stream, ('stream_media', 0, {dict})) or {}
+ media_url = traverse_obj(media, (
+ 'media_meta', ('media_archive_url', 'media_url'), {url_or_none}), get_all=False)
+
+ video_id = str(container['media_container_id'])
+ is_live = media.get('media_status') == 'RUNNING'
+
+ formats, subtitles = None, None
+ if extract_formats:
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(
+ media_url, video_id, 'mp4', live=is_live)
+
+ return filter_dict({
+ 'id': video_id,
+ 'is_live': is_live,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ **traverse_obj(container, {
+ 'title': ('media_container_name', {str}),
+ 'description': ('media_container_description', {str}),
+ 'timestamp': ('created_at', {parse_iso8601}),
+ 'channel': ('media_container_channel', 'channel_name', {str}),
+ 'channel_id': ('media_container_channel', 'channel_id', {str_or_none}),
+ }),
+ **traverse_obj(stream, {
+ 'view_count': ('stream_total_viewers', {int_or_none}),
+ 'concurrent_view_count': ('stream_current_viewers', {int_or_none}),
+ }),
+ **traverse_obj(media, {
+ 'duration': ('media_duration', {int_or_none}),
+ 'thumbnail': ('media_meta', ('media_preview_archive_url', 'media_preview_url'), {url_or_none}),
+ }, get_all=False),
+ })
+
+
+class NuumMediaIE(NuumBaseIE):
+ IE_NAME = 'nuum:media'
+ _VALID_URL = r'https?://nuum\.ru/(?:streams|videos|clips)/(?P<id>[\d]+)'
+ _TESTS = [{
+ 'url': 'https://nuum.ru/streams/1592713-7-days-to-die',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://nuum.ru/videos/1567547-toxi-hurtz',
+ 'md5': 'f1d9118a30403e32b702a204eb03aca3',
+ 'info_dict': {
+ 'id': '1567547',
+ 'ext': 'mp4',
+ 'title': 'Toxi$ - Hurtz',
+ 'description': '',
+ 'timestamp': 1702631651,
+ 'upload_date': '20231215',
+ 'thumbnail': r're:^https?://.+\.jpg',
+ 'view_count': int,
+ 'concurrent_view_count': int,
+ 'channel_id': '6911',
+ 'channel': 'toxis',
+ 'duration': 116,
+ },
+ }, {
+ 'url': 'https://nuum.ru/clips/1552564-pro-misu',
+ 'md5': 'b248ae1565b1e55433188f11beeb0ca1',
+ 'info_dict': {
+ 'id': '1552564',
+ 'ext': 'mp4',
+ 'title': 'Про Мису 🙃',
+ 'timestamp': 1701971828,
+ 'upload_date': '20231207',
+ 'thumbnail': r're:^https?://.+\.jpg',
+ 'view_count': int,
+ 'concurrent_view_count': int,
+ 'channel_id': '3320',
+ 'channel': 'Misalelik',
+ 'duration': 41,
+ },
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ video_data = self._call_api(f'media-containers/{video_id}', video_id, 'media')
+
+ return self._parse_video_data(video_data)
+
+
+class NuumLiveIE(NuumBaseIE):
+ IE_NAME = 'nuum:live'
+ _VALID_URL = r'https?://nuum\.ru/channel/(?P<id>[^/#?]+)/?(?:$|[#?])'
+ _TESTS = [{
+ 'url': 'https://nuum.ru/channel/mts_live',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ channel = self._match_id(url)
+ channel_info = self._get_channel_info(channel)
+ if traverse_obj(channel_info, ('channel', 'channel_is_live')) is False:
+ raise UserNotLive(video_id=channel)
+
+ info = self._parse_video_data(channel_info['media_container'])
+ return {
+ 'webpage_url': f'https://nuum.ru/streams/{info["id"]}',
+ 'extractor_key': NuumMediaIE.ie_key(),
+ 'extractor': NuumMediaIE.IE_NAME,
+ **info,
+ }
+
+
+class NuumTabIE(NuumBaseIE):
+ IE_NAME = 'nuum:tab'
+ _VALID_URL = r'https?://nuum\.ru/channel/(?P<id>[^/#?]+)/(?P<type>streams|videos|clips)'
+ _TESTS = [{
+ 'url': 'https://nuum.ru/channel/dankon_/clips',
+ 'info_dict': {
+ 'id': 'dankon__clips',
+ 'title': 'Dankon_',
+ },
+ 'playlist_mincount': 29,
+ }, {
+ 'url': 'https://nuum.ru/channel/dankon_/videos',
+ 'info_dict': {
+ 'id': 'dankon__videos',
+ 'title': 'Dankon_',
+ },
+ 'playlist_mincount': 2,
+ }, {
+ 'url': 'https://nuum.ru/channel/dankon_/streams',
+ 'info_dict': {
+ 'id': 'dankon__streams',
+ 'title': 'Dankon_',
+ },
+ 'playlist_mincount': 1,
+ }]
+
+ _PAGE_SIZE = 50
+
+ def _fetch_page(self, channel_id, tab_type, tab_id, page):
+ CONTAINER_TYPES = {
+ 'clips': ['SHORT_VIDEO', 'REVIEW_VIDEO'],
+ 'videos': ['LONG_VIDEO'],
+ 'streams': ['SINGLE'],
+ }
+
+ media_containers = self._call_api(
+ 'media-containers', video_id=tab_id, description=f'{tab_type} tab page {page + 1}',
+ query={
+ 'limit': self._PAGE_SIZE,
+ 'offset': page * self._PAGE_SIZE,
+ 'channel_id': channel_id,
+ 'media_container_status': 'STOPPED',
+ 'media_container_type': CONTAINER_TYPES[tab_type],
+ })
+ for container in traverse_obj(media_containers, (..., {dict})):
+ metadata = self._parse_video_data(container, extract_formats=False)
+ yield self.url_result(f'https://nuum.ru/videos/{metadata["id"]}', NuumMediaIE, **metadata)
+
+ def _real_extract(self, url):
+ channel_name, tab_type = self._match_valid_url(url).group('id', 'type')
+ tab_id = f'{channel_name}_{tab_type}'
+ channel_data = self._get_channel_info(channel_name)['channel']
+
+ return self.playlist_result(OnDemandPagedList(functools.partial(
+ self._fetch_page, channel_data['channel_id'], tab_type, tab_id), self._PAGE_SIZE),
+ playlist_id=tab_id, playlist_title=channel_data.get('channel_name'))
diff --git a/yt_dlp/extractor/wasdtv.py b/yt_dlp/extractor/wasdtv.py
deleted file mode 100644
index f57c619b5f88..000000000000
--- a/yt_dlp/extractor/wasdtv.py
+++ /dev/null
@@ -1,159 +0,0 @@
-from .common import InfoExtractor
-from ..utils import (
- ExtractorError,
- int_or_none,
- parse_iso8601,
- traverse_obj,
- try_get,
-)
-
-
-class WASDTVBaseIE(InfoExtractor):
-
- def _fetch(self, path, video_id, description, query={}):
- response = self._download_json(
- f'https://wasd.tv/api/{path}', video_id, query=query,
- note=f'Downloading {description} metadata',
- errnote=f'Unable to download {description} metadata')
- error = response.get('error')
- if error:
- raise ExtractorError(f'{self.IE_NAME} returned error: {error}', expected=True)
- return response.get('result')
-
- def _extract_thumbnails(self, thumbnails_dict):
- return [{
- 'url': url,
- 'preference': index,
- } for index, url in enumerate(
- traverse_obj(thumbnails_dict, (('small', 'medium', 'large'),))) if url]
-
- def _real_extract(self, url):
- container = self._get_container(url)
- stream = traverse_obj(container, ('media_container_streams', 0))
- media = try_get(stream, lambda x: x['stream_media'][0])
- if not media:
- raise ExtractorError('Can not extract media data.', expected=True)
- media_meta = media.get('media_meta')
- media_url, is_live = self._get_media_url(media_meta)
- video_id = media.get('media_id') or container.get('media_container_id')
- formats, subtitles = self._extract_m3u8_formats_and_subtitles(media_url, video_id, 'mp4')
- return {
- 'id': str(video_id),
- 'title': container.get('media_container_name') or self._og_search_title(self._download_webpage(url, video_id)),
- 'description': container.get('media_container_description'),
- 'thumbnails': self._extract_thumbnails(media_meta.get('media_preview_images')),
- 'timestamp': parse_iso8601(container.get('created_at')),
- 'view_count': int_or_none(stream.get('stream_current_viewers' if is_live else 'stream_total_viewers')),
- 'is_live': is_live,
- 'formats': formats,
- 'subtitles': subtitles,
- }
-
- def _get_container(self, url):
- raise NotImplementedError('Subclass for get media container')
-
- def _get_media_url(self, media_meta):
- raise NotImplementedError('Subclass for get media url')
-
-
-class WASDTVStreamIE(WASDTVBaseIE):
- IE_NAME = 'wasdtv:stream'
- _VALID_URL = r'https?://wasd\.tv/(?P<id>[^/#?]+)$'
- _TESTS = [{
- 'url': 'https://wasd.tv/24_7',
- 'info_dict': {
- 'id': '559738',
- 'ext': 'mp4',
- 'title': 'Live 24/7 Music',
- 'description': '24/7 Music',
- 'timestamp': int,
- 'upload_date': r're:^\d{8}$',
- 'is_live': True,
- 'view_count': int,
- },
- }]
-
- def _get_container(self, url):
- nickname = self._match_id(url)
- channel = self._fetch(f'channels/nicknames/{nickname}', video_id=nickname, description='channel')
- channel_id = channel.get('channel_id')
- containers = self._fetch(
- 'v2/media-containers', channel_id, 'running media containers',
- query={
- 'channel_id': channel_id,
- 'media_container_type': 'SINGLE',
- 'media_container_status': 'RUNNING',
- })
- if not containers:
- raise ExtractorError(f'{nickname} is offline', expected=True)
- return containers[0]
-
- def _get_media_url(self, media_meta):
- return media_meta['media_url'], True
-
-
-class WASDTVRecordIE(WASDTVBaseIE):
- IE_NAME = 'wasdtv:record'
- _VALID_URL = r'https?://wasd\.tv/[^/#?]+(?:/videos)?\?record=(?P<id>\d+)$'
- _TESTS = [{
- 'url': 'https://wasd.tv/spacemita/videos?record=907755',
- 'md5': 'c9899dd85be4cc997816ff9f9ca516ce',
- 'info_dict': {
- 'id': '906825',
- 'ext': 'mp4',
- 'title': 'Музыкальный',
- 'description': 'md5:f510388d929ff60ae61d4c3cab3137cc',
- 'timestamp': 1645812079,
- 'upload_date': '20220225',
- 'thumbnail': r're:^https?://.+\.jpg',
- 'is_live': False,
- 'view_count': int,
- },
- }, {
- 'url': 'https://wasd.tv/spacemita?record=907755',
- 'only_matching': True,
- }]
-
- def _get_container(self, url):
- container_id = self._match_id(url)
- return self._fetch(
- f'v2/media-containers/{container_id}', container_id, 'media container')
-
- def _get_media_url(self, media_meta):
- media_archive_url = media_meta.get('media_archive_url')
- if media_archive_url:
- return media_archive_url, False
- return media_meta['media_url'], True
-
-
-class WASDTVClipIE(WASDTVBaseIE):
- IE_NAME = 'wasdtv:clip'
- _VALID_URL = r'https?://wasd\.tv/[^/#?]+/clips\?clip=(?P<id>\d+)$'
- _TESTS = [{
- 'url': 'https://wasd.tv/spacemita/clips?clip=26804',
- 'md5': '818885e720143d7a4e776ff66fcff148',
- 'info_dict': {
- 'id': '26804',
- 'ext': 'mp4',
- 'title': 'Пуш флексит на голове стримера',
- 'timestamp': 1646682908,
- 'upload_date': '20220307',
- 'thumbnail': r're:^https?://.+\.jpg',
- 'view_count': int,
- },
- }]
-
- def _real_extract(self, url):
- clip_id = self._match_id(url)
- clip = self._fetch(f'v2/clips/{clip_id}', video_id=clip_id, description='clip')
- clip_data = clip.get('clip_data')
- formats, subtitles = self._extract_m3u8_formats_and_subtitles(clip_data.get('url'), video_id=clip_id, ext='mp4')
- return {
- 'id': clip_id,
- 'title': clip.get('clip_title') or self._og_search_title(self._download_webpage(url, clip_id, fatal=False)),
- 'thumbnails': self._extract_thumbnails(clip_data.get('preview')),
- 'timestamp': parse_iso8601(clip.get('created_at')),
- 'view_count': int_or_none(clip.get('clip_views_count')),
- 'formats': formats,
- 'subtitles': subtitles,
- }