Skip to content

Commit

Permalink
Merge branch 'yt-dlp:master' into patched
Browse files Browse the repository at this point in the history
  • Loading branch information
kclauhk authored Feb 5, 2024
2 parents 4b3e80f + acaf806 commit 0fcbe87
Show file tree
Hide file tree
Showing 11 changed files with 734 additions and 316 deletions.
13 changes: 8 additions & 5 deletions yt_dlp/extractor/_extractors.py
Original file line number Diff line number Diff line change
Expand Up @@ -1357,6 +1357,12 @@
NYTimesIE,
NYTimesArticleIE,
NYTimesCookingIE,
NYTimesCookingRecipeIE,
)
from .nuum import (
NuumLiveIE,
NuumTabIE,
NuumMediaIE,
)
from .nuvid import NuvidIE
from .nzherald import NZHeraldIE
Expand Down Expand Up @@ -1399,6 +1405,7 @@
from .orf import (
ORFTVthekIE,
ORFFM4StoryIE,
ORFONIE,
ORFRadioIE,
ORFPodcastIE,
ORFIPTVIE,
Expand Down Expand Up @@ -2318,11 +2325,6 @@
WashingtonPostIE,
WashingtonPostArticleIE,
)
from .wasdtv import (
WASDTVStreamIE,
WASDTVRecordIE,
WASDTVClipIE,
)
from .wat import WatIE
from .wdr import (
WDRIE,
Expand Down Expand Up @@ -2501,6 +2503,7 @@
Zee5SeriesIE,
)
from .zeenews import ZeeNewsIE
from .zetland import ZetlandDKArticleIE
from .zhihu import ZhihuIE
from .zingmp3 import (
ZingMp3IE,
Expand Down
55 changes: 55 additions & 0 deletions yt_dlp/extractor/bilibili.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import re
import time
import urllib.parse
import uuid

from .common import InfoExtractor, SearchInfoExtractor
from ..dependencies import Cryptodome
Expand Down Expand Up @@ -1304,6 +1305,26 @@ class BilibiliPlaylistIE(BilibiliSpaceListBaseIE):
'upload_date': '20211127',
},
'playlist_mincount': 513,
}, {
'url': 'https://www.bilibili.com/list/1958703906?sid=547718&oid=687146339&bvid=BV1DU4y1r7tz',
'info_dict': {
'id': 'BV1DU4y1r7tz',
'ext': 'mp4',
'title': '【直播回放】8.20晚9:30 3d发布喵 2022年8月20日21点场',
'upload_date': '20220820',
'description': '',
'timestamp': 1661016330,
'uploader_id': '1958703906',
'uploader': '靡烟miya',
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
'duration': 9552.903,
'tags': list,
'comment_count': int,
'view_count': int,
'like_count': int,
'_old_archive_ids': ['bilibili 687146339_part1'],
},
'params': {'noplaylist': True},
}, {
'url': 'https://www.bilibili.com/medialist/play/1958703906?business=space_series&business_id=547718&desc=1',
'info_dict': {
Expand Down Expand Up @@ -1355,6 +1376,11 @@ def _extract_medialist(self, query, list_id):

def _real_extract(self, url):
list_id = self._match_id(url)

bvid = traverse_obj(parse_qs(url), ('bvid', 0))
if not self._yes_playlist(list_id, bvid):
return self.url_result(f'https://www.bilibili.com/video/{bvid}', BiliBiliIE)

webpage = self._download_webpage(url, list_id)
initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', list_id)
if traverse_obj(initial_state, ('error', 'code', {int_or_none})) != 200:
Expand Down Expand Up @@ -1464,8 +1490,37 @@ class BiliBiliSearchIE(SearchInfoExtractor):
IE_DESC = 'Bilibili video search'
_MAX_RESULTS = 100000
_SEARCH_KEY = 'bilisearch'
_TESTS = [{
'url': 'bilisearch3:靡烟 出道一年,我怎么还在等你单推的女人睡觉后开播啊',
'playlist_count': 3,
'info_dict': {
'id': '靡烟 出道一年,我怎么还在等你单推的女人睡觉后开播啊',
'title': '靡烟 出道一年,我怎么还在等你单推的女人睡觉后开播啊',
},
'playlist': [{
'info_dict': {
'id': 'BV1n44y1Q7sc',
'ext': 'mp4',
'title': '“出道一年,我怎么还在等你单推的女人睡觉后开播啊?”【一分钟了解靡烟miya】',
'timestamp': 1669889987,
'upload_date': '20221201',
'description': 'md5:43343c0973defff527b5a4b403b4abf9',
'tags': list,
'uploader': '靡烟miya',
'duration': 123.156,
'uploader_id': '1958703906',
'comment_count': int,
'view_count': int,
'like_count': int,
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
'_old_archive_ids': ['bilibili 988222410_part1'],
},
}],
}]

def _search_results(self, query):
if not self._get_cookies('https://api.bilibili.com').get('buvid3'):
self._set_cookie('.bilibili.com', 'buvid3', f'{uuid.uuid4()}infoc')
for page_num in itertools.count(1):
videos = self._download_json(
'https://api.bilibili.com/x/web-interface/search/type', query,
Expand Down
6 changes: 6 additions & 0 deletions yt_dlp/extractor/ccma.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from .common import InfoExtractor
from ..utils import (
clean_html,
determine_ext,
int_or_none,
parse_duration,
parse_resolution,
Expand Down Expand Up @@ -60,6 +61,7 @@ def _real_extract(self, url):
'http://dinamics.ccma.cat/pvideo/media.jsp', media_id, query={
'media': media_type,
'idint': media_id,
'format': 'dm',
})

formats = []
Expand All @@ -69,6 +71,10 @@ def _real_extract(self, url):
format_url = url_or_none(format_.get('file'))
if not format_url:
continue
if determine_ext(format_url) == 'mpd':
formats.extend(self._extract_mpd_formats(
format_url, media_id, mpd_id='dash', fatal=False))
continue
label = format_.get('label')
f = parse_resolution(label)
f.update({
Expand Down
13 changes: 12 additions & 1 deletion yt_dlp/extractor/eporner.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
from .common import InfoExtractor
from ..utils import (
encode_base_n,
ExtractorError,
encode_base_n,
get_elements_by_class,
int_or_none,
join_nonempty,
merge_dicts,
parse_duration,
str_to_int,
Expand Down Expand Up @@ -81,6 +83,7 @@ def calc_hash(s):
sources = video['sources']

formats = []
has_av1 = bool(get_elements_by_class('download-av1', webpage))
for kind, formats_dict in sources.items():
if not isinstance(formats_dict, dict):
continue
Expand All @@ -106,6 +109,14 @@ def calc_hash(s):
'height': height,
'fps': fps,
})
if has_av1:
formats.append({
'url': src.replace('.mp4', '-av1.mp4'),
'format_id': join_nonempty('av1', format_id),
'height': height,
'fps': fps,
'vcodec': 'av1',
})

json_ld = self._search_json_ld(webpage, display_id, default={})

Expand Down
199 changes: 199 additions & 0 deletions yt_dlp/extractor/nuum.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,199 @@
import functools

from .common import InfoExtractor
from ..utils import (
ExtractorError,
OnDemandPagedList,
UserNotLive,
filter_dict,
int_or_none,
parse_iso8601,
str_or_none,
url_or_none,
)
from ..utils.traversal import traverse_obj


class NuumBaseIE(InfoExtractor):
def _call_api(self, path, video_id, description, query={}):
response = self._download_json(
f'https://nuum.ru/api/v2/{path}', video_id, query=query,
note=f'Downloading {description} metadata',
errnote=f'Unable to download {description} metadata')
if error := response.get('error'):
raise ExtractorError(f'API returned error: {error!r}')
return response['result']

def _get_channel_info(self, channel_name):
return self._call_api(
'broadcasts/public', video_id=channel_name, description='channel',
query={
'with_extra': 'true',
'channel_name': channel_name,
'with_deleted': 'true',
})

def _parse_video_data(self, container, extract_formats=True):
stream = traverse_obj(container, ('media_container_streams', 0, {dict})) or {}
media = traverse_obj(stream, ('stream_media', 0, {dict})) or {}
media_url = traverse_obj(media, (
'media_meta', ('media_archive_url', 'media_url'), {url_or_none}), get_all=False)

video_id = str(container['media_container_id'])
is_live = media.get('media_status') == 'RUNNING'

formats, subtitles = None, None
if extract_formats:
formats, subtitles = self._extract_m3u8_formats_and_subtitles(
media_url, video_id, 'mp4', live=is_live)

return filter_dict({
'id': video_id,
'is_live': is_live,
'formats': formats,
'subtitles': subtitles,
**traverse_obj(container, {
'title': ('media_container_name', {str}),
'description': ('media_container_description', {str}),
'timestamp': ('created_at', {parse_iso8601}),
'channel': ('media_container_channel', 'channel_name', {str}),
'channel_id': ('media_container_channel', 'channel_id', {str_or_none}),
}),
**traverse_obj(stream, {
'view_count': ('stream_total_viewers', {int_or_none}),
'concurrent_view_count': ('stream_current_viewers', {int_or_none}),
}),
**traverse_obj(media, {
'duration': ('media_duration', {int_or_none}),
'thumbnail': ('media_meta', ('media_preview_archive_url', 'media_preview_url'), {url_or_none}),
}, get_all=False),
})


class NuumMediaIE(NuumBaseIE):
IE_NAME = 'nuum:media'
_VALID_URL = r'https?://nuum\.ru/(?:streams|videos|clips)/(?P<id>[\d]+)'
_TESTS = [{
'url': 'https://nuum.ru/streams/1592713-7-days-to-die',
'only_matching': True,
}, {
'url': 'https://nuum.ru/videos/1567547-toxi-hurtz',
'md5': 'f1d9118a30403e32b702a204eb03aca3',
'info_dict': {
'id': '1567547',
'ext': 'mp4',
'title': 'Toxi$ - Hurtz',
'description': '',
'timestamp': 1702631651,
'upload_date': '20231215',
'thumbnail': r're:^https?://.+\.jpg',
'view_count': int,
'concurrent_view_count': int,
'channel_id': '6911',
'channel': 'toxis',
'duration': 116,
},
}, {
'url': 'https://nuum.ru/clips/1552564-pro-misu',
'md5': 'b248ae1565b1e55433188f11beeb0ca1',
'info_dict': {
'id': '1552564',
'ext': 'mp4',
'title': 'Про Мису 🙃',
'timestamp': 1701971828,
'upload_date': '20231207',
'thumbnail': r're:^https?://.+\.jpg',
'view_count': int,
'concurrent_view_count': int,
'channel_id': '3320',
'channel': 'Misalelik',
'duration': 41,
},
}]

def _real_extract(self, url):
video_id = self._match_id(url)
video_data = self._call_api(f'media-containers/{video_id}', video_id, 'media')

return self._parse_video_data(video_data)


class NuumLiveIE(NuumBaseIE):
IE_NAME = 'nuum:live'
_VALID_URL = r'https?://nuum\.ru/channel/(?P<id>[^/#?]+)/?(?:$|[#?])'
_TESTS = [{
'url': 'https://nuum.ru/channel/mts_live',
'only_matching': True,
}]

def _real_extract(self, url):
channel = self._match_id(url)
channel_info = self._get_channel_info(channel)
if traverse_obj(channel_info, ('channel', 'channel_is_live')) is False:
raise UserNotLive(video_id=channel)

info = self._parse_video_data(channel_info['media_container'])
return {
'webpage_url': f'https://nuum.ru/streams/{info["id"]}',
'extractor_key': NuumMediaIE.ie_key(),
'extractor': NuumMediaIE.IE_NAME,
**info,
}


class NuumTabIE(NuumBaseIE):
IE_NAME = 'nuum:tab'
_VALID_URL = r'https?://nuum\.ru/channel/(?P<id>[^/#?]+)/(?P<type>streams|videos|clips)'
_TESTS = [{
'url': 'https://nuum.ru/channel/dankon_/clips',
'info_dict': {
'id': 'dankon__clips',
'title': 'Dankon_',
},
'playlist_mincount': 29,
}, {
'url': 'https://nuum.ru/channel/dankon_/videos',
'info_dict': {
'id': 'dankon__videos',
'title': 'Dankon_',
},
'playlist_mincount': 2,
}, {
'url': 'https://nuum.ru/channel/dankon_/streams',
'info_dict': {
'id': 'dankon__streams',
'title': 'Dankon_',
},
'playlist_mincount': 1,
}]

_PAGE_SIZE = 50

def _fetch_page(self, channel_id, tab_type, tab_id, page):
CONTAINER_TYPES = {
'clips': ['SHORT_VIDEO', 'REVIEW_VIDEO'],
'videos': ['LONG_VIDEO'],
'streams': ['SINGLE'],
}

media_containers = self._call_api(
'media-containers', video_id=tab_id, description=f'{tab_type} tab page {page + 1}',
query={
'limit': self._PAGE_SIZE,
'offset': page * self._PAGE_SIZE,
'channel_id': channel_id,
'media_container_status': 'STOPPED',
'media_container_type': CONTAINER_TYPES[tab_type],
})
for container in traverse_obj(media_containers, (..., {dict})):
metadata = self._parse_video_data(container, extract_formats=False)
yield self.url_result(f'https://nuum.ru/videos/{metadata["id"]}', NuumMediaIE, **metadata)

def _real_extract(self, url):
channel_name, tab_type = self._match_valid_url(url).group('id', 'type')
tab_id = f'{channel_name}_{tab_type}'
channel_data = self._get_channel_info(channel_name)['channel']

return self.playlist_result(OnDemandPagedList(functools.partial(
self._fetch_page, channel_data['channel_id'], tab_type, tab_id), self._PAGE_SIZE),
playlist_id=tab_id, playlist_title=channel_data.get('channel_name'))
Loading

0 comments on commit 0fcbe87

Please sign in to comment.