Skip to content

Commit

Permalink
[ie/bluey] Add extractor
Browse files Browse the repository at this point in the history
  • Loading branch information
kclauhk committed Jul 21, 2024
1 parent 43412f6 commit 7e39550
Show file tree
Hide file tree
Showing 2 changed files with 276 additions and 0 deletions.
1 change: 1 addition & 0 deletions yt_dlp/extractor/_extractors.py
Original file line number Diff line number Diff line change
Expand Up @@ -277,6 +277,7 @@
from .blerp import BlerpIE
from .blogger import BloggerIE
from .bloomberg import BloombergIE
from .bluey import BlueyIE
from .bokecc import BokeCCIE
from .bongacams import BongaCamsIE
from .boosty import BoostyIE
Expand Down
275 changes: 275 additions & 0 deletions yt_dlp/extractor/bluey.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,275 @@
import re

from .common import InfoExtractor
from ..utils import (
clean_html,
int_or_none,
merge_dicts,
str_or_none,
traverse_obj,
url_or_none,
)


class BlueyIE(InfoExtractor):
    """Extract videos embedded in pages of www.bluey.tv.

    Pages embed their players as ``fe-<kind>-player`` elements whose
    ``data-props`` attribute holds a JSON description of the video. Each
    player is resolved either to a URL result (YouTube ID or plain URL) or
    to a Brightcove video fetched through the Brightcove Playback API.
    Page-level metadata is read from JSON-LD when present and from
    OpenGraph tags otherwise.
    """
    _VALID_URL = r'https?://www\.bluey\.tv/(?:.+/)?(?P<id>[^/]+)/?$'
    _BRIGHTCOVE_ACCOUNT_ID = '6041795457001'
    _BRIGHTCOVE_POLICY_KEY = 'BCpkADawqM0-e9kbtiYMtk9IxVZUWQ1X3DfbKGkMTtgzX-8zRbWKYj31aVgMTPXxCK3Uy_J4wYE8mXuYHlLUhu47Tsco9l6H_-3_BJKL10ip7fnY8tUiCotYIoaMcOTeqCwM9Vn2trMyy3HM'
    _TESTS = [{
        # Episode (YouTube embedded: https://youtu.be/u6D2ucvSas0)
        'url': 'https://www.bluey.tv/watch/season-1/mums-and-dads/',
        'info_dict': {
            'id': 'u6D2ucvSas0',
            'ext': 'mp4',
            'title': 'Mums and Dads',
            'description': 'md5:e215cd5c6d6ec050a354d2b06ad6fc9d',
            'thumbnail': 'https://www.bluey.tv/wp-content/uploads/2023/08/ABTI325R50_MUMS_AND_DADS_Image_00.jpg',
            'timestamp': 1591362032,
            'upload_date': '20200605',
            'uploader': 'Official Bluey TV',
            'uploader_id': '@BlueyOfficialChannel',
            'uploader_url': 'https://www.youtube.com/@BlueyOfficialChannel',
            'channel': 'Bluey - Official Channel',
            'channel_id': 'UCVzLLZkDuFGAE2BGdBuBNBg',
            'channel_url': 'https://www.youtube.com/channel/UCVzLLZkDuFGAE2BGdBuBNBg',
            'channel_follower_count': int,
            'channel_is_verified': True,
            'duration': 118,
            'view_count': int,
            'like_count': int,
            'age_limit': 0,
            'availability': 'public',
            'categories': ['Film & Animation'],
            'tags': 'count:18',
            'heatmap': 'count:100',
            'live_status': 'not_live',
            'playable_in_embed': True,
            'season': 'Season 1',
            'season_number': 1,
            'episode': 'Episode 33',
            'episode_number': 33,
        },
    }, {
        # Episode with trailer video
        'url': 'https://www.bluey.tv/watch/season-3/the-sign/',
        'info_dict': {
            'id': 'the-sign',
            'title': 'The Sign',
            'description': 'md5:6e9b01b32f35bdcf33160c86a15080f7',
            'thumbnail': 'https://www.bluey.tv/wp-content/uploads/2024/02/Sign-Sq.png',
            'uploader': 'Official Bluey TV',
            'season': 'Season 3',
            'season_number': 3,
            'episode': 'Episode 49',
            'episode_number': 49,
        },
        'playlist_count': 2,
    }, {
        # Minisode (Brightcove)
        'url': 'https://www.bluey.tv/watch/minisodes/animals/',
        'info_dict': {
            'id': 'animals',
            'ext': 'mp4',
            'title': 'Animals',
            'description': 'Mum is playing the animal game on Bingo\'s back.',
            'thumbnail': 'https://cf-images.us-east-1.prod.boltdns.net/v1/jit/6041795457001/b8000e79-49d6-4732-88be-09fb0d484a98/main/1280x720/11s413ms/match/image.jpg',
            'upload_date': '20240701',
            'uploader': 'Official Bluey TV',
            'tags': [],
            'episode': 'Episode 7',
            'episode_number': 7,
            # The API reports 22827 (milliseconds); reported here in seconds
            'duration': 22,
        },
    }, {
        # Book-read (YouTube embedded: https://youtu.be/NbLxoLyPGyc)
        'url': 'https://www.bluey.tv/watch/bluey-book-reads/charades-2/',
        'info_dict': {
            'id': 'NbLxoLyPGyc',
            'ext': 'mp4',
            'title': 'Charades',
            'description': 'Jenna Fischer reads \'Charades\'',
            'thumbnail': 'https://www.bluey.tv/wp-content/uploads/2024/02/AVSA067W_BlueyBookReads_S01_E06_Charades_TitlePromo_16x9.png',
            'timestamp': 1713538806,
            'release_date': '20240419',
            'release_timestamp': 1713538806,
            'upload_date': '20240419',
            'uploader': 'Official Bluey TV',
            'uploader_id': '@BlueyOfficialChannel',
            'uploader_url': 'https://www.youtube.com/@BlueyOfficialChannel',
            'channel': 'Bluey - Official Channel',
            'channel_id': 'UCVzLLZkDuFGAE2BGdBuBNBg',
            'channel_url': 'https://www.youtube.com/channel/UCVzLLZkDuFGAE2BGdBuBNBg',
            'channel_follower_count': int,
            'channel_is_verified': True,
            'duration': 280,
            'view_count': int,
            'like_count': int,
            'age_limit': 0,
            'availability': 'public',
            'categories': ['Film & Animation'],
            'heatmap': 'count:100',
            'live_status': 'not_live',
            'playable_in_embed': True,
            'tags': 'count:28',
        },
    }, {
        # Bonus-bit (YouTube embedded: https://youtu.be/UUkb_b5UEE0)
        'url': 'https://www.bluey.tv/watch/bonus-bits/tea-party/',
        'info_dict': {
            'id': 'UUkb_b5UEE0',
            'ext': 'mp4',
            'title': 'Tea Party',
            'description': 'Bluey and Honey invite Honey\'s mum and dad to a tea party.',
            'thumbnail': 'https://www.bluey.tv/wp-content/uploads/2021/03/Bluey_Tea_Party_001.jpg',
            'timestamp': 1614960018,
            'upload_date': '20210305',
            'uploader': 'Official Bluey TV',
            'uploader_id': '@BlueyOfficialChannel',
            'uploader_url': 'https://www.youtube.com/@BlueyOfficialChannel',
            'channel': 'Bluey - Official Channel',
            'channel_id': 'UCVzLLZkDuFGAE2BGdBuBNBg',
            'channel_url': 'https://www.youtube.com/channel/UCVzLLZkDuFGAE2BGdBuBNBg',
            'channel_follower_count': int,
            'channel_is_verified': True,
            'duration': 95,
            'view_count': int,
            'like_count': int,
            'age_limit': 0,
            'availability': 'public',
            'categories': ['Film & Animation'],
            'heatmap': 'count:100',
            'live_status': 'not_live',
            'playable_in_embed': True,
            'tags': 'count:24',
        },
    }, {
        # Characters (YouTube embedded: https://youtu.be/HlOIzz-GIxk)
        'url': 'https://www.bluey.tv/characters/bluey/',
        'info_dict': {
            'id': 'HlOIzz-GIxk',
            'ext': 'mp4',
            'title': 'BLUEY\'S HIGHLIGHTS',
            'description': 'Bluey is a blue heeler pup who loves to make up and play fun and imaginative games with her family and friends.',
            'thumbnail': 'https://www.bluey.tv/wp-content/uploads/2023/07/ABTI291B50_THE_BEACH_Image_09-scaled.jpg',
            'timestamp': 1665759612,
            'upload_date': '20221014',
            'uploader': 'Official Bluey TV',
            'uploader_id': '@BlueyOfficialChannel',
            'uploader_url': 'https://www.youtube.com/@BlueyOfficialChannel',
            'channel': 'Bluey - Official Channel',
            'channel_id': 'UCVzLLZkDuFGAE2BGdBuBNBg',
            'channel_url': 'https://www.youtube.com/channel/UCVzLLZkDuFGAE2BGdBuBNBg',
            'channel_follower_count': int,
            'channel_is_verified': True,
            'duration': 604,
            'view_count': int,
            'like_count': int,
            'age_limit': 0,
            'availability': 'public',
            'categories': ['Film & Animation'],
            'live_status': 'not_live',
            'playable_in_embed': True,
            'tags': 'count:24',
        },
    }]

    def _extract_brightcove(self, brightcove_id, video_id):
        """Resolve a Brightcove video through the Playback API.

        Returns an info dict holding the Brightcove id, the HLS/DASH formats,
        subtitles and whatever metadata the API supplied, or an empty dict
        when the API request fails (the request is non-fatal).
        """
        data = self._download_json(
            'https://edge.api.brightcove.com/playback/v1/accounts/'
            f'{self._BRIGHTCOVE_ACCOUNT_ID}/videos/{brightcove_id}',
            video_id, fatal=False,
            headers={'Accept': f'application/json;pk={self._BRIGHTCOVE_POLICY_KEY}'})
        if not data:
            return {}

        formats, subtitles = [], {}
        # 'sources' may be missing or null in the response
        for source in data.get('sources') or []:
            src = source.get('src')
            if not src:
                continue
            source_type = source.get('type')
            if source_type == 'application/x-mpegURL':
                fmts, subs = self._extract_m3u8_formats_and_subtitles(
                    src, video_id, 'mp4', m3u8_id='hls', fatal=False)
                for f in fmts:
                    f['format_id'] = f['format_id'].replace(' ', '').replace(')', '')
            elif source_type == 'application/dash+xml':
                fmts, subs = self._extract_mpd_formats_and_subtitles(
                    src, video_id, mpd_id='dash', fatal=False)
            else:
                continue
            # The same manifest may be listed under both http and https;
            # suffix the URL scheme so the format ids stay distinct
            scheme = src.split(':')[0]
            for f in fmts:
                f['format_id'] = f'{f["format_id"]}-{scheme}'
            formats.extend(fmts)
            self._merge_subtitles(subs, target=subtitles)

        return {
            'id': str(brightcove_id),
            **traverse_obj(data, {
                'description': (('long_description', 'description'), {str_or_none}),
                'thumbnail': (('poster', 'thumbnail'), {url_or_none}),
                'tags': ('tags', {list}),
                'upload_date': (('published_at', 'created_at'),
                                {lambda x: x[:10].replace('-', '') if x else None}),
                # Brightcove reports the duration in milliseconds
                'duration': ('duration', {lambda x: int_or_none(x, scale=1000)}),
            }, get_all=False),
            'formats': formats,
            'subtitles': subtitles,
        }

    def _real_extract(self, url):
        """Extract the video (or playlist of videos) embedded in a page.

        Returns a playlist result when the page embeds several playable
        videos, a single (possibly url_transparent) result when it embeds
        exactly one, and the bare page metadata when no player was resolved.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        entries, player_title, player_poster = [], None, None
        players = re.findall(r'fe-(\w+)-player" data-props="({[^"]+?})"', webpage)
        for idx, (player_kind, props) in enumerate(players):
            # A single malformed player must not abort the whole page
            video_data = self._parse_json(clean_html(props), video_id, fatal=False)
            if not video_data:
                continue
            player_title = traverse_obj(
                video_data, ('title', {lambda x: x if x != 'Watch the trailer' else None}))
            if idx == 0:
                player_poster = traverse_obj(video_data, ('posterImage', {url_or_none}))

            entry = None
            if player_kind == 'media':
                if video_data.get('type') == 'brightcove' and video_data.get('brightcoveId'):
                    entry = self._extract_brightcove(video_data['brightcoveId'], video_id)
                elif video_data.get('type') == 'youtube' and video_data.get('youtubeId'):
                    entry = self.url_result(video_data['youtubeId'])
            elif player_kind == 'video' and video_data.get('url'):
                if url_or_none(video_data['url']):
                    entry = self.url_result(video_data['url'])
                elif int_or_none(video_data['url']):
                    # A purely numeric "url" is treated as a Brightcove video id
                    entry = self._extract_brightcove(video_data['url'], video_id)
            # Skip players that could not be resolved (e.g. failed API request)
            if entry:
                entries.append(entry)

        if json_ld := list(self._yield_json_ld(webpage, video_id)):
            info = {
                'id': video_id,
                **traverse_obj(json_ld[-1], {
                    'title': (('containsSeason', '@graph'), 0, (('episode', 'name'), 'name'),
                              {lambda x: re.sub(r'\W+Bluey Official Website$', '', x).split(' | ')[-1] if x else None}),
                    'description': (('containsSeason', '@graph'), 0,
                                    (('episode', 'description'), 'description'), {str_or_none}),
                    'thumbnail': ('containsSeason', 0, 'episode', 'image',
                                  {lambda x: x if url_or_none(x) else player_poster}),
                    'season': ('containsSeason', 0, 'name',
                               {lambda x: x if re.match(r'Season \d+$', x) else None}),
                    'season_number': ('containsSeason', 0, 'name',
                                      {lambda x: int(x.replace('Season ', '')) if re.match(r'Season \d+$', x) else None}),
                    'episode': ('containsSeason', 0, 'episode', 'episodeNumber',
                                {lambda x: f'Episode {x}' if x else None}),
                    'episode_number': ('containsSeason', 0, 'episode', 'episodeNumber', {int_or_none}),
                }, get_all=False),
            }
            # The lambda above only runs when the JSON-LD image key exists;
            # fall back to the player poster when it is absent altogether
            if not info.get('thumbnail') and player_poster:
                info['thumbnail'] = player_poster
        else:
            title = re.sub(r'\W+Bluey Official Website$', '', self._og_search_title(webpage))
            info = {
                'id': video_id,
                'title': title.split(' | ')[-1],
                'description': self._og_search_description(webpage),
                'thumbnail': player_poster or self._og_search_thumbnail(webpage),
            }
            if season_number := self._search_regex(r' Season (\d+)', title, 'season_number', default=None):
                info['season'] = f'Season {season_number}'
                info['season_number'] = int(season_number)
            if episode_number := self._search_regex(r' Episode (\d+)', title, 'episode_number', default=None):
                info['episode'] = f'Episode {episode_number}'
                info['episode_number'] = int(episode_number)
        info['uploader'] = self._html_search_meta('article:author', webpage)

        if len(entries) > 1:
            return self.playlist_result(entries, video_id, **{
                k: v for k, v in info.items() if v})
        if not entries:
            return info

        entry = entries[0]
        if entry.get('_type'):
            # Let the target extractor supply formats while page metadata
            # fills in whatever that extractor does not override
            entry['_type'] = 'url_transparent'
        else:
            # A lone Brightcove video is identified by the page slug
            entry['id'] = video_id
        # JSON-LD may not have provided a title at all
        info['title'] = player_title or info.get('title')
        return merge_dicts(entry, info)

0 comments on commit 7e39550

Please sign in to comment.