Skip to content

Commit

Permalink
Merge branch 'yt-dlp:master' into master
Browse files Browse the repository at this point in the history
  • Loading branch information
saintliao authored Dec 13, 2023
2 parents 63d3795 + 6b5d93b commit de1d3d2
Show file tree
Hide file tree
Showing 20 changed files with 338 additions and 89 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -1333,6 +1333,7 @@ The available fields are:
- `was_live` (boolean): Whether this video was originally a live stream
- `playable_in_embed` (string): Whether this video is allowed to play in embedded players on other sites
- `availability` (string): Whether the video is "private", "premium_only", "subscriber_only", "needs_auth", "unlisted" or "public"
- `media_type` (string): The type of media as classified by the site, e.g. "episode", "clip", "trailer"
- `start_time` (numeric): Time in seconds where the reproduction should start, as specified in the URL
- `end_time` (numeric): Time in seconds where the reproduction should end, as specified in the URL
- `extractor` (string): Name of the extractor
Expand Down
17 changes: 0 additions & 17 deletions test/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2317,23 +2317,6 @@ def test_traverse_obj(self):
self.assertEqual(traverse_obj({}, (0, slice(1)), traverse_string=True), [],
msg='branching should result in list if `traverse_string`')

# Test is_user_input behavior
_IS_USER_INPUT_DATA = {'range8': list(range(8))}
self.assertEqual(traverse_obj(_IS_USER_INPUT_DATA, ('range8', '3'),
is_user_input=True), 3,
msg='allow for string indexing if `is_user_input`')
self.assertCountEqual(traverse_obj(_IS_USER_INPUT_DATA, ('range8', '3:'),
is_user_input=True), tuple(range(8))[3:],
msg='allow for string slice if `is_user_input`')
self.assertCountEqual(traverse_obj(_IS_USER_INPUT_DATA, ('range8', ':4:2'),
is_user_input=True), tuple(range(8))[:4:2],
msg='allow step in string slice if `is_user_input`')
self.assertCountEqual(traverse_obj(_IS_USER_INPUT_DATA, ('range8', ':'),
is_user_input=True), range(8),
msg='`:` should be treated as `...` if `is_user_input`')
with self.assertRaises(TypeError, msg='too many params should result in error'):
traverse_obj(_IS_USER_INPUT_DATA, ('range8', ':::'), is_user_input=True)

# Test re.Match as input obj
mobj = re.fullmatch(r'0(12)(?P<group>3)(4)?', '0123')
self.assertEqual(traverse_obj(mobj, ...), [x for x in mobj.groups() if x is not None],
Expand Down
14 changes: 12 additions & 2 deletions yt_dlp/YoutubeDL.py
Original file line number Diff line number Diff line change
Expand Up @@ -1201,6 +1201,15 @@ def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False):
(?:\|(?P<default>.*?))?
)$''')

def _from_user_input(field):
if field == ':':
return ...
elif ':' in field:
return slice(*map(int_or_none, field.split(':')))
elif int_or_none(field) is not None:
return int(field)
return field

def _traverse_infodict(fields):
fields = [f for x in re.split(r'\.({.+?})\.?', fields)
for f in ([x] if x.startswith('{') else x.split('.'))]
Expand All @@ -1210,11 +1219,12 @@ def _traverse_infodict(fields):

for i, f in enumerate(fields):
if not f.startswith('{'):
fields[i] = _from_user_input(f)
continue
assert f.endswith('}'), f'No closing brace for {f} in {fields}'
fields[i] = {k: k.split('.') for k in f[1:-1].split(',')}
fields[i] = {k: list(map(_from_user_input, k.split('.'))) for k in f[1:-1].split(',')}

return traverse_obj(info_dict, fields, is_user_input=True, traverse_string=True)
return traverse_obj(info_dict, fields, traverse_string=True)

def get_value(mdict):
# Object traversal
Expand Down
23 changes: 21 additions & 2 deletions yt_dlp/extractor/aenetworks.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,11 +121,21 @@ class AENetworksIE(AENetworksBaseIE):
'info_dict': {
'id': '22253814',
'ext': 'mp4',
'title': 'Winter is Coming',
'description': 'md5:641f424b7a19d8e24f26dea22cf59d74',
'title': 'Winter Is Coming',
'description': 'md5:a40e370925074260b1c8a633c632c63a',
'timestamp': 1338306241,
'upload_date': '20120529',
'uploader': 'AENE-NEW',
'duration': 2592.0,
'thumbnail': r're:^https?://.*\.jpe?g$',
'chapters': 'count:5',
'tags': 'count:14',
'categories': ['Mountain Men'],
'episode_number': 1,
'episode': 'Episode 1',
'season': 'Season 1',
'season_number': 1,
'series': 'Mountain Men',
},
'params': {
# m3u8 download
Expand All @@ -143,6 +153,15 @@ class AENetworksIE(AENetworksBaseIE):
'timestamp': 1452634428,
'upload_date': '20160112',
'uploader': 'AENE-NEW',
'duration': 1277.695,
'thumbnail': r're:^https?://.*\.jpe?g$',
'chapters': 'count:4',
'tags': 'count:23',
'episode': 'Episode 1',
'episode_number': 1,
'season': 'Season 9',
'season_number': 9,
'series': 'Duck Dynasty',
},
'params': {
# m3u8 download
Expand Down
44 changes: 42 additions & 2 deletions yt_dlp/extractor/bitchute.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,10 @@
ExtractorError,
OnDemandPagedList,
clean_html,
extract_attributes,
get_element_by_class,
get_element_by_id,
get_element_html_by_class,
get_elements_html_by_class,
int_or_none,
orderedSet,
Expand All @@ -17,6 +19,7 @@
traverse_obj,
unified_strdate,
urlencode_postdata,
urljoin,
)


Expand All @@ -34,6 +37,25 @@ class BitChuteIE(InfoExtractor):
'thumbnail': r're:^https?://.*\.jpg$',
'uploader': 'BitChute',
'upload_date': '20170103',
'uploader_url': 'https://www.bitchute.com/profile/I5NgtHZn9vPj/',
'channel': 'BitChute',
'channel_url': 'https://www.bitchute.com/channel/bitchute/'
},
}, {
# test case: video with different channel and uploader
'url': 'https://www.bitchute.com/video/Yti_j9A-UZ4/',
'md5': 'f10e6a8e787766235946d0868703f1d0',
'info_dict': {
'id': 'Yti_j9A-UZ4',
'ext': 'mp4',
'title': 'Israel at War | Full Measure',
'description': 'md5:38cf7bc6f42da1a877835539111c69ef',
'thumbnail': r're:^https?://.*\.jpg$',
'uploader': 'sharylattkisson',
'upload_date': '20231106',
'uploader_url': 'https://www.bitchute.com/profile/9K0kUWA9zmd9/',
'channel': 'Full Measure with Sharyl Attkisson',
'channel_url': 'https://www.bitchute.com/channel/sharylattkisson/'
},
}, {
# video not downloadable in browser, but we can recover it
Expand All @@ -48,6 +70,9 @@ class BitChuteIE(InfoExtractor):
'thumbnail': r're:^https?://.*\.jpg$',
'uploader': 'BitChute',
'upload_date': '20181113',
'uploader_url': 'https://www.bitchute.com/profile/I5NgtHZn9vPj/',
'channel': 'BitChute',
'channel_url': 'https://www.bitchute.com/channel/bitchute/'
},
'params': {'check_formats': None},
}, {
Expand Down Expand Up @@ -99,6 +124,11 @@ def _raise_if_restricted(self, webpage):
reason = clean_html(get_element_by_id('page-detail', webpage)) or page_title
self.raise_geo_restricted(reason)

@staticmethod
def _make_url(html):
path = extract_attributes(get_element_html_by_class('spa', html) or '').get('href')
return urljoin('https://www.bitchute.com', path)

def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(
Expand All @@ -121,12 +151,19 @@ def _real_extract(self, url):
'Video is unavailable. Please make sure this video is playable in the browser '
'before reporting this issue.', expected=True, video_id=video_id)

details = get_element_by_class('details', webpage) or ''
uploader_html = get_element_html_by_class('creator', details) or ''
channel_html = get_element_html_by_class('name', details) or ''

return {
'id': video_id,
'title': self._html_extract_title(webpage) or self._og_search_title(webpage),
'description': self._og_search_description(webpage, default=None),
'thumbnail': self._og_search_thumbnail(webpage),
'uploader': clean_html(get_element_by_class('owner', webpage)),
'uploader': clean_html(uploader_html),
'uploader_url': self._make_url(uploader_html),
'channel': clean_html(channel_html),
'channel_url': self._make_url(channel_html),
'upload_date': unified_strdate(self._search_regex(
r'at \d+:\d+ UTC on (.+?)\.', publish_date, 'upload date', fatal=False)),
'formats': formats,
Expand Down Expand Up @@ -154,6 +191,9 @@ class BitChuteChannelIE(InfoExtractor):
'thumbnail': r're:^https?://.*\.jpg$',
'uploader': 'BitChute',
'upload_date': '20170103',
'uploader_url': 'https://www.bitchute.com/profile/I5NgtHZn9vPj/',
'channel': 'BitChute',
'channel_url': 'https://www.bitchute.com/channel/bitchute/',
'duration': 16,
'view_count': int,
},
Expand All @@ -169,7 +209,7 @@ class BitChuteChannelIE(InfoExtractor):
'info_dict': {
'id': 'wV9Imujxasw9',
'title': 'Bruce MacDonald and "The Light of Darkness"',
'description': 'md5:04913227d2714af1d36d804aa2ab6b1e',
'description': 'md5:747724ef404eebdfc04277714f81863e',
}
}]

Expand Down
37 changes: 28 additions & 9 deletions yt_dlp/extractor/cbc.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,13 @@ class CBCPlayerIE(InfoExtractor):
'thumbnail': 'http://thumbnails.cbc.ca/maven_legacy/thumbnails/sonali-karnick-220.jpg',
'chapters': [],
'duration': 494.811,
'categories': ['AudioMobile/All in a Weekend Montreal'],
'tags': 'count:8',
'location': 'Quebec',
'series': 'All in a Weekend Montreal',
'season': 'Season 2015',
'season_number': 2015,
'media_type': 'Excerpt',
},
}, {
'url': 'http://www.cbc.ca/player/play/2164402062',
Expand All @@ -195,25 +202,37 @@ class CBCPlayerIE(InfoExtractor):
'thumbnail': 'https://thumbnails.cbc.ca/maven_legacy/thumbnails/277/67/cancer_852x480_2164412612.jpg',
'chapters': [],
'duration': 186.867,
'series': 'CBC News: Windsor at 6:00',
'categories': ['News/Canada/Windsor'],
'location': 'Windsor',
'tags': ['cancer'],
'creator': 'Allison Johnson',
'media_type': 'Excerpt',
},
}, {
# Has subtitles
# These broadcasts expire after ~1 month, can find new test URL here:
# https://www.cbc.ca/player/news/TV%20Shows/The%20National/Latest%20Broadcast
'url': 'http://www.cbc.ca/player/play/2249992771553',
'md5': '2f2fb675dd4f0f8a5bb7588d1b13bacd',
'url': 'http://www.cbc.ca/player/play/2284799043667',
'md5': '9b49f0839e88b6ec0b01d840cf3d42b5',
'info_dict': {
'id': '2249992771553',
'id': '2284799043667',
'ext': 'mp4',
'title': 'The National | Women’s soccer pay, Florida seawater, Swift quake',
'description': 'md5:adba28011a56cfa47a080ff198dad27a',
'timestamp': 1690596000,
'duration': 2716.333,
'title': 'The National | Hockey coach charged, Green grants, Safer drugs',
'description': 'md5:84ef46321c94bcf7d0159bb565d26bfa',
'timestamp': 1700272800,
'duration': 2718.833,
'subtitles': {'eng': [{'ext': 'vtt', 'protocol': 'm3u8_native'}]},
'thumbnail': 'https://thumbnails.cbc.ca/maven_legacy/thumbnails/481/326/thumbnail.jpeg',
'thumbnail': 'https://thumbnails.cbc.ca/maven_legacy/thumbnails/907/171/thumbnail.jpeg',
'uploader': 'CBCC-NEW',
'chapters': 'count:5',
'upload_date': '20230729',
'upload_date': '20231118',
'categories': 'count:4',
'series': 'The National - Full Show',
'tags': 'count:1',
'creator': 'News',
'location': 'Canada',
'media_type': 'Full Program',
},
}]

Expand Down
1 change: 1 addition & 0 deletions yt_dlp/extractor/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -382,6 +382,7 @@ class InfoExtractor:
'private', 'premium_only', 'subscriber_only', 'needs_auth',
'unlisted' or 'public'. Use 'InfoExtractor._availability'
to set it
media_type: The type of media as classified by the site, e.g. "episode", "clip", "trailer"
_old_archive_ids: A list of old archive ids needed for backward compatibility
_format_sort_fields: A list of fields to use for sorting formats
__post_extractor: A function to be called just before the metadata is
Expand Down
4 changes: 4 additions & 0 deletions yt_dlp/extractor/cwtv.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,10 @@ class CWTVIE(InfoExtractor):
'timestamp': 1444107300,
'age_limit': 14,
'uploader': 'CWTV',
'thumbnail': r're:^https?://.*\.jpe?g$',
'chapters': 'count:4',
'episode': 'Episode 20',
'season': 'Season 11',
},
'params': {
# m3u8 download
Expand Down
Loading

0 comments on commit de1d3d2

Please sign in to comment.