Skip to content

Commit

Permalink
Update facebook.py
Browse files — browse the repository at this point in the history
  • Loading branch information
kclauhk committed Jan 5, 2025
1 parent 44b745c commit 50b3a65
Showing 1 changed file with 55 additions and 18 deletions.
73 changes: 55 additions & 18 deletions yt_dlp/extractor/facebook.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from ..compat import compat_etree_fromstring
from ..networking import Request
from ..networking.exceptions import network_exceptions
from ..postprocessor import FFmpegPostProcessor
from ..utils import (
ExtractorError,
clean_html,
Expand All @@ -17,6 +18,7 @@
join_nonempty,
js_to_json,
merge_dicts,
parse_codecs,
parse_count,
parse_qs,
qualities,
Expand Down Expand Up @@ -720,8 +722,8 @@ def extract_metadata(field=None):
webpage_info = {
'thumbnails': [{k: v for k, v in {
'url': thumbnail,
'height': int_or_none(self._search_regex(
r'stp=.+_[a-z]\d+x(\d+)&', thumbnail, 'thumbnail height', default=None)),
'height': int_or_none(self._search_regex(r'stp=.+_[a-z]\d+x(\d+)&', thumbnail,
'thumbnail height', default=None)),
'preference': None if 'stp=' in thumbnail else 1,
}.items() if v is not None}] if url_or_none(thumbnail) else [],
'view_count': parse_count(self._search_regex(
Expand All @@ -730,17 +732,21 @@ def extract_metadata(field=None):
}

p_id, s_id, linked_url, data, feedback_data = None, None, None, [], ''
Q = self._search_regex(r'(["\']):\s*[\[{]*\1', (post_data[0] if post_data else ''), 'quotation', default='"')
Q = self._search_regex(
r'(["\']):\s*[\[{]*\1', (post_data[0] if post_data else ''), 'quotation', default='"')
for p_data in post_data[:]:
if rf'{Q}feed_unit{Q}:' in p_data or not re.search(
rf'{Q}(?:dash_manifest_urls?|message|event_description){Q}:', p_data):
# discard useless feed data
post_data.remove(p_data)
else:
if (not s_id or not p_id) and (f'{Q}story{Q}:' in p_data or f'{Q}creation_story{Q}:' in p_data):
p_id = p_id or self._search_regex(rf'{Q}(?:post_id|videoId|video_id){Q}:\s*{Q}(\d+){Q}', p_data,
'post id', default=(video_id if video_id.isnumeric() else None))
s_id = s_id or self._search_regex(rf'id{Q}:\s*{Q}(Uzpf[^{Q}]+){Q}', p_data, 'story id', default=None)
p_id = (p_id
or self._search_regex(rf'{Q}(?:post_id|videoId|video_id){Q}:\s*{Q}(\d+){Q}', p_data,
'post id', default=(video_id if video_id.isnumeric() else None)))
s_id = (s_id
or self._search_regex(rf'id{Q}:\s*{Q}(Uzpf[^{Q}]+){Q}', p_data,
'story id', default=None))
if not data:
if re.search(rf'{Q}attachment{Q}:\s*{{{Q}(?:source|web_link){Q}:', p_data):
# linked video
Expand Down Expand Up @@ -777,6 +783,47 @@ def extract_dash_manifest(vid_data, formats, subtitle, mpd_url=None):
formats.extend(fmts)
self._merge_subtitles(subs, target=subtitle[0])

def extract_progressive_url(format_id, url):
    """Build a format dict for a progressive (non-DASH) video URL.

    If ffprobe is available and returns metadata for *url*, the format is
    enriched with duration, resolution, codecs and bitrates; otherwise a
    bare entry is returned with a quality rank so that sd/hd formats
    without resolution info sort below DASH formats.

    @param format_id    'sd' or 'hd' (Facebook progressive quality label)
    @param url          direct progressive media URL
    @returns            a yt-dlp format dict
    """
    fmt = {
        'format_id': format_id,
        'url': url,
    }
    ffmpeg = FFmpegPostProcessor()
    if ffmpeg.probe_available:
        if data := ffmpeg.get_metadata_object(url):
            fmt.update(traverse_obj(data.get('format'), {
                'duration': ('duration', {float_or_none}),
            }))
            for stream in traverse_obj(data, 'streams', expected_type=list):
                if stream.get('codec_type') == 'video':
                    # ffprobe reports avg_frame_rate as "num/den"; guard against a
                    # missing or malformed value instead of crashing on float(None)
                    # or on unpacking a single-element split
                    fps = None
                    if frame_rate := stream.get('avg_frame_rate'):
                        num, _, den = str(frame_rate).partition('/')
                        num, den = float_or_none(num), float_or_none(den)
                        if num and den:
                            fps = round(num / den, 1)
                    fmt.update({
                        **traverse_obj(stream, {
                            'width': ('width', {int_or_none}),
                            'height': ('height', {int_or_none}),
                            'vbr': ('bit_rate', {lambda v: float_or_none(v, 1000)}),
                        }),
                        # a video stream can only contribute vcodec info
                        **{k: v for k, v in
                           parse_codecs(stream.get('codec_tag_string')).items() if k != 'acodec'},
                        'fps': fps,
                    })
                elif stream.get('codec_type') == 'audio':
                    fmt.update({
                        **traverse_obj(stream, {
                            'audio_channels': ('channels', {int_or_none}),
                            'abr': ('bit_rate', {lambda v: float_or_none(v, 1000)}),
                            'asr': ('sample_rate', {int_or_none}),
                        }),
                        # an audio stream can only contribute acodec info
                        **{k: v for k, v in
                           parse_codecs(stream.get('codec_tag_string')).items() if k != 'vcodec'},
                    })
            return fmt
    # sd, hd formats w/o resolution info should be deprioritized below DASH;
    # build the quality ranker here rather than relying on a `q` defined in a
    # sibling closure (same mapping: sd -> lowest, hd -> highest)
    fmt['quality'] = qualities(['sd', 'hd'])(format_id) - 3
    return fmt

def process_formats(info):
for f in info['formats']:
# Downloads with browser's User-Agent are rate limited. Working around
Expand All @@ -803,12 +850,7 @@ def parse_graphql_video(video):
self._merge_subtitles(subs, target=(captions if is_broadcast else subtitles))
else:
q = qualities(['sd', 'hd'])
formats.append({
'format_id': format_id,
# sd, hd formats w/o resolution info should be deprioritized below DASH
'quality': q(format_id) - 3,
'url': playable_url,
})
formats.append(extract_progressive_url(format_id, playable_url))
extract_dash_manifest(fmt_data, formats, [captions if is_broadcast else subtitles])

# videoDeliveryResponse formats extraction
Expand All @@ -826,12 +868,7 @@ def parse_graphql_video(video):
self._merge_subtitles(subs, target=(captions if is_broadcast else subtitles))
for prog_fmt in traverse_obj(fmt_data, ('progressive_urls', lambda _, v: v['progressive_url'])):
format_id = traverse_obj(prog_fmt, ('metadata', 'quality', {str.lower}))
formats.append({
'format_id': format_id,
# sd, hd formats w/o resolution info should be deprioritized below DASH
'quality': q(format_id) - 3,
'url': prog_fmt['progressive_url'],
})
formats.append(extract_progressive_url(format_id, prog_fmt['progressive_url']))
for m3u8_url in traverse_obj(fmt_data, ('hls_playlist_urls', ..., 'hls_playlist_url', {url_or_none})):
fmts, subs = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, 'mp4',
fatal=False, m3u8_id='hls')
Expand Down

0 comments on commit 50b3a65

Please sign in to comment.