Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

release 01.09.2020 #20

Merged
merged 43 commits into from
Aug 31, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
43 commits
Select commit Hold shift + click to select a range
bf52996
Update api call to v5 spec in TwitchPlaylistBaseIE
geauxlo Jun 10, 2020
3951a7f
Prefer API to scraping HTML when possible
geauxlo Jun 10, 2020
66c498e
Recognize more valid URLs
geauxlo Jun 10, 2020
33afd66
UNDO
geauxlo Jun 10, 2020
777d5a4
[postprocessor/embedthumbnail] Add conversion for non JPG/PNG images
alexmerkel Jun 18, 2020
e987deb
[postprocessor/embedthumbnail] Add detection for mislabeled WebP files
alexmerkel Jun 21, 2020
ac0ad4f
[postprocessor/embedthumbnail] Close file before possible renaming
alexmerkel Jun 21, 2020
6011dd9
[postprocessor/embedthumbnail] Fix comments to make flake8 happy
alexmerkel Jun 21, 2020
f6513e1
[postprocessor/embedthumbnail] Replace % with _ in ffmpeg image outpu…
alexmerkel Jun 23, 2020
cae18ef
[youtube] Fix age gate detection
random-nick Jul 23, 2020
98b6982
use dl function for subtitles
siikamiika Aug 3, 2020
a78e3a5
support youtube live chat replay
siikamiika Aug 4, 2020
321bf82
check live chat replay existence
siikamiika Aug 5, 2020
7627f54
run flake8
siikamiika Aug 5, 2020
f96f5dd
rename variable
siikamiika Aug 5, 2020
7cd9e2a
attempt to fix syntax error on older python
siikamiika Aug 5, 2020
88a68db
flake8
siikamiika Aug 5, 2020
f0f76a3
fix premiere live chat
siikamiika Aug 5, 2020
eaedbfd
fix ytInitialData parsing
siikamiika Aug 10, 2020
15eae44
harden regex with lookbehind
siikamiika Aug 10, 2020
edd8310
Use initial data from JS instead to get chapters
gschizas Aug 11, 2020
34675f9
[videa] Adapt to updates
adrianheine Aug 11, 2020
13c30d1
[francetv] fix extractor
Aug 14, 2020
b50f352
[doodstream] new extractor
sxvghd Aug 16, 2020
8d6df01
[StoryFire] Add new extractor
sgstair Aug 17, 2020
c00c157
Styling changes
sxvghd Aug 17, 2020
92d2599
[doodstream] flake8 compliance
sxvghd Aug 17, 2020
9f13f9d
Merge branch 'master' of https://github.com/ytdl-org/youtube-dl into …
gschizas Aug 28, 2020
ba39289
Resolve audio/x-wav to "wav" extension, rather than "x-wav" (which ff…
tfvlrue Aug 28, 2020
a6c7c9c
[skip travis] remove original travis config
blackjack4494 Aug 31, 2020
9ab4161
[skip travis] update readme.
blackjack4494 Aug 31, 2020
4bd95eb
Merge pull request #6 from sgstair/storyfire
blackjack4494 Aug 31, 2020
ffc632a
Merge pull request #7 from sxvghd/doodstream
blackjack4494 Aug 31, 2020
70ef7d5
Merge pull request #8 from geauxlo/master
blackjack4494 Aug 31, 2020
70445b0
Merge pull request #9 from Surkal/francetv
blackjack4494 Aug 31, 2020
7f3558b
Merge pull request #10 from adrianheine/videa
blackjack4494 Aug 31, 2020
ba30410
Merge pull request #11 from gschizas/bugfix/youtube/chapters-fix-extr…
blackjack4494 Aug 31, 2020
a9c0690
Merge pull request #12 from siikamiika/youtube-live-chat
blackjack4494 Aug 31, 2020
90b7b5e
Merge pull request #13 from random-nick/master
blackjack4494 Aug 31, 2020
1e42995
Merge pull request #14 from alexmerkel/webpfix
blackjack4494 Aug 31, 2020
fbfeb7c
Merge pull request #19 from tfvlrue/master
blackjack4494 Aug 31, 2020
992083b
[skip travis] version bump
blackjack4494 Aug 31, 2020
2b5a936
Merge remote-tracking branch 'origin/master'
blackjack4494 Aug 31, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 0 additions & 50 deletions .travis.yml.original

This file was deleted.

1,042 changes: 10 additions & 1,032 deletions README.md

Large diffs are not rendered by default.

28 changes: 13 additions & 15 deletions youtube_dl/YoutubeDL.py
Original file line number Diff line number Diff line change
Expand Up @@ -1805,21 +1805,27 @@ def ensure_dir_exists(path):
self.report_error('Cannot write annotations file: ' + annofn)
return

def dl(name, info):
fd = get_suitable_downloader(info, self.params)(self, self.params)
for ph in self._progress_hooks:
fd.add_progress_hook(ph)
if self.params.get('verbose'):
self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
return fd.download(name, info)

subtitles_are_requested = any([self.params.get('writesubtitles', False),
self.params.get('writeautomaticsub')])

if subtitles_are_requested and info_dict.get('requested_subtitles'):
# subtitles download errors are already managed as troubles in relevant IE
# that way it will silently go on when used with unsupporting IE
subtitles = info_dict['requested_subtitles']
ie = self.get_info_extractor(info_dict['extractor_key'])
for sub_lang, sub_info in subtitles.items():
sub_format = sub_info['ext']
sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
else:
self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
if sub_info.get('data') is not None:
try:
# Use newline='' to prevent conversion of newline characters
Expand All @@ -1831,11 +1837,11 @@ def ensure_dir_exists(path):
return
else:
try:
sub_data = ie._request_webpage(
sub_info['url'], info_dict['id'], note=False).read()
with io.open(encodeFilename(sub_filename), 'wb') as subfile:
subfile.write(sub_data)
except (ExtractorError, IOError, OSError, ValueError) as err:
dl(sub_filename, sub_info)
except (ExtractorError, IOError, OSError, ValueError,
compat_urllib_error.URLError,
compat_http_client.HTTPException,
socket.error) as err:
self.report_warning('Unable to download subtitle for "%s": %s' %
(sub_lang, error_to_compat_str(err)))
continue
Expand All @@ -1856,14 +1862,6 @@ def ensure_dir_exists(path):

if not self.params.get('skip_download', False):
try:
def dl(name, info):
fd = get_suitable_downloader(info, self.params)(self, self.params)
for ph in self._progress_hooks:
fd.add_progress_hook(ph)
if self.params.get('verbose'):
self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
return fd.download(name, info)

if info_dict.get('requested_formats') is not None:
downloaded = []
success = True
Expand Down
2 changes: 2 additions & 0 deletions youtube_dl/downloader/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from .dash import DashSegmentsFD
from .rtsp import RtspFD
from .ism import IsmFD
from .youtube_live_chat import YoutubeLiveChatReplayFD
from .external import (
get_external_downloader,
FFmpegFD,
Expand All @@ -26,6 +27,7 @@
'f4m': F4mFD,
'http_dash_segments': DashSegmentsFD,
'ism': IsmFD,
'youtube_live_chat_replay': YoutubeLiveChatReplayFD,
}


Expand Down
94 changes: 94 additions & 0 deletions youtube_dl/downloader/youtube_live_chat.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
from __future__ import division, unicode_literals

import re
import json

from .fragment import FragmentFD


class YoutubeLiveChatReplayFD(FragmentFD):
    """ Downloads YouTube live chat replays fragment by fragment

    Each "fragment" is one page of chat replay actions fetched via a
    continuation token; actions are written out as newline-delimited JSON.
    """

    FD_NAME = 'youtube_live_chat_replay'

    def real_download(self, filename, info_dict):
        # info_dict must carry 'video_id' (set by the extractor for the
        # 'youtube_live_chat_replay' protocol) — not the usual 'id' key.
        video_id = info_dict['video_id']
        self.to_screen('[%s] Downloading live chat' % self.FD_NAME)

        # In --test mode we stop after the first page of chat actions.
        test = self.params.get('test', False)

        # 'live' with total_frags=None: the number of pages is unknown up
        # front, so the fragment context is set up like a live stream.
        ctx = {
            'filename': filename,
            'live': True,
            'total_frags': None,
        }

        def dl_fragment(url):
            # Returns (success, data) per FragmentFD._download_fragment.
            headers = info_dict.get('http_headers', {})
            return self._download_fragment(ctx, url, info_dict, headers)

        def parse_yt_initial_data(data):
            # Extract the ytInitialData JSON blob from raw page bytes.
            # Two known embedding styles are tried in order; the lookbehind
            # (?<=}) anchors the non-greedy match to the closing brace so a
            # '};' inside a string does not truncate the JSON.
            window_patt = b'window\\["ytInitialData"\\]\\s*=\\s*(.*?)(?<=});'
            var_patt = b'var\\s+ytInitialData\\s*=\\s*(.*?)(?<=});'
            for patt in window_patt, var_patt:
                try:
                    raw_json = re.search(patt, data).group(1)
                    return json.loads(raw_json)
                except AttributeError:
                    # re.search returned None — try the next pattern.
                    # NOTE(review): if neither pattern matches this falls
                    # through and returns None, which the caller dereferences.
                    continue

        self._prepare_and_start_frag_download(ctx)

        # Bootstrap: fetch the watch page to obtain the first chat
        # continuation token from ytInitialData.
        success, raw_fragment = dl_fragment(
            'https://www.youtube.com/watch?v={}'.format(video_id))
        if not success:
            return False
        data = parse_yt_initial_data(raw_fragment)
        continuation_id = data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
        # no data yet but required to call _append_fragment
        self._append_fragment(ctx, b'')

        first = True
        offset = None
        while continuation_id is not None:
            data = None
            if first:
                # First page is an HTML page embedding ytInitialData.
                url = 'https://www.youtube.com/live_chat_replay?continuation={}'.format(continuation_id)
                success, raw_fragment = dl_fragment(url)
                if not success:
                    return False
                data = parse_yt_initial_data(raw_fragment)
            else:
                # Subsequent pages use the JSON endpoint (pbj=1); the player
                # offset is rewound 5s to avoid missing boundary messages.
                # NOTE(review): offset is never None here — it is only None
                # when the loop breaks below, so offset - 5000 is safe.
                url = ('https://www.youtube.com/live_chat_replay/get_live_chat_replay'
                       + '?continuation={}'.format(continuation_id)
                       + '&playerOffsetMs={}'.format(offset - 5000)
                       + '&hidden=false'
                       + '&pbj=1')
                success, raw_fragment = dl_fragment(url)
                if not success:
                    return False
                data = json.loads(raw_fragment)['response']

            first = False
            continuation_id = None

            live_chat_continuation = data['continuationContents']['liveChatContinuation']
            offset = None
            processed_fragment = bytearray()
            if 'actions' in live_chat_continuation:
                # Emit each replay action as one JSON line; remember the last
                # video offset seen so the next request can resume from it.
                for action in live_chat_continuation['actions']:
                    if 'replayChatItemAction' in action:
                        replay_chat_item_action = action['replayChatItemAction']
                        offset = int(replay_chat_item_action['videoOffsetTimeMsec'])
                        processed_fragment.extend(
                            json.dumps(action, ensure_ascii=False).encode('utf-8') + b'\n')
                continuation_id = live_chat_continuation['continuations'][0]['liveChatReplayContinuationData']['continuation']

            self._append_fragment(ctx, processed_fragment)

            # Stop in test mode, or when a page yielded no replay actions
            # (offset stayed None — nothing left to resume from).
            if test or offset is None:
                break

        self._finish_frag_download(ctx)

        return True
71 changes: 71 additions & 0 deletions youtube_dl/extractor/doodstream.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
# coding: utf-8
from __future__ import unicode_literals

import string
import random
import time

from .common import InfoExtractor


class DoodStreamIE(InfoExtractor):
    """Extractor for dood.to / dood.watch embed and download pages."""

    _VALID_URL = r'https?://(?:www\.)?dood\.(?:to|watch)/[ed]/(?P<id>[a-z0-9]+)'
    _TESTS = [{
        'url': 'http://dood.to/e/5s1wmbdacezb',
        'md5': '4568b83b31e13242b3f1ff96c55f0595',
        'info_dict': {
            'id': '5s1wmbdacezb',
            'ext': 'mp4',
            'title': 'Kat Wonders - Monthly May 2020',
            'description': 'Kat Wonders - Monthly May 2020 | DoodStream.com',
            'thumbnail': 'https://img.doodcdn.com/snaps/flyus84qgl2fsk4g.jpg',
        }
    }, {
        'url': 'https://dood.to/d/jzrxn12t2s7n',
        'md5': '3207e199426eca7c2aa23c2872e6728a',
        'info_dict': {
            'id': 'jzrxn12t2s7n',
            'ext': 'mp4',
            'title': 'Stacy Cruz Cute ALLWAYSWELL',
            'description': 'Stacy Cruz Cute ALLWAYSWELL | DoodStream.com',
            'thumbnail': 'https://img.doodcdn.com/snaps/8edqd5nppkac3x8u.jpg',
        }
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # A /d/ download page only embeds the real player; follow the
        # iframe to the /e/ embed page and re-fetch.
        if '/d/' in url:
            embed_path = self._html_search_regex(
                r'<iframe src="(/e/[a-z0-9]+)"', webpage, 'embed')
            url = "https://dood.to" + embed_path
            video_id = self._match_id(url)
            webpage = self._download_webpage(url, video_id)

        # Metadata comes from standard OpenGraph/Twitter meta tags.
        title = self._html_search_meta(['og:title', 'twitter:title'],
                                       webpage, default=None)
        description = self._html_search_meta(
            ['og:description', 'description', 'twitter:description'],
            webpage, default=None)
        thumb = self._html_search_meta(['og:image', 'twitter:image'],
                                       webpage, default=None)

        # The page carries a one-time token and a /pass_md5/... path that
        # authorizes the CDN URL request.
        token = self._html_search_regex(r'[?&]token=([a-z0-9]+)[&\']', webpage, 'token')
        pass_md5_path = self._html_search_regex(
            r'(/pass_md5.*?)\'', webpage, 'pass_md5')
        auth_url = 'https://dood.to' + pass_md5_path

        # The CDN checks both the referer and a browser-like user agent.
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:53.0) Gecko/20100101 Firefox/66.0',
            'referer': url
        }

        # pass_md5 returns the base of the media URL; the site's player
        # appends 10 random alphanumeric characters plus token and expiry.
        base_url = self._download_webpage(auth_url, video_id, headers=headers)
        alphabet = string.ascii_letters + string.digits
        suffix = ''.join(random.choice(alphabet) for _ in range(10))
        expiry = str(int(time.time() * 1000))
        final_url = base_url + suffix + "?token=" + token + "&expiry=" + expiry

        return {
            'id': video_id,
            'title': title,
            'url': final_url,
            'http_headers': headers,
            'ext': 'mp4',
            'description': description,
            'thumbnail': thumb,
        }
6 changes: 6 additions & 0 deletions youtube_dl/extractor/extractors.py
Original file line number Diff line number Diff line change
Expand Up @@ -293,6 +293,7 @@
from .discoveryvr import DiscoveryVRIE
from .disney import DisneyIE
from .dispeak import DigitallySpeakingIE
from .doodstream import DoodStreamIE
from .dropbox import DropboxIE
from .dw import (
DWIE,
Expand Down Expand Up @@ -1058,6 +1059,11 @@
BellatorIE,
ParamountNetworkIE,
)
from .storyfire import (
StoryFireIE,
StoryFireUserIE,
StoryFireSeriesIE,
)
from .stitcher import StitcherIE
from .sport5 import Sport5IE
from .sportbox import SportBoxIE
Expand Down
12 changes: 7 additions & 5 deletions youtube_dl/extractor/francetv.py
Original file line number Diff line number Diff line change
Expand Up @@ -316,13 +316,14 @@ class FranceTVInfoIE(FranceTVBaseInfoExtractor):
_VALID_URL = r'https?://(?:www|mobile|france3-regions)\.francetvinfo\.fr/(?:[^/]+/)*(?P<id>[^/?#&.]+)'

_TESTS = [{
'url': 'http://www.francetvinfo.fr/replay-jt/france-3/soir-3/jt-grand-soir-3-lundi-26-aout-2013_393427.html',
'url': 'https://www.francetvinfo.fr/replay-jt/france-3/soir-3/jt-grand-soir-3-jeudi-22-aout-2019_3561461.html',
'info_dict': {
'id': '84981923',
'id': 'd12458ee-5062-48fe-bfdd-a30d6a01b793',
'ext': 'mp4',
'title': 'Soir 3',
'upload_date': '20130826',
'timestamp': 1377548400,
'upload_date': '20190822',
'timestamp': 1566510900,
'description': 'md5:72d167097237701d6e8452ff03b83c00',
'subtitles': {
'fr': 'mincount:2',
},
Expand Down Expand Up @@ -374,7 +375,8 @@ def _real_extract(self, url):
video_id = self._search_regex(
(r'player\.load[^;]+src:\s*["\']([^"\']+)',
r'id-video=([^@]+@[^"]+)',
r'<a[^>]+href="(?:https?:)?//videos\.francetv\.fr/video/([^@]+@[^"]+)"'),
r'<a[^>]+href="(?:https?:)?//videos\.francetv\.fr/video/([^@]+@[^"]+)"',
r'data-id="([^"]+)"'),
webpage, 'video id')

return self._make_url_result(video_id)
Expand Down
Loading