Merge pull request #20 from blackjack4494/master
release 01.09.2020
blackjack4494 authored Aug 31, 2020
2 parents f661aa8 + 2b5a936 commit 84ad44a
Showing 15 changed files with 587 additions and 1,127 deletions.
50 changes: 0 additions & 50 deletions .travis.yml.original

This file was deleted.

1,042 changes: 10 additions & 1,032 deletions README.md

Large diffs are not rendered by default.

28 changes: 13 additions & 15 deletions youtube_dl/YoutubeDL.py
@@ -1805,21 +1805,27 @@ def ensure_dir_exists(path):
self.report_error('Cannot write annotations file: ' + annofn)
return

def dl(name, info):
fd = get_suitable_downloader(info, self.params)(self, self.params)
for ph in self._progress_hooks:
fd.add_progress_hook(ph)
if self.params.get('verbose'):
self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
return fd.download(name, info)

subtitles_are_requested = any([self.params.get('writesubtitles', False),
self.params.get('writeautomaticsub')])

if subtitles_are_requested and info_dict.get('requested_subtitles'):
# subtitles download errors are already managed as troubles in relevant IE
# that way it will silently go on when used with unsupporting IE
subtitles = info_dict['requested_subtitles']
ie = self.get_info_extractor(info_dict['extractor_key'])
for sub_lang, sub_info in subtitles.items():
sub_format = sub_info['ext']
sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
else:
self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
if sub_info.get('data') is not None:
try:
# Use newline='' to prevent conversion of newline characters
@@ -1831,11 +1837,11 @@ def ensure_dir_exists(path):
return
else:
try:
sub_data = ie._request_webpage(
sub_info['url'], info_dict['id'], note=False).read()
with io.open(encodeFilename(sub_filename), 'wb') as subfile:
subfile.write(sub_data)
except (ExtractorError, IOError, OSError, ValueError) as err:
dl(sub_filename, sub_info)
except (ExtractorError, IOError, OSError, ValueError,
compat_urllib_error.URLError,
compat_http_client.HTTPException,
socket.error) as err:
self.report_warning('Unable to download subtitle for "%s": %s' %
(sub_lang, error_to_compat_str(err)))
continue
@@ -1856,14 +1862,6 @@ def ensure_dir_exists(path):

if not self.params.get('skip_download', False):
try:
def dl(name, info):
fd = get_suitable_downloader(info, self.params)(self, self.params)
for ph in self._progress_hooks:
fd.add_progress_hook(ph)
if self.params.get('verbose'):
self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
return fd.download(name, info)

if info_dict.get('requested_formats') is not None:
downloaded = []
success = True
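
A minimal sketch (not part of the commit) of what the relocated dl() helper enables: because subtitles are now fetched through the regular downloader machinery instead of ie._request_webpage, progress hooks registered on YoutubeDL also fire for subtitle files. The my_hook callback and the option values are assumptions for illustration; add_progress_hook() and the writesubtitles/subtitleslangs options are existing API.

import youtube_dl

def my_hook(d):  # hypothetical callback
    if d.get('status') == 'finished':
        print('finished: %s' % d.get('filename'))

ydl = youtube_dl.YoutubeDL({
    'writesubtitles': True,    # request subtitle download
    'subtitleslangs': ['en'],  # language selection
})
ydl.add_progress_hook(my_hook)  # with dl() used above, the hook also reports subtitle files
# ydl.download(['https://www.youtube.com/watch?v=<video id>'])  # placeholder URL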
2 changes: 2 additions & 0 deletions youtube_dl/downloader/__init__.py
@@ -8,6 +8,7 @@
from .dash import DashSegmentsFD
from .rtsp import RtspFD
from .ism import IsmFD
from .youtube_live_chat import YoutubeLiveChatReplayFD
from .external import (
get_external_downloader,
FFmpegFD,
@@ -26,6 +27,7 @@
'f4m': F4mFD,
'http_dash_segments': DashSegmentsFD,
'ism': IsmFD,
'youtube_live_chat_replay': YoutubeLiveChatReplayFD,
}
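
The entry above registers the new downloader under the 'youtube_live_chat_replay' protocol, so get_suitable_downloader() resolves any info dict carrying that protocol to YoutubeLiveChatReplayFD. A minimal sketch of that lookup (not part of the commit), assuming this build is importable as youtube_dl:

from youtube_dl.downloader import get_suitable_downloader

# PROTOCOL_MAP lookup: protocol string -> downloader class
fd_cls = get_suitable_downloader({'protocol': 'youtube_live_chat_replay'})
print(fd_cls.__name__)  # expected: YoutubeLiveChatReplayFD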


94 changes: 94 additions & 0 deletions youtube_dl/downloader/youtube_live_chat.py
@@ -0,0 +1,94 @@
from __future__ import division, unicode_literals

import re
import json

from .fragment import FragmentFD


class YoutubeLiveChatReplayFD(FragmentFD):
""" Downloads YouTube live chat replays fragment by fragment """

FD_NAME = 'youtube_live_chat_replay'

def real_download(self, filename, info_dict):
video_id = info_dict['video_id']
self.to_screen('[%s] Downloading live chat' % self.FD_NAME)

test = self.params.get('test', False)

ctx = {
'filename': filename,
'live': True,
'total_frags': None,
}

def dl_fragment(url):
headers = info_dict.get('http_headers', {})
return self._download_fragment(ctx, url, info_dict, headers)

def parse_yt_initial_data(data):
window_patt = b'window\\["ytInitialData"\\]\\s*=\\s*(.*?)(?<=});'
var_patt = b'var\\s+ytInitialData\\s*=\\s*(.*?)(?<=});'
for patt in window_patt, var_patt:
try:
raw_json = re.search(patt, data).group(1)
return json.loads(raw_json)
except AttributeError:
continue

self._prepare_and_start_frag_download(ctx)

success, raw_fragment = dl_fragment(
'https://www.youtube.com/watch?v={}'.format(video_id))
if not success:
return False
data = parse_yt_initial_data(raw_fragment)
continuation_id = data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
# no data yet but required to call _append_fragment
self._append_fragment(ctx, b'')

first = True
offset = None
while continuation_id is not None:
data = None
if first:
url = 'https://www.youtube.com/live_chat_replay?continuation={}'.format(continuation_id)
success, raw_fragment = dl_fragment(url)
if not success:
return False
data = parse_yt_initial_data(raw_fragment)
else:
url = ('https://www.youtube.com/live_chat_replay/get_live_chat_replay'
+ '?continuation={}'.format(continuation_id)
+ '&playerOffsetMs={}'.format(offset - 5000)
+ '&hidden=false'
+ '&pbj=1')
success, raw_fragment = dl_fragment(url)
if not success:
return False
data = json.loads(raw_fragment)['response']

first = False
continuation_id = None

live_chat_continuation = data['continuationContents']['liveChatContinuation']
offset = None
processed_fragment = bytearray()
if 'actions' in live_chat_continuation:
for action in live_chat_continuation['actions']:
if 'replayChatItemAction' in action:
replay_chat_item_action = action['replayChatItemAction']
offset = int(replay_chat_item_action['videoOffsetTimeMsec'])
processed_fragment.extend(
json.dumps(action, ensure_ascii=False).encode('utf-8') + b'\n')
continuation_id = live_chat_continuation['continuations'][0]['liveChatReplayContinuationData']['continuation']

self._append_fragment(ctx, processed_fragment)

if test or offset is None:
break

self._finish_frag_download(ctx)

return True
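
The downloader above serialises each replayChatItemAction as one JSON object per line via _append_fragment(). A minimal sketch (not part of the commit) of reading such a file back; the filename is a hypothetical example of where the replay ends up when requested through the subtitle machinery.

import io
import json

with io.open('VIDEO.live_chat.json', encoding='utf-8') as f:  # hypothetical path
    for line in f:
        action = json.loads(line)
        item = action['replayChatItemAction']
        offset_ms = int(item['videoOffsetTimeMsec'])  # position in the video, in ms
        # the nested chat payload is YouTube's own JSON and is not unpacked here
        print(offset_ms)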
71 changes: 71 additions & 0 deletions youtube_dl/extractor/doodstream.py
@@ -0,0 +1,71 @@
# coding: utf-8
from __future__ import unicode_literals

import string
import random
import time

from .common import InfoExtractor


class DoodStreamIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?dood\.(?:to|watch)/[ed]/(?P<id>[a-z0-9]+)'
_TESTS = [{
'url': 'http://dood.to/e/5s1wmbdacezb',
'md5': '4568b83b31e13242b3f1ff96c55f0595',
'info_dict': {
'id': '5s1wmbdacezb',
'ext': 'mp4',
'title': 'Kat Wonders - Monthly May 2020',
'description': 'Kat Wonders - Monthly May 2020 | DoodStream.com',
'thumbnail': 'https://img.doodcdn.com/snaps/flyus84qgl2fsk4g.jpg',
}
}, {
'url': 'https://dood.to/d/jzrxn12t2s7n',
'md5': '3207e199426eca7c2aa23c2872e6728a',
'info_dict': {
'id': 'jzrxn12t2s7n',
'ext': 'mp4',
'title': 'Stacy Cruz Cute ALLWAYSWELL',
'description': 'Stacy Cruz Cute ALLWAYSWELL | DoodStream.com',
'thumbnail': 'https://img.doodcdn.com/snaps/8edqd5nppkac3x8u.jpg',
}
}]

def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)

if '/d/' in url:
url = "https://dood.to" + self._html_search_regex(
r'<iframe src="(/e/[a-z0-9]+)"', webpage, 'embed')
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)

title = self._html_search_meta(['og:title', 'twitter:title'],
webpage, default=None)
thumb = self._html_search_meta(['og:image', 'twitter:image'],
webpage, default=None)
token = self._html_search_regex(r'[?&]token=([a-z0-9]+)[&\']', webpage, 'token')
description = self._html_search_meta(
['og:description', 'description', 'twitter:description'],
webpage, default=None)
auth_url = 'https://dood.to' + self._html_search_regex(
r'(/pass_md5.*?)\'', webpage, 'pass_md5')
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:53.0) Gecko/20100101 Firefox/66.0',
'referer': url
}

webpage = self._download_webpage(auth_url, video_id, headers=headers)
final_url = webpage + ''.join([random.choice(string.ascii_letters + string.digits) for _ in range(10)]) + "?token=" + token + "&expiry=" + str(int(time.time() * 1000))

return {
'id': video_id,
'title': title,
'url': final_url,
'http_headers': headers,
'ext': 'mp4',
'description': description,
'thumbnail': thumb,
}
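
A minimal usage sketch (not part of the commit) for the new extractor, assuming this build is installed and importable as youtube_dl; the URL is the first test case from _TESTS above.

import youtube_dl

with youtube_dl.YoutubeDL() as ydl:
    info = ydl.extract_info('http://dood.to/e/5s1wmbdacezb', download=False)
    print(info['id'], info['title'])
    print(info['url'])           # tokenised direct link built by the extractor
    print(info['http_headers'])  # referer/User-Agent the download needs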
6 changes: 6 additions & 0 deletions youtube_dl/extractor/extractors.py
@@ -293,6 +293,7 @@
from .discoveryvr import DiscoveryVRIE
from .disney import DisneyIE
from .dispeak import DigitallySpeakingIE
from .doodstream import DoodStreamIE
from .dropbox import DropboxIE
from .dw import (
DWIE,
@@ -1058,6 +1059,11 @@
BellatorIE,
ParamountNetworkIE,
)
from .storyfire import (
StoryFireIE,
StoryFireUserIE,
StoryFireSeriesIE,
)
from .stitcher import StitcherIE
from .sport5 import Sport5IE
from .sportbox import SportBoxIE
12 changes: 7 additions & 5 deletions youtube_dl/extractor/francetv.py
@@ -316,13 +316,14 @@ class FranceTVInfoIE(FranceTVBaseInfoExtractor):
_VALID_URL = r'https?://(?:www|mobile|france3-regions)\.francetvinfo\.fr/(?:[^/]+/)*(?P<id>[^/?#&.]+)'

_TESTS = [{
'url': 'http://www.francetvinfo.fr/replay-jt/france-3/soir-3/jt-grand-soir-3-lundi-26-aout-2013_393427.html',
'url': 'https://www.francetvinfo.fr/replay-jt/france-3/soir-3/jt-grand-soir-3-jeudi-22-aout-2019_3561461.html',
'info_dict': {
'id': '84981923',
'id': 'd12458ee-5062-48fe-bfdd-a30d6a01b793',
'ext': 'mp4',
'title': 'Soir 3',
'upload_date': '20130826',
'timestamp': 1377548400,
'upload_date': '20190822',
'timestamp': 1566510900,
'description': 'md5:72d167097237701d6e8452ff03b83c00',
'subtitles': {
'fr': 'mincount:2',
},
@@ -374,7 +375,8 @@ def _real_extract(self, url):
video_id = self._search_regex(
(r'player\.load[^;]+src:\s*["\']([^"\']+)',
r'id-video=([^@]+@[^"]+)',
r'<a[^>]+href="(?:https?:)?//videos\.francetv\.fr/video/([^@]+@[^"]+)"'),
r'<a[^>]+href="(?:https?:)?//videos\.francetv\.fr/video/([^@]+@[^"]+)"',
r'data-id="([^"]+)"'),
webpage, 'video id')

return self._make_url_result(video_id)
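
The hunk above adds data-id="…" as a last-resort pattern; _search_regex() accepts a tuple of patterns and returns the first one that matches, so the new fallback only applies when the older patterns fail. A rough sketch (not part of the commit) of that first-match-wins ordering using plain re and an invented HTML snippet; the id value is the one from the updated test case.

import re

patterns = (
    r'player\.load[^;]+src:\s*["\']([^"\']+)',
    r'id-video=([^@]+@[^"]+)',
    r'<a[^>]+href="(?:https?:)?//videos\.francetv\.fr/video/([^@]+@[^"]+)"',
    r'data-id="([^"]+)"',  # fallback added in this commit
)
webpage = '<article data-id="d12458ee-5062-48fe-bfdd-a30d6a01b793">'  # invented snippet
for patt in patterns:
    m = re.search(patt, webpage)
    if m:
        print(m.group(1))
        break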