Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Issue 16277 #16716

Closed
wants to merge 20 commits into from
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
218 changes: 75 additions & 143 deletions youtube_dl/extractor/atresplayer.py
Original file line number Diff line number Diff line change
@@ -1,50 +1,36 @@
from __future__ import unicode_literals

import time
import hmac
import hashlib
import re
import json
from ..compat import compat_HTTPError

from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
ExtractorError,
float_or_none,
int_or_none,
sanitized_Request,
urlencode_postdata,
xpath_text,
)

try:
from json import JSONDecodeError
except ImportError:
JSONDecodeError = ValueError


class AtresPlayerIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?atresplayer\.com/television/[^/]+/[^/]+/[^/]+/(?P<id>.+?)_\d+\.html'
_VALID_URL = r'https?://(?:www\.)?atresplayer\.com/[^/]+/[^/]+/[^/]+/[^/]+/[^/_]+_(?P<id>[a-zA-Z0-9]+)'
_NETRC_MACHINE = 'atresplayer'
_TESTS = [
{
'url': 'http://www.atresplayer.com/television/programas/el-club-de-la-comedia/temporada-4/capitulo-10-especial-solidario-nochebuena_2014122100174.html',
'md5': 'efd56753cda1bb64df52a3074f62e38a',
'url': 'https://www.atresplayer.com/lasexta/programas/el-intermedio/temporada-12/el-intermedio-21-05-18_5b03068d7ed1a8a94b3faf29/',
'md5': '3afa3d3cc155264374916f2a23d1d00c',
'info_dict': {
'id': 'capitulo-10-especial-solidario-nochebuena',
'ext': 'mp4',
'title': 'Especial Solidario de Nochebuena',
'description': 'md5:e2d52ff12214fa937107d21064075bf1',
'duration': 5527.6,
'thumbnail': r're:^https?://.*\.jpg$',
'id': '5b03068d7ed1a8a94b3faf29',
'ext': 'm3u8',
},
'skip': 'This video is only available for registered users'
},
{
'url': 'http://www.atresplayer.com/television/especial/videoencuentros/temporada-1/capitulo-112-david-bustamante_2014121600375.html',
'md5': '6e52cbb513c405e403dbacb7aacf8747',
'info_dict': {
'id': 'capitulo-112-david-bustamante',
'ext': 'flv',
'title': 'David Bustamante',
'description': 'md5:f33f1c0a05be57f6708d4dd83a3b81c6',
'duration': 1439.0,
'thumbnail': r're:^https?://.*\.jpg$',
'params': {
'skip_download': True,
},
'skip': 'required_registered',
},
{
'url': 'http://www.atresplayer.com/television/series/el-secreto-de-puente-viejo/el-chico-de-los-tres-lunares/capitulo-977-29-12-14_2014122400174.html',
Expand All @@ -53,22 +39,9 @@ class AtresPlayerIE(InfoExtractor):
]

_USER_AGENT = 'Dalvik/1.6.0 (Linux; U; Android 4.3; GT-I9300 Build/JSS15J'
_MAGIC = 'QWtMLXs414Yo+c#_+Q#K@NN)'
_TIMESTAMP_SHIFT = 30000

_TIME_API_URL = 'http://servicios.atresplayer.com/api/admin/time.json'
_URL_VIDEO_TEMPLATE = 'https://servicios.atresplayer.com/api/urlVideo/{1}/{0}/{1}|{2}|{3}.json'
_PLAYER_URL_TEMPLATE = 'https://servicios.atresplayer.com/episode/getplayer.json?episodePk=%s'
_EPISODE_URL_TEMPLATE = 'http://www.atresplayer.com/episodexml/%s'

_LOGIN_URL = 'https://servicios.atresplayer.com/j_spring_security_check'

_ERRORS = {
'UNPUBLISHED': 'We\'re sorry, but this video is not yet available.',
'DELETED': 'This video has expired and is no longer available for online streaming.',
'GEOUNPUBLISHED': 'We\'re sorry, but this video is not available in your region due to right restrictions.',
# 'PREMIUM': 'PREMIUM',
}
_PLAYER_URL_TEMPLATE = 'https://api.atresplayer.com/client/v1/page/episode/%s'
_LOGIN_URL = 'https://api.atresplayer.com/login?redirect=https%3A%2F%2Fwww.atresplayer.com'
_LOGIN_ACCOUNT_URL = 'https://account.atresmedia.com/api/login'

def _real_initialize(self):
self._login()
Expand All @@ -79,124 +52,83 @@ def _login(self):
return

login_form = {
'j_username': username,
'j_password': password,
'username': username,
'password': password,
}

self._download_webpage(self._LOGIN_URL, None, 'get login page')
request = sanitized_Request(
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Inline this request into the actual `_download_*` call instead of building a separate `sanitized_Request`.

Copy link
Author

@Nekmo Nekmo Jul 13, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't understand this comment.

Copy link
Contributor

@bato3 bato3 Jul 30, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@Nekmo You can make the JSON POST request directly with `_download_json` and set the expected statuses via `expected_status`.

For a POST, pass `data=urlencode_postdata(form_data)`.

From common.py

      def _download_json(
            self, url_or_request, video_id, note='Downloading JSON metadata',
            errnote='Unable to download JSON metadata', transform_source=None,
            fatal=True, encoding=None, data=None, headers={}, query={},
            expected_status=None):
        """
        Return the JSON object as a dict.

        See _download_webpage docstring for arguments specification.
        """

Copy link
Author

@Nekmo Nekmo Jul 30, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The response from the server is not JSON. This request only sets the cookies and the session.

self._LOGIN_URL, urlencode_postdata(login_form))
self._LOGIN_ACCOUNT_URL,
urlencode_postdata(login_form),
login_form,
method='post')
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
response = self._download_webpage(
request, None, 'Logging in')

error = self._html_search_regex(
r'(?s)<ul[^>]+class="[^"]*\blist_error\b[^"]*">(.+?)</ul>',
response, 'error', default=None)
if error:
raise ExtractorError(
'Unable to login: %s' % error, expected=True)
try:
response = self._download_json(
request, None, 'post to login form')
except ExtractorError as e:
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Pass `expected_status` to `_download_json` instead of catching `ExtractorError`.

if isinstance(e.cause, compat_HTTPError):
raise self._atres_player_error(e.cause.file.read(), e)
else:
raise
else:
self._download_webpage(response['targetUrl'], None,
'Set login session')

def _atres_player_error(self, body_response, original_exception):
    """Turn an API error response body into the exception to raise.

    body_response is the raw body of a failed HTTP response (callers
    pass the result of ``file.read()``, so it may be bytes or text).
    original_exception is the exception that triggered the lookup.

    Returns an expected ExtractorError built from the ``error`` and
    ``error_description`` fields when the body is a JSON object that
    contains ``error``; otherwise returns original_exception unchanged.
    The caller is responsible for raising the returned exception.
    """
    if isinstance(body_response, bytes):
        # json.loads() only accepts bytes since Python 3.6, and undecodable
        # input would raise UnicodeDecodeError; decode leniently up front.
        body_response = body_response.decode('utf-8', 'replace')
    try:
        data = json.loads(body_response)
    except (TypeError, ValueError):
        # ValueError covers JSONDecodeError on Python 3 and the plain
        # ValueError raised on Python 2; TypeError covers a missing body.
        return original_exception
    if not isinstance(data, dict) or 'error' not in data:
        return original_exception
    return ExtractorError(
        '{0} returned error: {1} ({2})'.format(
            self.IE_NAME, data['error'],
            data.get('error_description', 'There is no description')),
        expected=True)

def _real_extract(self, url):
video_id = self._match_id(url)

webpage = self._download_webpage(url, video_id)

episode_id = self._search_regex(
r'episode="([^"]+)"', webpage, 'episode id')

request = sanitized_Request(
self._PLAYER_URL_TEMPLATE % episode_id,
self._PLAYER_URL_TEMPLATE % video_id,
headers={'User-Agent': self._USER_AGENT})
player = self._download_json(request, episode_id, 'Downloading player JSON')

episode_type = player.get('typeOfEpisode')
error_message = self._ERRORS.get(episode_type)
if error_message:
raise ExtractorError(
'%s returned error: %s' % (self.IE_NAME, error_message), expected=True)
player = self._download_json(request, video_id,
'Downloading player JSON')

formats = []
video_url = player.get('urlVideo')
if video_url:
format_info = {
'url': video_url,
'format_id': 'http',
}
mobj = re.search(r'(?P<bitrate>\d+)K_(?P<width>\d+)x(?P<height>\d+)', video_url)
if mobj:
format_info.update({
'width': int_or_none(mobj.group('width')),
'height': int_or_none(mobj.group('height')),
'tbr': int_or_none(mobj.group('bitrate')),
})
formats.append(format_info)

timestamp = int_or_none(self._download_webpage(
self._TIME_API_URL,
video_id, 'Downloading timestamp', fatal=False), 1000, time.time())
timestamp_shifted = compat_str(timestamp + self._TIMESTAMP_SHIFT)
token = hmac.new(
self._MAGIC.encode('ascii'),
(episode_id + timestamp_shifted).encode('utf-8'), hashlib.md5
).hexdigest()

request = sanitized_Request(
self._URL_VIDEO_TEMPLATE.format('windows', episode_id, timestamp_shifted, token),
video_url,
headers={'User-Agent': self._USER_AGENT})
try:
video_data = self._download_json(request, video_id,
'Downloading video JSON',
fatal=True)
except ExtractorError as e:
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Pass `expected_status` to `_download_json` instead of catching `ExtractorError`.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I can't find `expected_status`.

if len(e.exc_info) <= 1 or e.exc_info[1].code != 403:
raise
raise self._atres_player_error(e.exc_info[1].file.read(), e)

for source in video_data['sources']:
if source.get('type') == 'application/dash+xml':
formats.extend(self._extract_mpd_formats(
source['src'], video_id, mpd_id='dash',
fatal=False))
elif source.get('type') == 'application/vnd.apple.mpegurl':
formats.extend(self._extract_m3u8_formats(
source['src'], video_id,
fatal=False))

fmt_json = self._download_json(
request, video_id, 'Downloading windows video JSON')

result = fmt_json.get('resultDes')
if result.lower() != 'ok':
raise ExtractorError(
'%s returned error: %s' % (self.IE_NAME, result), expected=True)

for format_id, video_url in fmt_json['resultObject'].items():
if format_id == 'token' or not video_url.startswith('http'):
continue
if 'geodeswowsmpra3player' in video_url:
# f4m_path = video_url.split('smil:', 1)[-1].split('free_', 1)[0]
# f4m_url = 'http://drg.antena3.com/{0}hds/es/sd.f4m'.format(f4m_path)
# this videos are protected by DRM, the f4m downloader doesn't support them
continue
video_url_hd = video_url.replace('free_es', 'es')
formats.extend(self._extract_f4m_formats(
video_url_hd[:-9] + '/manifest.f4m', video_id, f4m_id='hds',
fatal=False))
formats.extend(self._extract_mpd_formats(
video_url_hd[:-9] + '/manifest.mpd', video_id, mpd_id='dash',
fatal=False))
self._sort_formats(formats)

path_data = player.get('pathData')

episode = self._download_xml(
self._EPISODE_URL_TEMPLATE % path_data, video_id,
'Downloading episode XML')

duration = float_or_none(xpath_text(
episode, './media/asset/info/technical/contentDuration', 'duration'))

art = episode.find('./media/asset/info/art')
title = xpath_text(art, './name', 'title')
description = xpath_text(art, './description', 'description')
thumbnail = xpath_text(episode, './media/asset/files/background', 'thumbnail')

subtitles = {}
subtitle_url = xpath_text(episode, './media/asset/files/subtitle', 'subtitle')
if subtitle_url:
subtitles['es'] = [{
'ext': 'srt',
'url': subtitle_url,
}]

return {
'id': video_id,
'title': title,
'description': description,
'thumbnail': thumbnail,
'duration': duration,
'title': video_data.get('titulo'),
'description': video_data.get('descripcion'),
'thumbnail': video_data.get('imgPoster'),
'duration': video_data.get('duration'),
'formats': formats,
'subtitles': subtitles,
}