Skip to content

Commit

Permalink
Merge pull request #177 from blackjack4494/master
Browse files Browse the repository at this point in the history
Release 2020.10.09
  • Loading branch information
blackjack4494 authored Oct 9, 2020
2 parents f3e826e + f955ad7 commit 7419a53
Show file tree
Hide file tree
Showing 14 changed files with 578 additions and 56 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -60,3 +60,5 @@ venv/

# VS Code related files
.vscode

cookies.txt
302 changes: 302 additions & 0 deletions README.md

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions youtube_dlc/extractor/bandcamp.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,6 @@ def _real_extract(self, url):
webpage, 'track info', default='{}')

track_info = self._parse_json(trackinfo_block, title)

if track_info:
file_ = track_info.get('file')
if isinstance(file_, dict):
Expand All @@ -115,7 +114,7 @@ def _real_extract(self, url):
'acodec': ext,
'abr': int_or_none(abr_str),
})
track = track_info.get('title')

track_id = str_or_none(track_info.get('track_id') or track_info.get('id'))
track_number = int_or_none(track_info.get('track_num'))
duration = float_or_none(track_info.get('duration'))
Expand All @@ -126,6 +125,7 @@ def extract(key):
webpage, key, default=None, group='value')
return data.replace(r'\"', '"').replace('\\\\', '\\') if data else data

track = extract('title')
artist = extract('artist')
album = extract('album_title')
timestamp = unified_timestamp(
Expand Down
2 changes: 2 additions & 0 deletions youtube_dlc/extractor/bet.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
from .mtv import MTVServicesInfoExtractor
from ..utils import unified_strdate

# TODO Remove - Reason: Outdated Site


class BetIE(MTVServicesInfoExtractor):
_VALID_URL = r'https?://(?:www\.)?bet\.com/(?:[^/]+/)+(?P<id>.+?)\.html'
Expand Down
6 changes: 4 additions & 2 deletions youtube_dlc/extractor/cmt.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

from .mtv import MTVIE

# TODO Remove - Reason: Outdated Site


class CMTIE(MTVIE):
IE_NAME = 'cmt.com'
Expand Down Expand Up @@ -39,7 +41,7 @@ class CMTIE(MTVIE):
'only_matching': True,
}]

def _extract_mgid(self, webpage):
def _extract_mgid(self, webpage, url):
mgid = self._search_regex(
r'MTVN\.VIDEO\.contentUri\s*=\s*([\'"])(?P<mgid>.+?)\1',
webpage, 'mgid', group='mgid', default=None)
Expand All @@ -50,5 +52,5 @@ def _extract_mgid(self, webpage):
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
mgid = self._extract_mgid(webpage)
mgid = self._extract_mgid(webpage, url)
return self.url_result('http://media.mtvnservices.com/embed/%s' % mgid)
2 changes: 1 addition & 1 deletion youtube_dlc/extractor/comedycentral.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ class ComedyCentralFullEpisodesIE(MTVServicesInfoExtractor):
def _real_extract(self, url):
playlist_id = self._match_id(url)
webpage = self._download_webpage(url, playlist_id)
mgid = self._extract_triforce_mgid(webpage, data_zone='t2_lc_promo1')
mgid = self._extract_mgid(webpage, url, data_zone='t2_lc_promo1')
videos_info = self._get_videos_info(mgid)
return videos_info

Expand Down
7 changes: 5 additions & 2 deletions youtube_dlc/extractor/expressen.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
class ExpressenIE(InfoExtractor):
_VALID_URL = r'''(?x)
https?://
(?:www\.)?expressen\.se/
(?:www\.)?(?:expressen|di)\.se/
(?:(?:tvspelare/video|videoplayer/embed)/)?
tv/(?:[^/]+/)*
(?P<id>[^/?#&]+)
Expand All @@ -42,13 +42,16 @@ class ExpressenIE(InfoExtractor):
}, {
'url': 'https://www.expressen.se/videoplayer/embed/tv/ditv/ekonomistudion/experterna-har-ar-fragorna-som-avgor-valet/?embed=true&external=true&autoplay=true&startVolume=0&partnerId=di',
'only_matching': True,
}, {
'url': 'https://www.di.se/videoplayer/embed/tv/ditv/borsmorgon/implantica-rusar-70--under-borspremiaren-hor-styrelsemedlemmen/?embed=true&external=true&autoplay=true&startVolume=0&partnerId=di',
'only_matching': True,
}]

@staticmethod
def _extract_urls(webpage):
return [
mobj.group('url') for mobj in re.finditer(
r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:www\.)?expressen\.se/(?:tvspelare/video|videoplayer/embed)/tv/.+?)\1',
r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:www\.)?(?:expressen|di)\.se/(?:tvspelare/video|videoplayer/embed)/tv/.+?)\1',
webpage)]

def _real_extract(self, url):
Expand Down
3 changes: 2 additions & 1 deletion youtube_dlc/extractor/iprima.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,8 @@ def _real_extract(self, url):
(r'<iframe[^>]+\bsrc=["\'](?:https?:)?//(?:api\.play-backend\.iprima\.cz/prehravac/embedded|prima\.iprima\.cz/[^/]+/[^/]+)\?.*?\bid=(p\d+)',
r'data-product="([^"]+)">',
r'id=["\']player-(p\d+)"',
r'playerId\s*:\s*["\']player-(p\d+)'),
r'playerId\s*:\s*["\']player-(p\d+)',
r'\bvideos\s*=\s*["\'](p\d+)'),
webpage, 'real id')

playerpage = self._download_webpage(
Expand Down
46 changes: 43 additions & 3 deletions youtube_dlc/extractor/mtv.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from ..compat import (
compat_str,
compat_xpath,
compat_urlparse,
)
from ..utils import (
ExtractorError,
Expand All @@ -22,6 +23,7 @@
unescapeHTML,
update_url_query,
url_basename,
get_domain,
xpath_text,
)

Expand Down Expand Up @@ -253,7 +255,42 @@ def _extract_triforce_mgid(self, webpage, data_zone=None, video_id=None):

return try_get(feed, lambda x: x['result']['data']['id'], compat_str)

def _extract_mgid(self, webpage):
def _extract_new_triforce_mgid(self, webpage, url='', video_id=None):
# print(compat_urlparse.urlparse(url).netloc)
if url == '':
return
domain = get_domain(url)
if domain is None:
raise ExtractorError(
'[%s] could not get domain' % self.IE_NAME,
expected=True)
url = url.replace("https://", "http://")
enc_url = compat_urlparse.quote(url, safe='')
_TRIFORCE_V8_TEMPLATE = 'https://%s/feeds/triforce/manifest/v8?url=%s'
triforce_manifest_url = _TRIFORCE_V8_TEMPLATE % (domain, enc_url)

manifest = self._download_json(triforce_manifest_url, video_id, fatal=False)
if manifest:
if manifest.get('manifest').get('type') == 'redirect':
self.to_screen('Found a redirect. Downloading manifest from new location')
new_loc = manifest.get('manifest').get('newLocation')
new_loc = new_loc.replace("https://", "http://")
enc_new_loc = compat_urlparse.quote(new_loc, safe='')
triforce_manifest_new_loc = _TRIFORCE_V8_TEMPLATE % (domain, enc_new_loc)
manifest = self._download_json(triforce_manifest_new_loc, video_id, fatal=False)

item_id = try_get(manifest, lambda x: x['manifest']['reporting']['itemId'], compat_str)
if not item_id:
self.to_screen('Found no id!')
return

# 'episode' can be anything. 'content' is used often as well
_MGID_TEMPLATE = 'mgid:arc:episode:%s:%s'
mgid = _MGID_TEMPLATE % (domain, item_id)

return mgid

def _extract_mgid(self, webpage, url, data_zone=None):
try:
# the url can be http://media.mtvnservices.com/fb/{mgid}.swf
# or http://media.mtvnservices.com/{mgid}
Expand All @@ -276,14 +313,17 @@ def _extract_mgid(self, webpage):
r'embed/(mgid:.+?)["\'&?/]', sm4_embed, 'mgid', default=None)

if not mgid:
mgid = self._extract_triforce_mgid(webpage)
mgid = self._extract_new_triforce_mgid(webpage, url)

if not mgid:
mgid = self._extract_triforce_mgid(webpage, data_zone)

return mgid

def _real_extract(self, url):
title = url_basename(url)
webpage = self._download_webpage(url, title)
mgid = self._extract_mgid(webpage)
mgid = self._extract_mgid(webpage, url)
videos_info = self._get_videos_info(mgid)
return videos_info

Expand Down
2 changes: 1 addition & 1 deletion youtube_dlc/extractor/nick.py
Original file line number Diff line number Diff line change
Expand Up @@ -245,5 +245,5 @@ class NickRuIE(MTVServicesInfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
mgid = self._extract_mgid(webpage)
mgid = self._extract_mgid(webpage, url)
return self.url_result('http://media.mtvnservices.com/embed/%s' % mgid)
16 changes: 13 additions & 3 deletions youtube_dlc/extractor/spike.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,18 @@ class BellatorIE(MTVServicesInfoExtractor):
_FEED_URL = 'http://www.bellator.com/feeds/mrss/'
_GEO_COUNTRIES = ['US']

def _extract_mgid(self, webpage):
return self._extract_triforce_mgid(webpage)
def _extract_mgid(self, webpage, url):
mgid = None

if not mgid:
mgid = self._extract_triforce_mgid(webpage)

if not mgid:
mgid = self._extract_new_triforce_mgid(webpage, url)

return mgid

# TODO Remove - Reason: Outdated Site


class ParamountNetworkIE(MTVServicesInfoExtractor):
Expand All @@ -43,7 +53,7 @@ class ParamountNetworkIE(MTVServicesInfoExtractor):
_FEED_URL = 'http://www.paramountnetwork.com/feeds/mrss/'
_GEO_COUNTRIES = ['US']

def _extract_mgid(self, webpage):
def _extract_mgid(self, webpage, url):
root_data = self._parse_json(self._search_regex(
r'window\.__DATA__\s*=\s*({.+})',
webpage, 'data'), None)
Expand Down
2 changes: 2 additions & 0 deletions youtube_dlc/extractor/vh1.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@

from .mtv import MTVServicesInfoExtractor

# TODO Remove - Reason: Outdated Site


class VH1IE(MTVServicesInfoExtractor):
IE_NAME = 'vh1.com'
Expand Down
Loading

0 comments on commit 7419a53

Please sign in to comment.