Skip to content

Commit

Permalink
[wistia] Add support for multiple generic embeds (closes #8347, closes #11385)
Browse files Browse the repository at this point in the history
  • Loading branch information
dstftw authored and bbepis committed Feb 27, 2020
1 parent c7ff7a5 commit b754059
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 23 deletions.
17 changes: 9 additions & 8 deletions youtube_dl/extractor/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -2537,14 +2537,15 @@ def _real_extract(self, url):
dailymail_urls, video_id, video_title, ie=DailyMailIE.ie_key())

# Look for embedded Wistia player
wistia_url = WistiaIE._extract_url(webpage)
if wistia_url:
return {
'_type': 'url_transparent',
'url': self._proto_relative_url(wistia_url),
'ie_key': WistiaIE.ie_key(),
'uploader': video_uploader,
}
wistia_urls = WistiaIE._extract_urls(webpage)
if wistia_urls:
playlist = self.playlist_from_matches(wistia_urls, video_id, video_title, ie=WistiaIE.ie_key())
for entry in playlist['entries']:
entry.update({
'_type': 'url_transparent',
'uploader': video_uploader,
})
return playlist

# Look for SVT player
svt_url = SVTIE._extract_url(webpage)
Expand Down
31 changes: 16 additions & 15 deletions youtube_dl/extractor/wistia.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,22 +45,23 @@ class WistiaIE(InfoExtractor):
# https://wistia.com/support/embed-and-share/video-on-your-website
@staticmethod
def _extract_url(webpage):
match = re.search(
r'<(?:meta[^>]+?content|(?:iframe|script)[^>]+?src)=["\'](?P<url>(?:https?:)?//(?:fast\.)?wistia\.(?:net|com)/embed/(?:iframe|medias)/[a-z0-9]{10})', webpage)
if match:
return unescapeHTML(match.group('url'))
urls = WistiaIE._extract_urls(webpage)
return urls[0] if urls else None

match = re.search(
r'''(?sx)
<script[^>]+src=(["'])(?:https?:)?//fast\.wistia\.com/assets/external/E-v1\.js\1[^>]*>.*?
<div[^>]+class=(["']).*?\bwistia_async_(?P<id>[a-z0-9]{10})\b.*?\2
''', webpage)
if match:
return 'wistia:%s' % match.group('id')

match = re.search(r'(?:data-wistia-?id=["\']|Wistia\.embed\(["\']|id=["\']wistia_)(?P<id>[a-z0-9]{10})', webpage)
if match:
return 'wistia:%s' % match.group('id')
@staticmethod
def _extract_urls(webpage):
    """Collect every Wistia embed reference found in *webpage*.

    Three kinds of embed markup are scanned, one after another:

    1. ``<meta content=...>``, ``<iframe src=...>`` and ``<script src=...>``
       tags pointing at a Wistia ``/embed/iframe/`` or ``/embed/medias/``
       URL; the matched URL is passed through ``unescapeHTML``.
    2. ``<div>`` tags whose ``class`` attribute contains a
       ``wistia_async_<id>`` token.
    3. ``data-wistia-id`` / ``data-wistiaid`` attributes,
       ``Wistia.embed('<id>'`` calls and ``id="wistia_<id>"`` attributes.

    Kinds 2 and 3 are reported as ``'wistia:<id>'`` strings.

    Returns a list (possibly empty) ordered by pattern kind first and by
    position in the page second.  Duplicates are not removed here.
    """
    urls = []

    # Kind 1: direct embed URLs in meta/iframe/script tags.
    for match in re.finditer(
            r'<(?:meta[^>]+?content|(?:iframe|script)[^>]+?src)=["\'](?P<url>(?:https?:)?//(?:fast\.)?wistia\.(?:net|com)/embed/(?:iframe|medias)/[a-z0-9]{10})', webpage):
        urls.append(unescapeHTML(match.group('url')))

    # Kind 2: async embeds.  Group 1 is the quote that opens the class
    # attribute; the attribute must be closed by that same quote (\1).
    # The tempered runs (?:(?!\1).)*? keep the match inside the attribute
    # value.  (Closing with \2 would back-reference the named id group and
    # require the id to occur twice, so ordinary embeds were missed.)
    for match in re.finditer(
            r'''(?sx)
                <div[^>]+class=(["'])(?:(?!\1).)*?\bwistia_async_(?P<id>[a-z0-9]{10})\b(?:(?!\1).)*?\1
            ''', webpage):
        urls.append('wistia:%s' % match.group('id'))

    # Kind 3: bare media ids in data attributes, JS calls and element ids.
    for match in re.finditer(r'(?:data-wistia-?id=["\']|Wistia\.embed\(["\']|id=["\']wistia_)(?P<id>[a-z0-9]{10})', webpage):
        urls.append('wistia:%s' % match.group('id'))

    return urls

def _real_extract(self, url):
video_id = self._match_id(url)
Expand Down

0 comments on commit b754059

Please sign in to comment.