Skip to content

Commit

Permalink
[VK] Fix downloading user playlist
Browse files Browse the repository at this point in the history
Scraping the HTML page only returns the last 30 videos. Use the JSON API
instead to get up to 1000 videos.

Fixes #14327
  • Loading branch information
pawitp committed Oct 14, 2017
1 parent c9bd503 commit 974e4dd
Showing 1 changed file with 32 additions and 17 deletions.
49 changes: 32 additions & 17 deletions youtube_dl/extractor/vk.py
Original file line number Diff line number Diff line change
Expand Up @@ -457,15 +457,29 @@ def _real_extract(self, url):
class VKUserVideosIE(VKBaseIE):
IE_NAME = 'vk:uservideos'
IE_DESC = "VK - User's Videos"
_VALID_URL = r'https?://(?:(?:m|new)\.)?vk\.com/videos(?P<id>-?[0-9]+)(?!\?.*\bz=video)(?:[/?#&]|$)'
_TEMPLATE_URL = 'https://vk.com/videos'
_VALID_URL = r'https?://(?:(?:m|new)\.)?vk\.com/videos(?P<id>-?[0-9]+)(?:.*\bsection=(?P<section>\w+))?(?!\?.*\bz=video)(?:[/?#&]|$)'
_TEMPLATE_URL = 'https://vk.com/al_video.php?act=load_videos_silent&al=1&need_albums=0&offset=0&oid=%s&rowlen=3&section=%s'
_TESTS = [{
'url': 'http://vk.com/videos205387401',
'url': 'https://vk.com/videos451841516?section=album_1',
'info_dict': {
'id': '205387401',
'title': "Tom Cruise's Videos",
'id': '451841516',
'title': 'album_1',
},
'playlist_mincount': 4,
'playlist_count': 39,
}, {
'url': 'https://m.vk.com/videos451841516',
'info_dict': {
'id': '451841516',
'title': 'all',
},
'playlist_count': 40,
}, {
'url': 'https://vk.com/videos451841516',
'info_dict': {
'id': '451841516',
'title': 'all',
},
'playlist_count': 40,
}, {
'url': 'http://vk.com/videos-77521',
'only_matching': True,
Expand All @@ -480,21 +494,22 @@ class VKUserVideosIE(VKBaseIE):
'only_matching': True,
}]

def _real_extract(self, url):
page_id = self._match_id(url)
def _generate_entry(self, entry):
    """Build a URL result for the 'VK' extractor from one video-list item.

    The first two items of ``entry`` are formatted as integers and joined
    with an underscore to form the video id (presumably the owner id and
    the video id -- confirm against the API response).
    """
    first_part, second_part = entry[0], entry[1]
    video_id = '%d_%d' % (first_part, second_part)
    video_url = 'http://vk.com/video' + video_id
    return self.url_result(video_url, 'VK', video_id=video_id)

webpage = self._download_webpage(url, page_id)
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
page_id = mobj.group('id')
section = mobj.group('section') or 'all'

entries = [
self.url_result(
'http://vk.com/video' + video_id, 'VK', video_id=video_id)
for video_id in orderedSet(re.findall(r'href="/video(-?[0-9_]+)"', webpage))]
data = self._download_json(
self._TEMPLATE_URL % (page_id, section), page_id,
transform_source=lambda s: re.sub(r'.*<!json>(?P<callback_data>.*?)<!>.*', r'\g<callback_data>', s))

title = unescapeHTML(self._search_regex(
r'<title>\s*([^<]+?)\s+\|\s+\d+\s+videos',
webpage, 'title', default=page_id))
entries = [self._generate_entry(entry) for entry in reversed(data[section]['list'])]

return self.playlist_result(entries, page_id, title)
return self.playlist_result(entries, page_id, section)


class VKWallPostIE(VKBaseIE):
Expand Down

0 comments on commit 974e4dd

Please sign in to comment.