Skip to content

Commit

Permalink
[VK] Fix downloading user playlist
Browse files Browse the repository at this point in the history
Scraping the HTML page only returns the last 30 videos. Use the JSON API
instead, which returns up to 1000 videos.

Fixes #14327
  • Loading branch information
pawitp committed Oct 14, 2017
1 parent c9bd503 commit ca7f609
Showing 1 changed file with 31 additions and 17 deletions.
48 changes: 31 additions & 17 deletions youtube_dl/extractor/vk.py
Original file line number Diff line number Diff line change
Expand Up @@ -457,15 +457,29 @@ def _real_extract(self, url):
class VKUserVideosIE(VKBaseIE):
IE_NAME = 'vk:uservideos'
IE_DESC = "VK - User's Videos"
_VALID_URL = r'https?://(?:(?:m|new)\.)?vk\.com/videos(?P<id>-?[0-9]+)(?!\?.*\bz=video)(?:[/?#&]|$)'
_TEMPLATE_URL = 'https://vk.com/videos'
_VALID_URL = r'https?://(?:(?:m|new)\.)?vk\.com/videos(?P<id>-?[0-9]+)(?:.*\bsection=(?P<section>\w+))?(?!\?.*\bz=video)(?:[/?#&]|$)'
_TEMPLATE_URL = 'https://vk.com/al_video.php?act=load_videos_silent&al=1&need_albums=0&offset=0&oid=%s&rowlen=3&section=%s'
_TESTS = [{
'url': 'http://vk.com/videos205387401',
'url': 'https://vk.com/videos451841516?section=album_1',
'info_dict': {
'id': '205387401',
'title': "Tom Cruise's Videos",
'id': '451841516',
'title': 'album_1',
},
'playlist_mincount': 4,
'playlist_count': 39,
}, {
'url': 'https://m.vk.com/videos451841516',
'info_dict': {
'id': '451841516',
'title': 'all',
},
'playlist_count': 40,
}, {
'url': 'https://vk.com/videos451841516',
'info_dict': {
'id': '451841516',
'title': 'all',
},
'playlist_count': 40,
}, {
'url': 'http://vk.com/videos-77521',
'only_matching': True,
Expand All @@ -480,21 +494,21 @@ class VKUserVideosIE(VKBaseIE):
'only_matching': True,
}]

def _real_extract(self, url):
page_id = self._match_id(url)
def _generate_entry(self, entry):
video_id = '%d_%d' % (entry[0], entry[1])
return self.url_result('http://vk.com/video' + video_id, 'VK', video_id=video_id)

webpage = self._download_webpage(url, page_id)
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
page_id = mobj.group('id')
section = mobj.group('section') or 'all'

entries = [
self.url_result(
'http://vk.com/video' + video_id, 'VK', video_id=video_id)
for video_id in orderedSet(re.findall(r'href="/video(-?[0-9_]+)"', webpage))]
data = self._download_json(self._TEMPLATE_URL % (page_id, section), page_id,
transform_source=lambda s: re.sub(r'.*<!json>(?P<callback_data>.*?)<!>.*', r'\g<callback_data>', s))

title = unescapeHTML(self._search_regex(
r'<title>\s*([^<]+?)\s+\|\s+\d+\s+videos',
webpage, 'title', default=page_id))
entries = [self._generate_entry(entry) for entry in reversed(data[section]['list'])]

return self.playlist_result(entries, page_id, title)
return self.playlist_result(entries, page_id, section)


class VKWallPostIE(VKBaseIE):
Expand Down

1 comment on commit ca7f609

@OguzOzdemir
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there any progress on this topic? We are waiting for a fix for VK playlists.

Please sign in to comment.