[VK] Fix downloading user playlist

Scraping the HTML page only retrieves the last 30 videos. Use the JSON API instead to get up to 1000 videos. Fixes #14327.
pawitp · Oct 14, 2017 · ca7f609 · ca7f609 · OguzOzdemir · Jan 5, 2019
1 parent c9bd503
commit ca7f609
Showing 1 changed file with 31 additions and 17 deletions.
diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py
@@ -457,15 +457,29 @@ def _real_extract(self, url):
 class VKUserVideosIE(VKBaseIE):
     IE_NAME = 'vk:uservideos'
     IE_DESC = "VK - User's Videos"
-    _VALID_URL = r'https?://(?:(?:m|new)\.)?vk\.com/videos(?P<id>-?[0-9]+)(?!\?.*\bz=video)(?:[/?#&]|$)'
-    _TEMPLATE_URL = 'https://vk.com/videos'
+    _VALID_URL = r'https?://(?:(?:m|new)\.)?vk\.com/videos(?P<id>-?[0-9]+)(?:.*\bsection=(?P<section>\w+))?(?!\?.*\bz=video)(?:[/?#&]|$)'
+    _TEMPLATE_URL = 'https://vk.com/al_video.php?act=load_videos_silent&al=1&need_albums=0&offset=0&oid=%s&rowlen=3&section=%s'
     _TESTS = [{
-        'url': 'http://vk.com/videos205387401',
+        'url': 'https://vk.com/videos451841516?section=album_1',
         'info_dict': {
-            'id': '205387401',
-            'title': "Tom Cruise's Videos",
+            'id': '451841516',
+            'title': 'album_1',
         },
-        'playlist_mincount': 4,
+        'playlist_count': 39,
+    }, {
+        'url': 'https://m.vk.com/videos451841516',
+        'info_dict': {
+            'id': '451841516',
+            'title': 'all',
+        },
+        'playlist_count': 40,
+    }, {
+        'url': 'https://vk.com/videos451841516',
+        'info_dict': {
+            'id': '451841516',
+            'title': 'all',
+        },
+        'playlist_count': 40,
     }, {
         'url': 'http://vk.com/videos-77521',
         'only_matching': True,
@@ -480,21 +494,21 @@ class VKUserVideosIE(VKBaseIE):
         'only_matching': True,
     }]
 
-    def _real_extract(self, url):
-        page_id = self._match_id(url)
+    def _generate_entry(self, entry):
+        video_id = '%d_%d' % (entry[0], entry[1])
+        return self.url_result('http://vk.com/video' + video_id, 'VK', video_id=video_id)
 
-        webpage = self._download_webpage(url, page_id)
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        page_id = mobj.group('id')
+        section = mobj.group('section') or 'all'
 
-        entries = [
-            self.url_result(
-                'http://vk.com/video' + video_id, 'VK', video_id=video_id)
-            for video_id in orderedSet(re.findall(r'href="/video(-?[0-9_]+)"', webpage))]
+        data = self._download_json(self._TEMPLATE_URL % (page_id, section), page_id,
+            transform_source=lambda s: re.sub(r'.*<!json>(?P<callback_data>.*?)<!>.*', r'\g<callback_data>', s))
 
-        title = unescapeHTML(self._search_regex(
-            r'<title>\s*([^<]+?)\s+\|\s+\d+\s+videos',
-            webpage, 'title', default=page_id))
+        entries = [self._generate_entry(entry) for entry in reversed(data[section]['list'])]
 
-        return self.playlist_result(entries, page_id, title)
+        return self.playlist_result(entries, page_id, section)
 
 
 class VKWallPostIE(VKBaseIE):