ytdl-org · dirkf · Mar 6, 2021 · Mar 5, 2021 · Mar 22, 2021 · Mar 23, 2021
diff --git a/youtube_dl/extractor/bbc.py b/youtube_dl/extractor/bbc.py
@@ -1318,7 +1318,7 @@ def _entries(self, webpage, url, playlist_id):
             if single_page:
                 return
             next_page = self._search_regex(
-                r'<li[^>]+class=(["\'])pagination_+next\1[^>]*><a[^>]+href=(["\'])(?P<url>(?:(?!\2).)+)\2',
+                r'(?:<li[^>]+class=(["\'])pagination_+next\1[^>]*>\s*<a|<a[^>]+\baria-label=(["\'])Next Page\2)[^>]+href=(["\'])(?P<url>(?:(?!\3).)+)\3',
                 webpage, 'next page url', default=None, group='url')
             if not next_page:
                 break
@@ -1328,6 +1328,7 @@ def _entries(self, webpage, url, playlist_id):
 
     def _real_extract(self, url):
         playlist_id = self._match_id(url)
+        self._playlist_id = playlist_id
 
         webpage = self._download_webpage(url, playlist_id)
 
@@ -1342,7 +1343,10 @@ class BBCCoUkIPlayerPlaylistIE(BBCCoUkPlaylistBaseIE):
     IE_NAME = 'bbc.co.uk:iplayer:playlist'
     _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/iplayer/(?:episodes|group)/(?P<id>%s)' % BBCCoUkIE._ID_REGEX
     _URL_TEMPLATE = 'http://www.bbc.co.uk/iplayer/episode/%s'
-    _VIDEO_ID_TEMPLATE = r'data-ip-id=["\'](%s)'
+    _VIDEO_ID_TEMPLATE = r'"href":\s*"/iplayer/episode/(%s)/'
+    _SERIES_ID_TEMPLATE = r'/iplayer/episodes/%s/.+?[?&]seriesId=(%s)'  # filled with (playlist ID, series ID regex); '.+?' is lazy so each link on a line matches, not only the last
+    _SERIES_URL_TEMPLATE = 'http://www.bbc.co.uk/programmes/%s/episodes/player'  # %s: a series ID captured by _SERIES_ID_TEMPLATE
+
     _TESTS = [{
         'url': 'http://www.bbc.co.uk/iplayer/episodes/b05rcz9v',
         'info_dict': {
@@ -1358,16 +1362,47 @@ class BBCCoUkIPlayerPlaylistIE(BBCCoUkPlaylistBaseIE):
         'info_dict': {
             'id': 'p02tcc32',
             'title': 'Bohemian Icons',
-            'description': 'md5:683e901041b2fe9ba596f2ab04c4dbe7',
+            'description': 'md5:8b60017680e9f3115e79e0c20697a585',
+        },
+        'playlist_mincount': 10,
+    }, {
+        # Playlist with more than one series/season
+        'url': 'https://www.bbc.co.uk/iplayer/episodes/b094m5t9/doctor-foster',
+        'info_dict': {
+            'id': 'b094m5t9',
+            'title': 'Doctor Foster',
+            'description': 'A trusted GP sees her charmed life explode when she suspects her husband of an affair.',
         },
         'playlist_mincount': 10,
+    }, {
+        # Playlist with more than one page
+        'url': 'https://www.bbc.co.uk/iplayer/episodes/m0004c4v/beechgrove',
+        'info_dict': {
+            'id': 'm0004c4v',
+            'title': 'Beechgrove',
+            'description': 'Gardening show that celebrates Scottish horticulture and growing conditions.',
+        },
+        'playlist_mincount': 37,
     }]
 
+    def _entries(self, webpage, url, playlist_id):  # generator: the base class's entries first, then one result per linked series
+        for entry in super(BBCCoUkIPlayerPlaylistIE, self)._entries(webpage, url, playlist_id):
+            yield entry  # passed through unchanged
+        for series_id in re.findall(self._SERIES_ID_TEMPLATE % (playlist_id, BBCCoUkIE._ID_REGEX), webpage):  # series IDs linked from this playlist's page
+            yield self.url_result(self._SERIES_URL_TEMPLATE % series_id)  # URL result for the series page; NOTE(review): repeated IDs are not de-duplicated here
+
     def _extract_title_and_description(self, webpage):
-        title = self._search_regex(r'<h1>([^<]+)</h1>', webpage, 'title', fatal=False)
-        description = self._search_regex(
-            r'<p[^>]+class=(["\'])subtitle\1[^>]*>(?P<value>[^<]+)</p>',
-            webpage, 'description', fatal=False, group='value')
+        """Return (title, description) from the page's embedded Redux state JSON, else from the og/meta search helpers."""
+        # _search_regex, not _html_search_regex (its HTML clean-up can corrupt JSON text); `or {}` covers a failed non-fatal parse
+        redux_state = self._parse_json(self._search_regex(
+            r'<script[^>]+id=(["\'])tvip-script-app-store\1[^>]*>[^<]*_REDUX_STATE__\s*=\s*(?P<json>[^<]+)\s*;\s*<',
+            webpage, 'redux state', default='{}', group='json'), self._playlist_id, fatal=False) or {}
+        info = dict(redux_state.get('header') or {})  # copy, so the parsed state itself is not mutated
+        info.update(redux_state.get('page') or {})  # 'page' values take precedence over 'header' values
+        title = info.get('title') or self._og_search_title(webpage, fatal=False)
+        description = info.get('description') or \
+            self._html_search_meta('description', webpage, default=None) or \
+            self._og_search_description(webpage)
         return title, description