Commit bcad870

Merge branch 'yt-dlp:master' into master
saintliao authored Mar 6, 2024
2 parents 84cfbff + cf91400 commit bcad870
Showing 13 changed files with 286 additions and 169 deletions.
2 changes: 1 addition & 1 deletion README.md
@@ -218,7 +218,7 @@ Example usage:
yt-dlp --update-to nightly
# To install nightly with pip:
-python -m pip install -U --pre yt-dlp
+python -m pip install -U --pre yt-dlp[default]
```

<!-- MANPAGE: BEGIN EXCLUDED SECTION -->
39 changes: 23 additions & 16 deletions devscripts/install_deps.py
@@ -19,7 +19,7 @@ def parse_args():
    parser.add_argument(
        'input', nargs='?', metavar='TOMLFILE', default='pyproject.toml', help='Input file (default: %(default)s)')
    parser.add_argument(
-        '-e', '--exclude', metavar='REQUIREMENT', action='append', help='Exclude a required dependency')
+        '-e', '--exclude', metavar='DEPENDENCY', action='append', help='Exclude a dependency')
    parser.add_argument(
        '-i', '--include', metavar='GROUP', action='append', help='Include an optional dependency group')
    parser.add_argument(
@@ -33,21 +33,28 @@ def parse_args():

def main():
    args = parse_args()
-    toml_data = parse_toml(read_file(args.input))
-    deps = toml_data['project']['dependencies']
-    targets = deps.copy() if not args.only_optional else []
-
-    for exclude in args.exclude or []:
-        for dep in deps:
-            simplified_dep = re.match(r'[\w-]+', dep)[0]
-            if dep in targets and (exclude.lower() == simplified_dep.lower() or exclude == dep):
-                targets.remove(dep)
-
-    optional_deps = toml_data['project']['optional-dependencies']
-    for include in args.include or []:
-        group = optional_deps.get(include)
-        if group:
-            targets.extend(group)
+    project_table = parse_toml(read_file(args.input))['project']
+    optional_groups = project_table['optional-dependencies']
+    excludes = args.exclude or []
+
+    deps = []
+    if not args.only_optional:  # `-o` should exclude 'dependencies' and the 'default' group
+        deps.extend(project_table['dependencies'])
+        if 'default' not in excludes:  # `--exclude default` should exclude entire 'default' group
+            deps.extend(optional_groups['default'])
+
+    def name(dependency):
+        return re.match(r'[\w-]+', dependency)[0].lower()
+
+    target_map = {name(dep): dep for dep in deps}
+
+    for include in filter(None, map(optional_groups.get, args.include or [])):
+        target_map.update(zip(map(name, include), include))
+
+    for exclude in map(name, excludes):
+        target_map.pop(exclude, None)
+
+    targets = list(target_map.values())

    if args.print:
        for target in targets:
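The rewrite replaces the old list-scanning exclude loop with a map keyed by each requirement's normalized project name, so includes can override same-named entries and excludes match regardless of case or environment markers. A minimal standalone sketch of that selection logic, using hypothetical requirement strings:

```python
import re

def name(dependency):
    # 'Brotli; implementation_name=="cpython"' -> 'brotli'
    return re.match(r'[\w-]+', dependency)[0].lower()

deps = ['requests>=2.31.0', 'Brotli; implementation_name=="cpython"']
target_map = {name(dep): dep for dep in deps}

# An included group can override a same-named default requirement...
target_map.update({name(dep): dep for dep in ['requests==2.32.0']})
# ...and `--exclude brotli` matches by normalized name, markers and all.
target_map.pop(name('brotli'), None)

print(list(target_map.values()))  # ['requests==2.32.0']
```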
1 change: 1 addition & 0 deletions pyproject.toml
@@ -51,6 +51,7 @@ dependencies = [
]

[project.optional-dependencies]
+default = []
secretstorage = [
"cffi",
"secretstorage",
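The new `default` group is empty in this commit, but it gives `yt-dlp[default]` (as used in the README change above) a valid extras target. A quick way to inspect it locally, assuming Python 3.11+ for `tomllib`:

```python
import tomllib

with open('pyproject.toml', 'rb') as f:
    optional = tomllib.load(f)['project']['optional-dependencies']
print(optional['default'])  # [] -- resolvable, currently pulls in nothing
```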
4 changes: 2 additions & 2 deletions test/test_websockets.py
@@ -192,8 +192,8 @@ def test_raise_http_error(self, handler, status):

    @pytest.mark.parametrize('handler', ['Websockets'], indirect=True)
    @pytest.mark.parametrize('params,extensions', [
-        ({'timeout': 0.00001}, {}),
-        ({}, {'timeout': 0.00001}),
+        ({'timeout': sys.float_info.min}, {}),
+        ({}, {'timeout': sys.float_info.min}),
    ])
    def test_timeout(self, handler, params, extensions):
        with handler(**params) as rh:
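The switch from `0.00001` to `sys.float_info.min` presumably hardens the test against flakiness: ten microseconds can occasionally be enough for a local handshake to finish, whereas the smallest positive float is still a real, nonzero timeout (zero often means "no timeout") that no socket operation can beat:

```python
import sys

# Smallest positive normalized float, about 2.2e-308 seconds: truthy and
# nonzero, so timeout handling still applies, yet guaranteed to have
# elapsed before any network operation can complete.
print(sys.float_info.min)  # 2.2250738585072014e-308
print(bool(sys.float_info.min), sys.float_info.min > 0)  # True True
```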
1 change: 1 addition & 0 deletions yt_dlp/extractor/_extractors.py
@@ -444,6 +444,7 @@
from .dailymotion import (
    DailymotionIE,
    DailymotionPlaylistIE,
+    DailymotionSearchIE,
    DailymotionUserIE,
)
from .dailywire import (
16 changes: 15 additions & 1 deletion yt_dlp/extractor/cctv.py
@@ -88,6 +88,20 @@ class CCTVIE(InfoExtractor):
        'params': {
            'skip_download': True,
        },
+    }, {
+        # videoCenterId: "id"
+        'url': 'http://news.cctv.com/2024/02/21/ARTIcU5tKIOIF2myEGCATkLo240221.shtml',
+        'info_dict': {
+            'id': '5c846c0518444308ba32c4159df3b3e0',
+            'ext': 'mp4',
+            'title': '《平“语”近人——习近平喜欢的典故》第三季 第5集:风物长宜放眼量',
+            'uploader': 'yangjuan',
+            'timestamp': 1708554940,
+            'upload_date': '20240221',
+        },
+        'params': {
+            'skip_download': True,
+        },
    }, {
        # var ids = ["id"]
        'url': 'http://www.ncpa-classic.com/clt/more/416/index.shtml',
@@ -128,7 +142,7 @@ def _real_extract(self, url):

        video_id = self._search_regex(
            [r'var\s+guid\s*=\s*["\']([\da-fA-F]+)',
-             r'videoCenterId["\']\s*,\s*["\']([\da-fA-F]+)',
+             r'videoCenterId(?:["\']\s*,|:)\s*["\']([\da-fA-F]+)',
             r'changePlayer\s*\(\s*["\']([\da-fA-F]+)',
             r'load[Vv]ideo\s*\(\s*["\']([\da-fA-F]+)',
             r'var\s+initMyAray\s*=\s*["\']([\da-fA-F]+)',
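The widened alternation covers the newer `videoCenterId: "id"` page markup (exercised by the new test above) alongside the older `"videoCenterId", "id"` argument form. A quick check against both hypothetical page snippets:

```python
import re

pattern = r'videoCenterId(?:["\']\s*,|:)\s*["\']([\da-fA-F]+)'
for snippet in ('"videoCenterId", "5c846c0518444308ba32c4159df3b3e0"',  # old form
                'videoCenterId: "5c846c0518444308ba32c4159df3b3e0"'):   # new form
    print(re.search(pattern, snippet).group(1))
```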
110 changes: 81 additions & 29 deletions yt_dlp/extractor/dailymotion.py
@@ -1,6 +1,7 @@
import functools
import json
import re
+import urllib.parse

from .common import InfoExtractor
from ..networking.exceptions import HTTPError
@@ -44,36 +45,41 @@ def _real_initialize(self):
        self._FAMILY_FILTER = ff == 'on' if ff else age_restricted(18, self.get_param('age_limit'))
        self._set_dailymotion_cookie('ff', 'on' if self._FAMILY_FILTER else 'off')

+    def _get_token(self, xid):
+        cookies = self._get_dailymotion_cookies()
+        token = self._get_cookie_value(cookies, 'access_token') or self._get_cookie_value(cookies, 'client_token')
+        if token:
+            return token
+
+        data = {
+            'client_id': 'f1a362d288c1b98099c7',
+            'client_secret': 'eea605b96e01c796ff369935357eca920c5da4c5',
+        }
+        username, password = self._get_login_info()
+        if username:
+            data.update({
+                'grant_type': 'password',
+                'password': password,
+                'username': username,
+            })
+        else:
+            data['grant_type'] = 'client_credentials'
+        try:
+            token = self._download_json(
+                'https://graphql.api.dailymotion.com/oauth/token',
+                None, 'Downloading Access Token',
+                data=urlencode_postdata(data))['access_token']
+        except ExtractorError as e:
+            if isinstance(e.cause, HTTPError) and e.cause.status == 400:
+                raise ExtractorError(self._parse_json(
+                    e.cause.response.read().decode(), xid)['error_description'], expected=True)
+            raise
+        self._set_dailymotion_cookie('access_token' if username else 'client_token', token)
+        return token

    def _call_api(self, object_type, xid, object_fields, note, filter_extra=None):
        if not self._HEADERS.get('Authorization'):
-            cookies = self._get_dailymotion_cookies()
-            token = self._get_cookie_value(cookies, 'access_token') or self._get_cookie_value(cookies, 'client_token')
-            if not token:
-                data = {
-                    'client_id': 'f1a362d288c1b98099c7',
-                    'client_secret': 'eea605b96e01c796ff369935357eca920c5da4c5',
-                }
-                username, password = self._get_login_info()
-                if username:
-                    data.update({
-                        'grant_type': 'password',
-                        'password': password,
-                        'username': username,
-                    })
-                else:
-                    data['grant_type'] = 'client_credentials'
-                try:
-                    token = self._download_json(
-                        'https://graphql.api.dailymotion.com/oauth/token',
-                        None, 'Downloading Access Token',
-                        data=urlencode_postdata(data))['access_token']
-                except ExtractorError as e:
-                    if isinstance(e.cause, HTTPError) and e.cause.status == 400:
-                        raise ExtractorError(self._parse_json(
-                            e.cause.response.read().decode(), xid)['error_description'], expected=True)
-                    raise
-                self._set_dailymotion_cookie('access_token' if username else 'client_token', token)
-            self._HEADERS['Authorization'] = 'Bearer ' + token
+            self._HEADERS['Authorization'] = f'Bearer {self._get_token(xid)}'

        resp = self._download_json(
            'https://graphql.api.dailymotion.com/', xid, note, data=json.dumps({
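The refactor is behavior-preserving: the OAuth exchange moves out of `_call_api` into `_get_token` so the new search extractor below can share it. Its cache-then-fetch contract, as a standalone sketch with stand-in cookie and fetch helpers:

```python
def get_token(cache, fetch, username=None):
    # Reuse a token cached in cookies if either variant is present...
    token = cache.get('access_token') or cache.get('client_token')
    if token:
        return token
    # ...otherwise fetch one with the appropriate OAuth grant and cache it
    # under a key that records which grant produced it.
    token = fetch('password' if username else 'client_credentials')
    cache['access_token' if username else 'client_token'] = token
    return token

cache = {}
print(get_token(cache, lambda grant: f'{grant}-token'))  # fetches once
print(get_token(cache, lambda grant: 'never called'))    # served from cache
```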
@@ -393,9 +399,55 @@ def _extract_embed_urls(cls, url, webpage):
            yield '//dailymotion.com/playlist/%s' % p


+class DailymotionSearchIE(DailymotionPlaylistBaseIE):
+    IE_NAME = 'dailymotion:search'
+    _VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/search/(?P<id>[^/?#]+)/videos'
+    _PAGE_SIZE = 20
+    _TESTS = [{
+        'url': 'http://www.dailymotion.com/search/king of turtles/videos',
+        'info_dict': {
+            'id': 'king of turtles',
+            'title': 'king of turtles',
+        },
+        'playlist_mincount': 90,
+    }]
+    _SEARCH_QUERY = 'query SEARCH_QUERY( $query: String! $page: Int $limit: Int ) { search { videos( query: $query first: $limit page: $page ) { edges { node { xid } } } } } '
+
+    def _call_search_api(self, term, page, note):
+        if not self._HEADERS.get('Authorization'):
+            self._HEADERS['Authorization'] = f'Bearer {self._get_token(term)}'
+        resp = self._download_json(
+            'https://graphql.api.dailymotion.com/', None, note, data=json.dumps({
+                'operationName': 'SEARCH_QUERY',
+                'query': self._SEARCH_QUERY,
+                'variables': {
+                    'limit': 20,
+                    'page': page,
+                    'query': term,
+                }
+            }).encode(), headers=self._HEADERS)
+        obj = traverse_obj(resp, ('data', 'search', {dict}))
+        if not obj:
+            raise ExtractorError(
+                traverse_obj(resp, ('errors', 0, 'message', {str})) or 'Could not fetch search data')
+
+        return obj
+
+    def _fetch_page(self, term, page):
+        page += 1
+        response = self._call_search_api(term, page, f'Searching "{term}" page {page}')
+        for xid in traverse_obj(response, ('videos', 'edges', ..., 'node', 'xid')):
+            yield self.url_result(f'https://www.dailymotion.com/video/{xid}', DailymotionIE, xid)
+
+    def _real_extract(self, url):
+        term = urllib.parse.unquote_plus(self._match_id(url))
+        return self.playlist_result(
+            OnDemandPagedList(functools.partial(self._fetch_page, term), self._PAGE_SIZE), term, term)


class DailymotionUserIE(DailymotionPlaylistBaseIE):
    IE_NAME = 'dailymotion:user'
-    _VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/(?!(?:embed|swf|#|video|playlist)/)(?:(?:old/)?user/)?(?P<id>[^/]+)'
+    _VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/(?!(?:embed|swf|#|video|playlist|search)/)(?:(?:old/)?user/)?(?P<id>[^/?#]+)'
    _TESTS = [{
        'url': 'https://www.dailymotion.com/user/nqtv',
        'info_dict': {
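The new extractor pages through results lazily: `OnDemandPagedList` only invokes `_fetch_page` for the pages a download actually touches, and `_fetch_page` converts the 0-indexed page numbers it receives to the API's 1-indexed scheme. A minimal sketch of that pattern, with a fake fetcher standing in for the GraphQL call:

```python
import functools

from yt_dlp.utils import OnDemandPagedList

PAGE_SIZE = 20

def fetch_page(term, page):
    page += 1  # OnDemandPagedList is 0-indexed; the search API is 1-indexed
    print(f'Searching "{term}" page {page}')
    for n in range(PAGE_SIZE):  # stand-in for the GraphQL search request
        yield f'{term}-xid-{(page - 1) * PAGE_SIZE + n}'

entries = OnDemandPagedList(functools.partial(fetch_page, 'king of turtles'), PAGE_SIZE)
print(entries.getslice(0, 3))  # only page 1 is ever requested
```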
9 changes: 6 additions & 3 deletions yt_dlp/extractor/dumpert.py
@@ -8,9 +8,9 @@

class DumpertIE(InfoExtractor):
    _VALID_URL = r'''(?x)
-        (?P<protocol>https?)://(?:(?:www|legacy)\.)?dumpert\.nl(?:
-            /(?:mediabase|embed|item)/|
-            (?:/toppers|/latest|/?)\?selectedId=
+        (?P<protocol>https?)://(?:(?:www|legacy)\.)?dumpert\.nl/(?:
+            (?:mediabase|embed|item)/|
+            [^#]*[?&]selectedId=
        )(?P<id>[0-9]+[/_][0-9a-zA-Z]+)'''
    _TESTS = [{
        'url': 'https://www.dumpert.nl/item/6646981_951bc60f',
@@ -56,6 +56,9 @@ class DumpertIE(InfoExtractor):
    }, {
        'url': 'https://www.dumpert.nl/?selectedId=100031688_b317a185',
        'only_matching': True,
+    }, {
+        'url': 'https://www.dumpert.nl/toppers/dag?selectedId=100086074_f5cef3ac',
+        'only_matching': True,
    }]

    def _real_extract(self, url):
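The loosened second alternative accepts `selectedId` as a query parameter on any path, not just `/toppers`, `/latest`, or the site root. A quick check of both test URLs above against the new pattern:

```python
import re

_VALID_URL = r'''(?x)
    (?P<protocol>https?)://(?:(?:www|legacy)\.)?dumpert\.nl/(?:
        (?:mediabase|embed|item)/|
        [^#]*[?&]selectedId=
    )(?P<id>[0-9]+[/_][0-9a-zA-Z]+)'''

for url in ('https://www.dumpert.nl/?selectedId=100031688_b317a185',
            'https://www.dumpert.nl/toppers/dag?selectedId=100086074_f5cef3ac'):
    print(re.match(_VALID_URL, url).group('id'))
```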