Skip to content

Commit

Permalink
Merge branch 'yt-dlp:master' into main
Browse files (browse the repository at this point in the history)
  • Loading branch information
kclauhk authored Jul 12, 2024
2 parents: c7b0d1f + cc1a309; commit: d76efcc
Show file tree
Hide file tree
Showing 10 changed files with 53 additions and 22 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -468,7 +468,7 @@ jobs:
- name: Install Requirements
run: |
python devscripts/install_deps.py -o --include build
python devscripts/install_deps.py
python devscripts/install_deps.py --include curl-cffi
python -m pip install -U "https://yt-dlp.github.io/Pyinstaller-Builds/i686/pyinstaller-6.7.0-py3-none-any.whl"
- name: Prepare
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,7 @@ The following provide support for impersonating browser requests. This may be re

* [**curl_cffi**](https://github.com/yifeikong/curl_cffi) (recommended) - Python binding for [curl-impersonate](https://github.com/lwthiker/curl-impersonate). Provides impersonation targets for Chrome, Edge and Safari. Licensed under [MIT](https://github.com/yifeikong/curl_cffi/blob/main/LICENSE)
* Can be installed with the `curl-cffi` group, e.g. `pip install "yt-dlp[default,curl-cffi]"`
* Currently only included in `yt-dlp.exe` and `yt-dlp_macos` builds
* Currently included in `yt-dlp.exe`, `yt-dlp_x86.exe`, `yt-dlp_linux` and `yt-dlp_macos` builds


### Metadata
Expand Down
2 changes: 1 addition & 1 deletion bundle/docker/static/entrypoint.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
set -e

source ~/.local/share/pipx/venvs/pyinstaller/bin/activate
python -m devscripts.install_deps --include secretstorage
python -m devscripts.install_deps --include secretstorage --include curl-cffi
python -m devscripts.make_lazy_extractors
python devscripts/update-version.py -c "${channel}" -r "${origin}" "${version}"
python -m bundle.pyinstaller
Expand Down
4 changes: 3 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,9 @@ dependencies = [

[project.optional-dependencies]
default = []
curl-cffi = ["curl-cffi==0.5.10; implementation_name=='cpython'"]
curl-cffi = [
"curl-cffi>=0.5.10,!=0.6.*,<0.8; implementation_name=='cpython'",
]
secretstorage = [
"cffi",
"secretstorage",
Expand Down
1 change: 0 additions & 1 deletion test/test_networking.py
Original file line number Diff line number Diff line change
Expand Up @@ -914,7 +914,6 @@ def mock_close(*args, **kwargs):
class TestCurlCFFIRequestHandler(TestRequestHandlerBase):

@pytest.mark.parametrize('params,extensions', [
({}, {'impersonate': ImpersonateTarget('chrome')}),
({'impersonate': ImpersonateTarget('chrome', '110')}, {}),
({'impersonate': ImpersonateTarget('chrome', '99')}, {'impersonate': ImpersonateTarget('chrome', '110')}),
])
Expand Down
20 changes: 16 additions & 4 deletions yt_dlp/extractor/box.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@


class BoxIE(InfoExtractor):
_VALID_URL = r'https?://(?:[^.]+\.)?app\.box\.com/s/(?P<shared_name>[^/?#]+)(?:/file/(?P<id>\d+))?'
_VALID_URL = r'https?://(?:[^.]+\.)?(?P<service>app|ent)\.box\.com/s/(?P<shared_name>[^/?#]+)(?:/file/(?P<id>\d+))?'
_TESTS = [{
'url': 'https://mlssoccer.app.box.com/s/0evd2o3e08l60lr4ygukepvnkord1o1x/file/510727257538',
'md5': '1f81b2fd3960f38a40a3b8823e5fcd43',
Expand All @@ -38,10 +38,22 @@ class BoxIE(InfoExtractor):
'uploader_id': '239068974',
},
'params': {'skip_download': 'dash fragment too small'},
}, {
'url': 'https://thejacksonlaboratory.ent.box.com/s/2x09dm6vcg6y28o0oox1so4l0t8wzt6l/file/1536173056065',
'info_dict': {
'id': '1536173056065',
'ext': 'mp4',
'uploader_id': '18523128264',
'uploader': 'Lexi Hennigan',
'title': 'iPSC Symposium recording part 1.mp4',
'timestamp': 1716228343,
'upload_date': '20240520',
},
'params': {'skip_download': 'dash fragment too small'},
}]

def _real_extract(self, url):
shared_name, file_id = self._match_valid_url(url).groups()
shared_name, file_id, service = self._match_valid_url(url).group('shared_name', 'id', 'service')
webpage = self._download_webpage(url, file_id or shared_name)

if not file_id:
Expand All @@ -57,14 +69,14 @@ def _real_extract(self, url):
request_token = self._search_json(
r'Box\.config\s*=', webpage, 'Box config', file_id)['requestToken']
access_token = self._download_json(
'https://app.box.com/app-api/enduserapp/elements/tokens', file_id,
f'https://{service}.box.com/app-api/enduserapp/elements/tokens', file_id,
'Downloading token JSON metadata',
data=json.dumps({'fileIDs': [file_id]}).encode(), headers={
'Content-Type': 'application/json',
'X-Request-Token': request_token,
'X-Box-EndUser-API': 'sharedName=' + shared_name,
})[file_id]['read']
shared_link = 'https://app.box.com/s/' + shared_name
shared_link = f'https://{service}.box.com/s/{shared_name}'
f = self._download_json(
'https://api.box.com/2.0/files/' + file_id, file_id,
'Downloading file JSON metadata', headers={
Expand Down
8 changes: 5 additions & 3 deletions yt_dlp/extractor/tiktok.py
Original file line number Diff line number Diff line change
Expand Up @@ -1458,9 +1458,11 @@ def _real_extract(self, url):

if webpage:
data = self._get_sigi_state(webpage, uploader or room_id)
room_id = (traverse_obj(data, ('UserModule', 'users', ..., 'roomId', {str_or_none}), get_all=False)
or self._search_regex(r'snssdk\d*://live\?room_id=(\d+)', webpage, 'room ID', default=None)
or room_id)
room_id = (
traverse_obj(data, ((
('LiveRoom', 'liveRoomUserInfo', 'user'),
('UserModule', 'users', ...)), 'roomId', {str}, any))
or self._search_regex(r'snssdk\d*://live\?room_id=(\d+)', webpage, 'room ID', default=room_id))
uploader = uploader or traverse_obj(
data, ('LiveRoom', 'liveRoomUserInfo', 'user', 'uniqueId'),
('UserModule', 'users', ..., 'uniqueId'), get_all=False, expected_type=str)
Expand Down
7 changes: 4 additions & 3 deletions yt_dlp/extractor/tv5mondeplus.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ def _extract_subtitles(data_captions):

def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
webpage = self._download_webpage(url, display_id, impersonate=True)

if ">Ce programme n'est malheureusement pas disponible pour votre zone géographique.<" in webpage:
self.raise_geo_restricted(countries=['FR'])
Expand All @@ -122,8 +122,9 @@ def process_video_files(v):
if not token:
continue
deferred_json = self._download_json(
f'https://api.tv5monde.com/player/asset/{d_param}/resolve?condenseKS=true', display_id,
note='Downloading deferred info', headers={'Authorization': f'Bearer {token}'}, fatal=False)
f'https://api.tv5monde.com/player/asset/{d_param}/resolve?condenseKS=true',
display_id, 'Downloading deferred info', fatal=False, impersonate=True,
headers={'Authorization': f'Bearer {token}'})
v_url = traverse_obj(deferred_json, (0, 'url', {url_or_none}))
if not v_url:
continue
Expand Down
2 changes: 1 addition & 1 deletion yt_dlp/extractor/youtube.py
Original file line number Diff line number Diff line change
Expand Up @@ -3142,7 +3142,7 @@ def _extract_n_function_name(self, jscode):

def _extract_n_function_code(self, video_id, player_url):
player_id = self._extract_player_info(player_url)
func_code = self.cache.load('youtube-nsig', player_id, min_ver='2022.09.1')
func_code = self.cache.load('youtube-nsig', player_id, min_ver='2024.07.09')
jscode = func_code or self._load_player(video_id, player_url)
jsi = JSInterpreter(jscode)

Expand Down
27 changes: 21 additions & 6 deletions yt_dlp/networking/_curlcffi.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import io
import math
import re
import urllib.parse

from ._helper import InstanceStoreMixin, select_proxy
Expand All @@ -27,11 +28,12 @@
if curl_cffi is None:
raise ImportError('curl_cffi is not installed')

curl_cffi_version = tuple(int_or_none(x, default=0) for x in curl_cffi.__version__.split('.'))

if curl_cffi_version != (0, 5, 10):
curl_cffi_version = tuple(map(int, re.split(r'[^\d]+', curl_cffi.__version__)[:3]))

if curl_cffi_version != (0, 5, 10) and not ((0, 7, 0) <= curl_cffi_version < (0, 8, 0)):
curl_cffi._yt_dlp__version = f'{curl_cffi.__version__} (unsupported)'
raise ImportError('Only curl_cffi 0.5.10 is supported')
raise ImportError('Only curl_cffi versions 0.5.10, 0.7.X are supported')

import curl_cffi.requests
from curl_cffi.const import CurlECode, CurlOpt
Expand Down Expand Up @@ -110,6 +112,13 @@ class CurlCFFIRH(ImpersonateRequestHandler, InstanceStoreMixin):
_SUPPORTED_FEATURES = (Features.NO_PROXY, Features.ALL_PROXY)
_SUPPORTED_PROXY_SCHEMES = ('http', 'https', 'socks4', 'socks4a', 'socks5', 'socks5h')
_SUPPORTED_IMPERSONATE_TARGET_MAP = {
**({
ImpersonateTarget('chrome', '124', 'macos', '14'): curl_cffi.requests.BrowserType.chrome124,
ImpersonateTarget('chrome', '123', 'macos', '14'): curl_cffi.requests.BrowserType.chrome123,
ImpersonateTarget('chrome', '120', 'macos', '14'): curl_cffi.requests.BrowserType.chrome120,
ImpersonateTarget('chrome', '119', 'macos', '14'): curl_cffi.requests.BrowserType.chrome119,
ImpersonateTarget('chrome', '116', 'windows', '10'): curl_cffi.requests.BrowserType.chrome116,
} if curl_cffi_version >= (0, 7, 0) else {}),
ImpersonateTarget('chrome', '110', 'windows', '10'): curl_cffi.requests.BrowserType.chrome110,
ImpersonateTarget('chrome', '107', 'windows', '10'): curl_cffi.requests.BrowserType.chrome107,
ImpersonateTarget('chrome', '104', 'windows', '10'): curl_cffi.requests.BrowserType.chrome104,
Expand All @@ -118,9 +127,15 @@ class CurlCFFIRH(ImpersonateRequestHandler, InstanceStoreMixin):
ImpersonateTarget('chrome', '99', 'windows', '10'): curl_cffi.requests.BrowserType.chrome99,
ImpersonateTarget('edge', '101', 'windows', '10'): curl_cffi.requests.BrowserType.edge101,
ImpersonateTarget('edge', '99', 'windows', '10'): curl_cffi.requests.BrowserType.edge99,
**({
ImpersonateTarget('safari', '17.0', 'macos', '14'): curl_cffi.requests.BrowserType.safari17_0,
} if curl_cffi_version >= (0, 7, 0) else {}),
ImpersonateTarget('safari', '15.5', 'macos', '12'): curl_cffi.requests.BrowserType.safari15_5,
ImpersonateTarget('safari', '15.3', 'macos', '11'): curl_cffi.requests.BrowserType.safari15_3,
ImpersonateTarget('chrome', '99', 'android', '12'): curl_cffi.requests.BrowserType.chrome99_android,
**({
ImpersonateTarget('safari', '17.2', 'ios', '17.2'): curl_cffi.requests.BrowserType.safari17_2_ios,
} if curl_cffi_version >= (0, 7, 0) else {}),
}

def _create_instance(self, cookiejar=None):
Expand Down Expand Up @@ -187,7 +202,7 @@ def _send(self, request: Request):
timeout = self._calculate_timeout(request)

# set CURLOPT_LOW_SPEED_LIMIT and CURLOPT_LOW_SPEED_TIME to act as a read timeout. [1]
# curl_cffi does not currently do this. [2]
# This is required only for 0.5.10 [2]
# Note: CURLOPT_LOW_SPEED_TIME is in seconds, so we need to round up to the nearest second. [3]
# [1] https://unix.stackexchange.com/a/305311
# [2] https://github.com/yifeikong/curl_cffi/issues/156
Expand All @@ -203,7 +218,7 @@ def _send(self, request: Request):
data=request.data,
verify=self.verify,
max_redirects=5,
timeout=timeout,
timeout=(timeout, timeout),
impersonate=self._SUPPORTED_IMPERSONATE_TARGET_MAP.get(
self._get_request_target(request)),
interface=self.source_address,
Expand All @@ -222,7 +237,7 @@ def _send(self, request: Request):

elif (
e.code == CurlECode.PROXY
or (e.code == CurlECode.RECV_ERROR and 'Received HTTP code 407 from proxy after CONNECT' in str(e))
or (e.code == CurlECode.RECV_ERROR and 'CONNECT' in str(e))
):
raise ProxyError(cause=e) from e
else:
Expand Down

0 comments on commit d76efcc

Please sign in to comment.