From 81b5dcecc0b1710e01967654fe5204c162ead47a Mon Sep 17 00:00:00 2001 From: ralbertazzi Date: Sat, 13 May 2023 13:40:17 +0200 Subject: [PATCH 1/2] perf: use cached file for wheel inspection --- src/poetry/repositories/http_repository.py | 19 +++++++++++++------ src/poetry/utils/authenticator.py | 8 ++++++++ tests/utils/test_authenticator.py | 16 ++++++++++++++++ 3 files changed, 37 insertions(+), 6 deletions(-) diff --git a/src/poetry/repositories/http_repository.py b/src/poetry/repositories/http_repository.py index 298fc1e815b..e9d06d20cf1 100644 --- a/src/poetry/repositories/http_repository.py +++ b/src/poetry/repositories/http_repository.py @@ -79,10 +79,13 @@ def _get_info_from_wheel(self, url: str) -> PackageInfo: from poetry.inspection.info import PackageInfo wheel_name = urllib.parse.urlparse(url).path.rsplit("/")[-1] - self._log(f"Downloading wheel: {wheel_name}", level="debug") - filename = os.path.basename(wheel_name) + filepath = self._authenticator.get_cached_file_for_url(url) + if filepath: + return PackageInfo.from_wheel(filepath) + self._log(f"Downloading wheel: {wheel_name}", level="debug") + filename = os.path.basename(wheel_name) with temporary_directory() as temp_dir: filepath = Path(temp_dir) / filename self._download(url, filepath) @@ -95,10 +98,12 @@ def _get_info_from_sdist(self, url: str) -> PackageInfo: sdist_name = urllib.parse.urlparse(url).path sdist_name_log = sdist_name.rsplit("/")[-1] - self._log(f"Downloading sdist: {sdist_name_log}", level="debug") + filepath = self._authenticator.get_cached_file_for_url(url) + if filepath: + return PackageInfo.from_wheel(filepath) + self._log(f"Downloading sdist: {sdist_name_log}", level="debug") filename = os.path.basename(sdist_name) - with temporary_directory() as temp_dir: filepath = Path(temp_dir) / filename self._download(url, filepath) @@ -238,8 +243,10 @@ def _links_to_data(self, links: list[Link], data: PackageInfo) -> dict[str, Any] and hasattr(hashlib, link.hash_name) ): with temporary_directory() as temp_dir: - filepath = Path(temp_dir) / link.filename - self._download(link.url, filepath) + filepath = self._authenticator.get_cached_file_for_url(link.url) + if not filepath: + filepath = Path(temp_dir) / link.filename + self._download(link.url, filepath) known_hash = ( getattr(hashlib, link.hash_name)() if link.hash_name else None diff --git a/src/poetry/utils/authenticator.py b/src/poetry/utils/authenticator.py index edf6fe073e4..beb20342ca8 100644 --- a/src/poetry/utils/authenticator.py +++ b/src/poetry/utils/authenticator.py @@ -19,6 +19,7 @@ from cachecontrol import CacheControlAdapter from cachecontrol.caches import FileCache +from cachecontrol.caches.file_cache import url_to_file_path from filelock import FileLock from poetry.config.config import Config @@ -463,6 +464,13 @@ def _get_certs_for_url(self, url: str) -> RepositoryCertificateConfig: return selected.certs(config=self._config) return RepositoryCertificateConfig() + def get_cached_file_for_url(self, url: str) -> Path | None: + if self._cache_control is None: + return None + + path = Path(url_to_file_path(url, self._cache_control)) + return path if path.exists() else None + _authenticator: Authenticator | None = None diff --git a/tests/utils/test_authenticator.py b/tests/utils/test_authenticator.py index 2a0d1bee564..335e2b50c1e 100644 --- a/tests/utils/test_authenticator.py +++ b/tests/utils/test_authenticator.py @@ -627,6 +627,22 @@ def test_authenticator_git_repositories( assert not three.password +def test_authenticator_get_cached_file_for_url__cache_miss(config: Config) -> None: + authenticator = Authenticator(config, NullIO()) + assert ( + authenticator.get_cached_file_for_url("https://foo.bar/cache/miss.whl") is None + ) + + +def test_authenticator_get_cached_file_for_url__cache_hit(config: Config) -> None: + authenticator = Authenticator(config, NullIO()) + url = "https://foo.bar/files/foo-0.1.0.tar.gz" + + authenticator._cache_control.set(url, b"hello") + + assert authenticator.get_cached_file_for_url(url) + + @pytest.mark.parametrize( ("ca_cert", "client_cert", "result"), [ From 9d68507d82e5f00f4e77dc542aba6ab51f9c7505 Mon Sep 17 00:00:00 2001 From: ralbertazzi Date: Sat, 13 May 2023 21:49:17 +0200 Subject: [PATCH 2/2] refactor: unify cache or download logic --- src/poetry/repositories/http_repository.py | 51 ++++++++-------------- 1 file changed, 17 insertions(+), 34 deletions(-) diff --git a/src/poetry/repositories/http_repository.py b/src/poetry/repositories/http_repository.py index e9d06d20cf1..ae96481a71d 100644 --- a/src/poetry/repositories/http_repository.py +++ b/src/poetry/repositories/http_repository.py @@ -2,14 +2,13 @@ import functools import hashlib -import os -import urllib -import urllib.parse from collections import defaultdict +from contextlib import contextmanager from pathlib import Path from typing import TYPE_CHECKING from typing import Any +from typing import Iterator import requests @@ -75,39 +74,28 @@ def authenticated_url(self) -> str: def _download(self, url: str, dest: Path) -> None: return download_file(url, dest, session=self.session) - def _get_info_from_wheel(self, url: str) -> PackageInfo: - from poetry.inspection.info import PackageInfo - - wheel_name = urllib.parse.urlparse(url).path.rsplit("/")[-1] - - filepath = self._authenticator.get_cached_file_for_url(url) + @contextmanager + def _cached_or_downloaded_file(self, link: Link) -> Iterator[Path]: + filepath = self._authenticator.get_cached_file_for_url(link.url) if filepath: - return PackageInfo.from_wheel(filepath) + yield filepath + else: + self._log(f"Downloading: {link.url}", level="debug") + with temporary_directory() as temp_dir: + filepath = Path(temp_dir) / link.filename + self._download(link.url, filepath) + yield filepath - self._log(f"Downloading wheel: {wheel_name}", level="debug") - filename = os.path.basename(wheel_name) - with temporary_directory() as temp_dir: - filepath = Path(temp_dir) / filename - self._download(url, filepath) + def _get_info_from_wheel(self, url: str) -> PackageInfo: + from poetry.inspection.info import PackageInfo + with self._cached_or_downloaded_file(Link(url)) as filepath: return PackageInfo.from_wheel(filepath) def _get_info_from_sdist(self, url: str) -> PackageInfo: from poetry.inspection.info import PackageInfo - sdist_name = urllib.parse.urlparse(url).path - sdist_name_log = sdist_name.rsplit("/")[-1] - - filepath = self._authenticator.get_cached_file_for_url(url) - if filepath: - return PackageInfo.from_wheel(filepath) - - self._log(f"Downloading sdist: {sdist_name_log}", level="debug") - filename = os.path.basename(sdist_name) - with temporary_directory() as temp_dir: - filepath = Path(temp_dir) / filename - self._download(url, filepath) - + with self._cached_or_downloaded_file(Link(url)) as filepath: return PackageInfo.from_sdist(filepath) def _get_info_from_urls(self, urls: dict[str, list[str]]) -> PackageInfo: @@ -242,12 +230,7 @@ def _links_to_data(self, links: list[Link], data: PackageInfo) -> dict[str, Any] and link.hash_name not in ("sha256", "sha384", "sha512") and hasattr(hashlib, link.hash_name) ): - with temporary_directory() as temp_dir: - filepath = self._authenticator.get_cached_file_for_url(link.url) - if not filepath: - filepath = Path(temp_dir) / link.filename - self._download(link.url, filepath) - + with self._cached_or_downloaded_file(link) as filepath: known_hash = ( getattr(hashlib, link.hash_name)() if link.hash_name else None )