refactor: extract cache utilities (#7621)
Co-authored-by: Randy Döring <[email protected]>
ralbertazzi and radoering authored Mar 19, 2023
1 parent b8e912d commit 36ca327
Showing 9 changed files with 419 additions and 379 deletions.
6 changes: 5 additions & 1 deletion src/poetry/config/config.py
@@ -210,7 +210,11 @@ def _get_environment_repositories() -> dict[str, dict[str, str]]:

     @property
     def repository_cache_directory(self) -> Path:
-        return Path(self.get("cache-dir")) / "cache" / "repositories"
+        return Path(self.get("cache-dir")).expanduser() / "cache" / "repositories"
+
+    @property
+    def artifacts_cache_directory(self) -> Path:
+        return Path(self.get("cache-dir")).expanduser() / "artifacts"

     @property
     def virtualenvs_path(self) -> Path:
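The practical effect of the .expanduser() calls is easiest to see with a literal "~" in the configured cache directory. A minimal standalone sketch, not part of the diff; the configured value is invented:

```python
from pathlib import Path

cache_dir = "~/.cache/pypoetry"  # hypothetical configured "cache-dir" value

# Without expanduser(), the literal "~" segment survives into the path:
print(Path(cache_dir) / "cache" / "repositories")
# -> ~/.cache/pypoetry/cache/repositories (a relative path whose first part is "~")

# With expanduser(), "~" is resolved against the user's home directory:
print(Path(cache_dir).expanduser() / "cache" / "repositories")
print(Path(cache_dir).expanduser() / "artifacts")  # the new artifacts cache root
```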
89 changes: 8 additions & 81 deletions src/poetry/installation/chef.py
@@ -1,7 +1,5 @@
 from __future__ import annotations

-import hashlib
-import json
 import tarfile
 import tempfile
 import zipfile
@@ -19,18 +17,14 @@
 from poetry.core.utils.helpers import temporary_directory
 from pyproject_hooks import quiet_subprocess_runner  # type: ignore[import]

-from poetry.installation.chooser import InvalidWheelName
-from poetry.installation.chooser import Wheel
 from poetry.utils.env import ephemeral_environment


 if TYPE_CHECKING:
     from contextlib import AbstractContextManager

     from poetry.core.packages.utils.link import Link

-    from poetry.config.config import Config
     from poetry.repositories import RepositoryPool
+    from poetry.utils.cache import ArtifactCache
     from poetry.utils.env import Env
@@ -86,12 +80,12 @@ def install(self, requirements: Collection[str]) -> None:


 class Chef:
-    def __init__(self, config: Config, env: Env, pool: RepositoryPool) -> None:
+    def __init__(
+        self, artifact_cache: ArtifactCache, env: Env, pool: RepositoryPool
+    ) -> None:
         self._env = env
         self._pool = pool
-        self._cache_dir = (
-            Path(config.get("cache-dir")).expanduser().joinpath("artifacts")
-        )
+        self._artifact_cache = artifact_cache

     def prepare(
         self, archive: Path, output_dir: Path | None = None, *, editable: bool = False
@@ -181,7 +175,9 @@ def _prepare_sdist(self, archive: Path, destination: Path | None = None) -> Path
                 sdist_dir = archive_dir

         if destination is None:
-            destination = self.get_cache_directory_for_link(Link(archive.as_uri()))
+            destination = self._artifact_cache.get_cache_directory_for_link(
+                Link(archive.as_uri())
+            )

         destination.mkdir(parents=True, exist_ok=True)
@@ -196,72 +192,3 @@ def _should_prepare(self, archive: Path) -> bool:
     @classmethod
     def _is_wheel(cls, archive: Path) -> bool:
         return archive.suffix == ".whl"
-
-    def get_cached_archive_for_link(self, link: Link, *, strict: bool) -> Path | None:
-        archives = self.get_cached_archives_for_link(link)
-        if not archives:
-            return None
-
-        candidates: list[tuple[float | None, Path]] = []
-        for archive in archives:
-            if strict:
-                # in strict mode return the original cached archive instead of the
-                # prioritized archive type.
-                if link.filename == archive.name:
-                    return archive
-                continue
-            if archive.suffix != ".whl":
-                candidates.append((float("inf"), archive))
-                continue
-
-            try:
-                wheel = Wheel(archive.name)
-            except InvalidWheelName:
-                continue
-
-            if not wheel.is_supported_by_environment(self._env):
-                continue
-
-            candidates.append(
-                (wheel.get_minimum_supported_index(self._env.supported_tags), archive),
-            )
-
-        if not candidates:
-            return None
-
-        return min(candidates)[1]
-
-    def get_cached_archives_for_link(self, link: Link) -> list[Path]:
-        cache_dir = self.get_cache_directory_for_link(link)
-
-        archive_types = ["whl", "tar.gz", "tar.bz2", "bz2", "zip"]
-        paths = []
-        for archive_type in archive_types:
-            for archive in cache_dir.glob(f"*.{archive_type}"):
-                paths.append(Path(archive))
-
-        return paths
-
-    def get_cache_directory_for_link(self, link: Link) -> Path:
-        key_parts = {"url": link.url_without_fragment}
-
-        if link.hash_name is not None and link.hash is not None:
-            key_parts[link.hash_name] = link.hash
-
-        if link.subdirectory_fragment:
-            key_parts["subdirectory"] = link.subdirectory_fragment
-
-        key_parts["interpreter_name"] = self._env.marker_env["interpreter_name"]
-        key_parts["interpreter_version"] = "".join(
-            self._env.marker_env["interpreter_version"].split(".")[:2]
-        )
-
-        key = hashlib.sha256(
-            json.dumps(
-                key_parts, sort_keys=True, separators=(",", ":"), ensure_ascii=True
-            ).encode("ascii")
-        ).hexdigest()
-
-        split_key = [key[:2], key[2:4], key[4:6], key[6:]]
-
-        return self._cache_dir.joinpath(*split_key)
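With the caching methods gone, Chef's only cache dependency is the injected ArtifactCache. A hedged sketch of the new wiring; make_chef is a hypothetical helper, not part of this commit, and env and pool stand for pre-built objects, as in the executor change below:

```python
from __future__ import annotations

from typing import TYPE_CHECKING

from poetry.installation.chef import Chef
from poetry.utils.cache import ArtifactCache

if TYPE_CHECKING:
    from pathlib import Path

    from poetry.repositories import RepositoryPool
    from poetry.utils.env import Env


def make_chef(cache_root: Path, env: Env, pool: RepositoryPool) -> Chef:
    # Chef(config, env, pool) becomes Chef(artifact_cache, env, pool): any
    # directory can back the cache now, e.g. an isolated tmp dir in tests.
    return Chef(ArtifactCache(cache_dir=cache_root / "artifacts"), env, pool)
```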
35 changes: 1 addition & 34 deletions src/poetry/installation/chooser.py
@@ -6,11 +6,9 @@
 from typing import TYPE_CHECKING
 from typing import Any

-from packaging.tags import Tag
-
 from poetry.config.config import Config
 from poetry.config.config import PackageFilterPolicy
-from poetry.utils.patterns import wheel_file_re
+from poetry.utils.wheel import Wheel


 if TYPE_CHECKING:
@@ -25,37 +23,6 @@
 logger = logging.getLogger(__name__)


-class InvalidWheelName(Exception):
-    pass
-
-
-class Wheel:
-    def __init__(self, filename: str) -> None:
-        wheel_info = wheel_file_re.match(filename)
-        if not wheel_info:
-            raise InvalidWheelName(f"{filename} is not a valid wheel filename.")
-
-        self.filename = filename
-        self.name = wheel_info.group("name").replace("_", "-")
-        self.version = wheel_info.group("ver").replace("_", "-")
-        self.build_tag = wheel_info.group("build")
-        self.pyversions = wheel_info.group("pyver").split(".")
-        self.abis = wheel_info.group("abi").split(".")
-        self.plats = wheel_info.group("plat").split(".")
-
-        self.tags = {
-            Tag(x, y, z) for x in self.pyversions for y in self.abis for z in self.plats
-        }
-
-    def get_minimum_supported_index(self, tags: list[Tag]) -> int | None:
-        indexes = [tags.index(t) for t in self.tags if t in tags]
-
-        return min(indexes) if indexes else None
-
-    def is_supported_by_environment(self, env: Env) -> bool:
-        return bool(set(env.supported_tags).intersection(self.tags))
-
-
 class Chooser:
     """
     A Chooser chooses an appropriate release archive for packages.
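The Wheel helper removed here moves to poetry.utils.wheel (one of the changed files not shown in this capture) with unchanged behavior. A small usage sketch, with an invented filename and tag list:

```python
from packaging.tags import Tag

from poetry.utils.wheel import Wheel

wheel = Wheel("requests-2.28.2-py3-none-any.whl")
print(wheel.name, wheel.version)  # requests 2.28.2
print(wheel.tags)                 # a set holding the single py3-none-any tag

# Supported tags are ordered most-specific first, so a lower index means a
# better match; py3-none-any sits at index 1 of this invented list.
supported = [Tag("cp311", "cp311", "manylinux_2_17_x86_64"), Tag("py3", "none", "any")]
print(wheel.get_minimum_supported_index(supported))  # 1
```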
24 changes: 18 additions & 6 deletions src/poetry/installation/executor.py
@@ -27,6 +27,7 @@
 from poetry.puzzle.exceptions import SolverProblemError
 from poetry.utils._compat import decode
 from poetry.utils.authenticator import Authenticator
+from poetry.utils.cache import ArtifactCache
 from poetry.utils.env import EnvCommandError
 from poetry.utils.helpers import atomic_open
 from poetry.utils.helpers import get_file_hash
@@ -77,10 +78,11 @@ def __init__(
         else:
             self._max_workers = 1

+        self._artifact_cache = ArtifactCache(cache_dir=config.artifacts_cache_directory)
         self._authenticator = Authenticator(
             config, self._io, disable_cache=disable_cache, pool_size=self._max_workers
         )
-        self._chef = Chef(config, self._env, pool)
+        self._chef = Chef(self._artifact_cache, self._env, pool)
         self._chooser = Chooser(pool, self._env, config)

         self._executor = ThreadPoolExecutor(max_workers=self._max_workers)
@@ -709,15 +711,19 @@ def _download(self, operation: Install | Update) -> Path:
     def _download_link(self, operation: Install | Update, link: Link) -> Path:
         package = operation.package

-        output_dir = self._chef.get_cache_directory_for_link(link)
+        output_dir = self._artifact_cache.get_cache_directory_for_link(link)
         # Try to get cached original package for the link provided
-        original_archive = self._chef.get_cached_archive_for_link(link, strict=True)
+        original_archive = self._artifact_cache.get_cached_archive_for_link(
+            link, strict=True
+        )
         if original_archive is None:
             # No cached original distributions was found, so we download and prepare it
             try:
                 original_archive = self._download_archive(operation, link)
             except BaseException:
-                cache_directory = self._chef.get_cache_directory_for_link(link)
+                cache_directory = self._artifact_cache.get_cache_directory_for_link(
+                    link
+                )
                 cached_file = cache_directory.joinpath(link.filename)
                 # We can't use unlink(missing_ok=True) because it's not available
                 # prior to Python 3.8
@@ -728,7 +734,11 @@ def _download_link(self, operation: Install | Update, link: Link) -> Path:

         # Get potential higher prioritized cached archive, otherwise it will fall back
         # to the original archive.
-        archive = self._chef.get_cached_archive_for_link(link, strict=False)
+        archive = self._artifact_cache.get_cached_archive_for_link(
+            link,
+            strict=False,
+            env=self._env,
+        )
         # 'archive' can at this point never be None. Since we previously downloaded
         # an archive, we now should have something cached that we can use here
         assert archive is not None
@@ -792,7 +802,9 @@ def _download_archive(self, operation: Install | Update, link: Link) -> Path:
             progress.start()

         done = 0
-        archive = self._chef.get_cache_directory_for_link(link) / link.filename
+        archive = (
+            self._artifact_cache.get_cache_directory_for_link(link) / link.filename
+        )
         archive.parent.mkdir(parents=True, exist_ok=True)
         with atomic_open(archive) as f:
             for chunk in response.iter_content(chunk_size=4096):
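The two lookups in _download_link implement different cache policies. A condensed sketch of the flow; cached_archive is a hypothetical helper for illustration, not executor code:

```python
from __future__ import annotations

from typing import TYPE_CHECKING

if TYPE_CHECKING:
    from pathlib import Path

    from poetry.core.packages.utils.link import Link

    from poetry.utils.cache import ArtifactCache
    from poetry.utils.env import Env


def cached_archive(cache: ArtifactCache, link: Link, env: Env) -> Path | None:
    # strict=True asks only for the artifact originally downloaded for this
    # link: an exact filename match, never a wheel later built from an sdist.
    original = cache.get_cached_archive_for_link(link, strict=True)
    if original is None:
        return None  # nothing cached yet; the executor downloads it at this point
    # strict=False widens the search to every cached artifact for the link;
    # env is then required so compatible wheels rank ahead of the sdist.
    return cache.get_cached_archive_for_link(link, strict=False, env=env)
```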
90 changes: 90 additions & 0 deletions src/poetry/utils/cache.py
@@ -8,11 +8,21 @@
 import time

 from pathlib import Path
+from typing import TYPE_CHECKING
 from typing import Any
 from typing import Callable
 from typing import Generic
 from typing import TypeVar

+from poetry.utils.wheel import InvalidWheelName
+from poetry.utils.wheel import Wheel
+
+
+if TYPE_CHECKING:
+    from poetry.core.packages.utils.link import Link
+
+    from poetry.utils.env import Env
+

 # Used by Cachy for items that do not expire.
 MAX_DATE = 9999999999
@@ -196,3 +206,83 @@ def _deserialize(self, data_raw: bytes) -> CacheItem[T]:
         data = json.loads(data_str[10:])
         expires = int(data_str[:10])
         return CacheItem(data, expires)
+
+
+class ArtifactCache:
+    def __init__(self, *, cache_dir: Path) -> None:
+        self._cache_dir = cache_dir
+
+    def get_cache_directory_for_link(self, link: Link) -> Path:
+        key_parts = {"url": link.url_without_fragment}
+
+        if link.hash_name is not None and link.hash is not None:
+            key_parts[link.hash_name] = link.hash
+
+        if link.subdirectory_fragment:
+            key_parts["subdirectory"] = link.subdirectory_fragment
+
+        key = hashlib.sha256(
+            json.dumps(
+                key_parts, sort_keys=True, separators=(",", ":"), ensure_ascii=True
+            ).encode("ascii")
+        ).hexdigest()
+
+        split_key = [key[:2], key[2:4], key[4:6], key[6:]]
+
+        return self._cache_dir.joinpath(*split_key)
+
+    def get_cached_archive_for_link(
+        self,
+        link: Link,
+        *,
+        strict: bool,
+        env: Env | None = None,
+    ) -> Path | None:
+        assert strict or env is not None
+
+        archives = self._get_cached_archives_for_link(link)
+        if not archives:
+            return None
+
+        candidates: list[tuple[float | None, Path]] = []
+        for archive in archives:
+            if strict:
+                # in strict mode return the original cached archive instead of the
+                # prioritized archive type.
+                if link.filename == archive.name:
+                    return archive
+                continue
+
+            assert env is not None
+
+            if archive.suffix != ".whl":
+                candidates.append((float("inf"), archive))
+                continue
+
+            try:
+                wheel = Wheel(archive.name)
+            except InvalidWheelName:
+                continue
+
+            if not wheel.is_supported_by_environment(env):
+                continue
+
+            candidates.append(
+                (wheel.get_minimum_supported_index(env.supported_tags), archive),
+            )
+
+        if not candidates:
+            return None
+
+        return min(candidates)[1]
+
+    def _get_cached_archives_for_link(self, link: Link) -> list[Path]:
+        cache_dir = self.get_cache_directory_for_link(link)
+
+        archive_types = ["whl", "tar.gz", "tar.bz2", "bz2", "zip"]
+        paths = []
+        for archive_type in archive_types:
+            for archive in cache_dir.glob(f"*.{archive_type}"):
+                paths.append(Path(archive))
+
+        return paths
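Note that the moved key derivation drops the interpreter_name and interpreter_version parts the old Chef method mixed in: ArtifactCache holds no Env, so cache paths now depend only on the link itself. The scheme is deterministic and easy to reproduce outside Poetry; a standalone illustration, with a made-up URL and cache root:

```python
import hashlib
import json
from pathlib import Path

# Same recipe as ArtifactCache.get_cache_directory_for_link, for a link
# carrying no hash or subdirectory fragment:
key_parts = {"url": "https://example.com/packages/demo-1.0.tar.gz"}
key = hashlib.sha256(
    json.dumps(
        key_parts, sort_keys=True, separators=(",", ":"), ensure_ascii=True
    ).encode("ascii")
).hexdigest()

# The 64-character digest is split 2/2/2/58, capping per-directory fan-out
# at 256 entries for each of the first three levels:
split_key = [key[:2], key[2:4], key[4:6], key[6:]]
print(Path("~/.cache/pypoetry/artifacts").expanduser().joinpath(*split_key))
```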