# The schema for the cache tracking database. It is applied exactly once, when the database file
# is first created (see `db_connection`).
_SCHEMA = """
PRAGMA journal_mode=WAL;

CREATE TABLE wheels (
    name TEXT NOT NULL,
    hash TEXT NOT NULL,
    project_name TEXT NOT NULL,
    version TEXT NOT NULL,
    PRIMARY KEY (name ASC, hash ASC)
) WITHOUT ROWID;
CREATE UNIQUE INDEX wheels_idx_hash ON wheels (hash ASC);
CREATE INDEX wheels_idx_project_name ON wheels (project_name ASC);
CREATE INDEX wheels_idx_version ON wheels (version ASC);

CREATE TABLE zipapps (
    pex_hash TEXT PRIMARY KEY ASC,
    bootstrap_hash TEXT NOT NULL,
    code_hash TEXT NOT NULL
) WITHOUT ROWID;
CREATE INDEX zipapps_idx_bootstrap_hash ON zipapps (bootstrap_hash ASC);
CREATE INDEX zipapps_idx_code_hash ON zipapps (code_hash ASC);

CREATE TABLE zipapp_deps (
    pex_hash TEXT NOT NULL REFERENCES zipapps(pex_hash) ON DELETE CASCADE,
    wheel_hash TEXT NOT NULL REFERENCES wheels(hash) ON DELETE CASCADE
);
CREATE INDEX zipapp_deps_idx_pex_hash ON zipapp_deps (pex_hash ASC);
CREATE INDEX zipapp_deps_idx_wheel_hash ON zipapp_deps (wheel_hash ASC);

CREATE TABLE venvs (
    short_hash TEXT PRIMARY KEY ASC,
    pex_hash TEXT NOT NULL,
    contents_hash TEXT NOT NULL
) WITHOUT ROWID;
CREATE INDEX venvs_idx_pex_hash ON venvs (pex_hash ASC);

CREATE TABLE venv_deps (
    venv_hash TEXT NOT NULL REFERENCES venvs(short_hash) ON DELETE CASCADE,
    wheel_hash TEXT NOT NULL REFERENCES wheels(hash) ON DELETE CASCADE
);
CREATE INDEX venv_deps_idx_venv_hash ON venv_deps (venv_hash ASC);
CREATE INDEX venv_deps_idx_wheel_hash ON venv_deps (wheel_hash ASC);
"""


@contextmanager
def db_connection():
    # type: () -> Iterator[sqlite3.Connection]
    """Yield a connection to the cache tracking database, creating the database if needed.

    The database lives at `<PEX_ROOT>/data/cache.db`. It is created inside an atomic directory so
    that `_SCHEMA` is applied exactly once. The yielded connection has foreign keys enabled. It is
    committed when the `with` block exits normally, rolled back when the block raises, and closed
    in either case; it must not be used after the block exits.
    """
    # N.B.: Imported here so this function does not depend on edits to the module import header.
    from contextlib import closing

    db_dir = os.path.join(ENV.PEX_ROOT, "data")
    with atomic_directory(db_dir) as atomic_dir:
        if not atomic_dir.is_finalized():
            new_db = os.path.join(atomic_dir.work_dir, "cache.db")
            # N.B.: A sqlite3 connection used as a context manager only commits or rolls back; it
            # does not close. We close explicitly so no handle to the database remains open when
            # the atomic directory is finalized.
            with closing(sqlite3.connect(new_db)) as conn:
                with conn:
                    conn.executescript(_SCHEMA).close()

    with closing(sqlite3.connect(os.path.join(db_dir, "cache.db"))) as conn:
        with conn:
            conn.executescript(
                """
                PRAGMA synchronous=NORMAL;
                PRAGMA foreign_keys=ON;
                """
            ).close()
            yield conn


@contextmanager
def _inserted_wheels(pex_info):
    # type: (PexInfo) -> Iterator[sqlite3.Cursor]
    """Record the wheels of `pex_info` and yield a cursor for recording rows that reference them.

    Each entry in `pex_info.distributions` (wheel file name -> wheel hash) is inserted into the
    `wheels` table; already recorded wheels are ignored. The yielded cursor is closed when the
    `with` block exits, whether or not the block raises.
    """
    wheels = []  # type: List[Tuple[str, str, str, str]]
    for wheel_name, wheel_hash in pex_info.distributions.items():
        pnav = ProjectNameAndVersion.from_filename(wheel_name)
        wheels.append(
            (
                wheel_name,
                wheel_hash,
                str(pnav.canonicalized_project_name),
                str(pnav.canonicalized_version),
            )
        )

    with db_connection() as conn:
        cursor = conn.executemany(
            """
            INSERT OR IGNORE INTO wheels (
                name,
                hash,
                project_name,
                version
            ) VALUES (?, ?, ?, ?)
            """,
            wheels,
        )
        try:
            yield cursor
        finally:
            cursor.close()


def record_zipapp_dependencies(pex_info):
    # type: (PexInfo) -> None
    """Record an installed zipapp and the wheels it depends on.

    One row is inserted into `zipapps` and one row per wheel into `zipapp_deps`; existing rows are
    left untouched.
    """
    with _inserted_wheels(pex_info) as cursor:
        # N.B.: The column list must line up positionally with the bound values below.
        cursor.execute(
            """
            INSERT OR IGNORE INTO zipapps (
                pex_hash,
                bootstrap_hash,
                code_hash
            ) VALUES (?, ?, ?)
            """,
            (pex_info.pex_hash, pex_info.bootstrap_hash, pex_info.code_hash),
        ).executemany(
            """
            INSERT OR IGNORE INTO zipapp_deps (
                pex_hash,
                wheel_hash
            ) VALUES (?, ?)
            """,
            tuple(
                (pex_info.pex_hash, wheel_hash) for wheel_hash in pex_info.distributions.values()
            ),
        ).close()


def record_zipapp_access(unzip_dir):
    # type: (str) -> None
    """Mark an installed zipapp as just used by setting the times of `unzip_dir` to now."""
    os.utime(unzip_dir, None)


def record_venv_dependencies(
    copy_mode,  # type: CopyMode.Value
    pex_info,  # type: PexInfo
    venv_dir,  # type: VenvDir
    venv_hash,  # type: str
):
    # type: (...) -> None
    """Record an installed venv and, for symlinked venvs, the wheels it depends on.

    A row is always inserted into `venvs`. Wheel rows and `venv_deps` rows are only inserted when
    `copy_mode` is `CopyMode.SYMLINK`.
    NOTE(review): presumably only symlinked venvs point back into the wheel cache; confirm.

    :param copy_mode: How the venv site-packages were populated.
    :param pex_info: The PEX-INFO of the PEX the venv was created from.
    :param venv_dir: The venv directory; supplies the `pex_hash` and `contents_hash` recorded.
    :param venv_hash: The value stored as the `short_hash` primary key of the venv row.
    """

    def record_venv(conn_or_cursor):
        # type: (Union[sqlite3.Connection, sqlite3.Cursor]) -> sqlite3.Cursor
        return conn_or_cursor.execute(
            """
            INSERT OR IGNORE INTO venvs (
                short_hash,
                pex_hash,
                contents_hash
            ) VALUES (?, ?, ?)
            """,
            (venv_hash, venv_dir.pex_hash, venv_dir.contents_hash),
        )

    if copy_mode is CopyMode.SYMLINK:
        with _inserted_wheels(pex_info) as cursor:
            record_venv(cursor).executemany(
                """
                INSERT OR IGNORE INTO venv_deps (
                    venv_hash,
                    wheel_hash
                ) VALUES (?, ?)
                """,
                tuple((venv_hash, wheel_hash) for wheel_hash in pex_info.distributions.values()),
            ).close()
    else:
        with db_connection() as conn:
            record_venv(conn).close()


def record_venv_access(venv_dir):
    # type: (VenvDir) -> None
    """Mark an installed venv as just used by setting the times of `venv_dir.path` to now."""
    os.utime(venv_dir.path, None)
+ return cast(VenvDir, super(VenvDir, cls).__new__(cls, path)) # type: ignore[call-arg] + + def __init__( + self, + path, # type: str + pex_hash, # type: str + contents_hash, # type: str + ): + # type: (...) -> None + self.path = path + self.pex_hash = pex_hash + self.contents_hash = contents_hash + + class CacheDir(Enum["CacheDir.Value"]): class Value(Enum.Value): def __init__( diff --git a/pex/layout.py b/pex/layout.py index d8ee4b54c..fcf76208b 100644 --- a/pex/layout.py +++ b/pex/layout.py @@ -10,6 +10,7 @@ from pex.atomic_directory import atomic_directory from pex.cache import access as cache_access +from pex.cache.data import record_zipapp_access, record_zipapp_dependencies from pex.cache.dirs import CacheDir from pex.common import ZipFileEx, is_script, open_zip, safe_copy, safe_mkdir, safe_mkdtemp from pex.enum import Enum @@ -313,6 +314,8 @@ def _ensure_installed( if not os.path.exists(install_to): with ENV.patch(PEX_ROOT=pex_root): cache_access.read_write() + else: + record_zipapp_access(unzip_dir=install_to) with atomic_directory(install_to) as chroot: if not chroot.is_finalized(): with ENV.patch(PEX_ROOT=pex_root), TRACER.timed( @@ -367,6 +370,13 @@ def _ensure_installed( layout.extract_pex_info(chroot.work_dir) layout.extract_main(chroot.work_dir) layout.record(chroot.work_dir) + + with TRACER.timed( + "Recording zipapp install of {pex} {hash}".format( + pex=pex, hash=pex_info.pex_hash + ) + ): + record_zipapp_dependencies(pex_info) return install_to diff --git a/pex/pex_bootstrapper.py b/pex/pex_bootstrapper.py index 9fafdeafd..3dc424fff 100644 --- a/pex/pex_bootstrapper.py +++ b/pex/pex_bootstrapper.py @@ -10,6 +10,7 @@ from pex import pex_warnings from pex.atomic_directory import atomic_directory from pex.cache import access as cache_access +from pex.cache.data import record_venv_access, record_venv_dependencies from pex.cache.dirs import CacheDir from pex.common import CopyMode, die, pluralize from pex.environment import ResolveError @@ -523,6 
+524,8 @@ def ensure_venv( if not os.path.exists(venv_dir): with ENV.patch(PEX_ROOT=pex_info.pex_root): cache_access.read_write() + else: + record_venv_access(venv_dir=venv_dir) with atomic_directory(venv_dir) as venv: if not venv.is_finalized(): from pex.venv.virtualenv import Virtualenv @@ -594,6 +597,18 @@ def ensure_venv( hermetic_scripts=pex_info.venv_hermetic_scripts, ) + with TRACER.timed( + "Recording venv install of {pex} {hash}".format( + pex=pex.path(), hash=pex_info.pex_hash + ) + ): + record_venv_dependencies( + copy_mode=copy_mode, + pex_info=pex_info, + venv_dir=venv_dir, + venv_hash=entropy, + ) + # There are popular Linux distributions with shebang length limits # (BINPRM_BUF_SIZE in /usr/include/linux/binfmts.h) set at 128 characters, so # we warn in the _very_ unlikely case that our shortened shebang is longer than diff --git a/pex/pex_info.py b/pex/pex_info.py index dbc07bdd3..f64ef17cb 100644 --- a/pex/pex_info.py +++ b/pex/pex_info.py @@ -24,6 +24,7 @@ from typing import Collection # type: ignore[attr-defined] from typing import Any, Dict, Iterable, Mapping, Optional, Text, Tuple, Union + from pex.cache.dirs import VenvDir from pex.dist_metadata import Requirement # N.B.: These are expensive imports and PexInfo is used during PEX bootstrapping which we want @@ -275,7 +276,7 @@ def _venv_dir( interpreter=None, # type: Optional[PythonInterpreter] expand_pex_root=True, # type: bool ): - # type: (...) -> Optional[str] + # type: (...) -> Optional[VenvDir] if not self.venv: return None if self.pex_hash is None: @@ -295,7 +296,7 @@ def runtime_venv_dir( pex_file, # type: str interpreter=None, # type: Optional[PythonInterpreter] ): - # type: (...) -> Optional[str] + # type: (...) -> Optional[VenvDir] return self._venv_dir(self.pex_root, pex_file, interpreter) def raw_venv_dir( @@ -303,7 +304,7 @@ def raw_venv_dir( pex_file, # type: str interpreter=None, # type: Optional[PythonInterpreter] ): - # type: (...) -> Optional[str] + # type: (...) 
-> Optional[VenvDir] """Distiguished from ``venv_dir`` by use of the raw_pex_root. We don't expand the pex_root at build time in case the pex_root is not writable or doesn't exist at build time. diff --git a/pex/pip/installation.py b/pex/pip/installation.py index 79fe16a2d..66caec726 100644 --- a/pex/pip/installation.py +++ b/pex/pip/installation.py @@ -54,6 +54,7 @@ def _pip_installation( isolated_pip_builder = PEXBuilder(path=chroot.work_dir) isolated_pip_builder.info.venv = True + isolated_pip_builder.info.venv_site_packages_copies = True for dist_location in iter_distribution_locations(): isolated_pip_builder.add_dist_location(dist=dist_location) with named_temporary_file(prefix="", suffix=".py", mode="w") as fp: diff --git a/pex/sh_boot.py b/pex/sh_boot.py index e23585037..c7c2eadcb 100644 --- a/pex/sh_boot.py +++ b/pex/sh_boot.py @@ -163,7 +163,7 @@ def create_sh_boot_script( venv_dir = pex_info.raw_venv_dir(pex_file=pex_name, interpreter=interpreter) if venv_dir: - pex_installed_path = venv_dir + pex_installed_path = venv_dir.path else: pex_hash = pex_info.pex_hash if pex_hash is None: diff --git a/pex/variables.py b/pex/variables.py index 43ffdebfe..b7baf47a2 100644 --- a/pex/variables.py +++ b/pex/variables.py @@ -27,7 +27,11 @@ _O = TypeVar("_O") _P = TypeVar("_P") - # N.B.: This is an expensive import and we only need it for type checking. + # N.B.: This import is circular, and we import lazily below as a result, but we also need the + # import eagerly for type checking. + from pex.cache.dirs import VenvDir # noqa + + # N.B.: This is an expensive import, and we only need it for type checking. from pex.interpreter import PythonInterpreter @@ -823,10 +827,10 @@ def venv_dir( pex_path=(), # type: Tuple[str, ...] expand_pex_root=True, # type: bool ): - # type: (...) -> str + # type: (...) -> VenvDir # N.B.: We need lazy import gymnastics here since CacheType uses Variables for PEX_ROOT. 
- from pex.cache.dirs import CacheDir + from pex.cache.dirs import CacheDir, VenvDir # The venv contents are affected by which PEX files are in play as well as which interpreter # is selected. The former is influenced via PEX_PATH and the latter is influenced by interpreter @@ -944,4 +948,4 @@ def warn(message): ) ) - return venv_path + return VenvDir(path=venv_path, pex_hash=pex_hash, contents_hash=venv_contents_hash)