Skip to content

Commit

Permalink
WIP: track cache use.
Browse files Browse the repository at this point in the history
  • Loading branch information
jsirois committed Oct 3, 2024
1 parent 7d98295 commit 7df265b
Show file tree
Hide file tree
Showing 8 changed files with 252 additions and 9 deletions.
188 changes: 188 additions & 0 deletions pex/cache/data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,188 @@
# Copyright 2024 Pex project contributors.
# Licensed under the Apache License, Version 2.0 (see LICENSE).

from __future__ import absolute_import

import os.path
import sqlite3
from contextlib import contextmanager

from pex.atomic_directory import atomic_directory
from pex.cache.dirs import VenvDir
from pex.common import CopyMode
from pex.dist_metadata import ProjectNameAndVersion
from pex.typing import TYPE_CHECKING
from pex.variables import ENV

if TYPE_CHECKING:
from typing import Iterator, List, Tuple, Union

from pex.pex_info import PexInfo


# The DDL script used to initialize a brand-new cache database. `db_connection` executes it once,
# against a freshly created database file, before that file is published for general use.
#
# Tables:
# + wheels: keyed by (name, hash) with an additional unique index on hash.
# + zipapps: keyed by pex_hash.
# + venvs: keyed by short_hash.
# + zipapp_deps / venv_deps: link rows tying a zipapp or venv to the hashes of the wheels it
#   depends on. Both foreign keys in each link table are declared ON DELETE CASCADE, which only
#   takes effect on connections that turn on `PRAGMA foreign_keys`.
#
# NOTE(review): zipapp_deps and venv_deps declare no PRIMARY KEY or UNIQUE constraint (their
# indexes are plain, non-unique indexes), so `INSERT OR IGNORE` into them has no uniqueness
# conflict to ignore and repeated (owner, wheel) pairs would be stored more than once. Confirm
# whether a composite primary key is wanted here.
_SCHEMA = """
PRAGMA journal_mode=WAL;
CREATE TABLE wheels (
name TEXT NOT NULL,
hash TEXT NOT NULL,
project_name TEXT NOT NULL,
version TEXT NOT NULL,
PRIMARY KEY (name ASC, hash ASC)
) WITHOUT ROWID;
CREATE UNIQUE INDEX wheels_idx_hash ON wheels (hash ASC);
CREATE INDEX wheels_idx_project_name ON wheels (project_name ASC);
CREATE INDEX wheels_idx_version ON wheels (version ASC);
CREATE TABLE zipapps (
pex_hash TEXT PRIMARY KEY ASC,
bootstrap_hash TEXT NOT NULL,
code_hash TEXT NOT NULL
) WITHOUT ROWID;
CREATE INDEX zipapps_idx_bootstrap_hash ON zipapps (bootstrap_hash ASC);
CREATE INDEX zipapps_idx_code_hash ON zipapps (code_hash ASC);
CREATE TABLE zipapp_deps (
pex_hash TEXT NOT NULL REFERENCES zipapps(pex_hash) ON DELETE CASCADE,
wheel_hash TEXT NOT NULL REFERENCES wheels(hash) ON DELETE CASCADE
);
CREATE INDEX zipapp_deps_idx_pex_hash ON zipapp_deps (pex_hash ASC);
CREATE INDEX zipapp_deps_idx_wheel_hash ON zipapp_deps (wheel_hash ASC);
CREATE TABLE venvs (
short_hash TEXT PRIMARY KEY ASC,
pex_hash TEXT NOT NULL,
contents_hash TEXT NOT NULL
) WITHOUT ROWID;
CREATE INDEX venvs_idx_pex_hash ON venvs (pex_hash ASC);
CREATE TABLE venv_deps (
venv_hash TEXT NOT NULL REFERENCES venvs(short_hash) ON DELETE CASCADE,
wheel_hash TEXT NOT NULL REFERENCES wheels(hash) ON DELETE CASCADE
);
CREATE INDEX venv_deps_idx_venv_hash ON venv_deps (venv_hash ASC);
CREATE INDEX venv_deps_idx_wheel_hash ON venv_deps (wheel_hash ASC);
"""


@contextmanager
def db_connection():
    # type: () -> Iterator[sqlite3.Connection]
    """Yield a connection to the cache database, creating the database first if needed.

    The database file is `cache.db` in the `data` directory under `ENV.PEX_ROOT`. When that
    directory has not been established yet, the database is created in the `atomic_directory`
    work dir and initialized with `_SCHEMA`.

    Statements executed on the yielded connection are committed when the context exits normally
    and rolled back when it exits with an exception. In both cases the connection is closed on
    exit and must not be used afterwards.
    """
    db_dir = os.path.join(ENV.PEX_ROOT, "data")
    with atomic_directory(db_dir) as atomic_dir:
        if not atomic_dir.is_finalized():
            # N.B.: Using a sqlite3 connection as a context manager only commits or rolls back the
            # transaction; it never closes the connection. We close explicitly so that no handle on
            # the work dir database is still open when the `atomic_directory` context exits.
            init_conn = sqlite3.connect(os.path.join(atomic_dir.work_dir, "cache.db"))
            try:
                with init_conn:
                    init_conn.executescript(_SCHEMA).close()
            finally:
                init_conn.close()

    conn = sqlite3.connect(os.path.join(db_dir, "cache.db"))
    try:
        with conn:
            # These pragmas are per-connection settings and so must be applied on every connect.
            conn.executescript(
                """
                PRAGMA synchronous=NORMAL;
                PRAGMA foreign_keys=ON;
                """
            ).close()
            yield conn
    finally:
        # Runs after the commit / rollback performed by `with conn` above.
        conn.close()


@contextmanager
def _inserted_wheels(pex_info):
    # type: (PexInfo) -> Iterator[sqlite3.Cursor]
    """Record the wheels of the given PEX and yield a cursor for further related inserts.

    One row is inserted into the `wheels` table for each entry in `pex_info.distributions` (a
    mapping of wheel file name to wheel hash), with the project name and version parsed from the
    wheel file name. Rows that conflict with existing rows are ignored.

    The yielded cursor shares the connection (and thus the transaction) used for the wheel
    inserts. It is closed when the context exits, whether normally or via an exception.
    """

    wheels = []  # type: List[Tuple[str, str, str, str]]
    for wheel_name, wheel_hash in pex_info.distributions.items():
        pnav = ProjectNameAndVersion.from_filename(wheel_name)
        wheels.append(
            (
                wheel_name,
                wheel_hash,
                str(pnav.canonicalized_project_name),
                str(pnav.canonicalized_version),
            )
        )

    with db_connection() as conn:
        cursor = conn.executemany(
            """
            INSERT OR IGNORE INTO wheels (
                name,
                hash,
                project_name,
                version
            ) VALUES (?, ?, ?, ?)
            """,
            wheels,
        )
        try:
            yield cursor
        finally:
            # Ensure the cursor is released even when the caller's `with` body raises. Closing a
            # cursor the caller has already closed is harmless.
            cursor.close()


def record_zipapp_dependencies(pex_info):
    # type: (PexInfo) -> None
    """Record a zipapp and the wheels it depends on in the cache database.

    The wheels listed in `pex_info.distributions` are recorded first, then a `zipapps` row keyed
    by `pex_info.pex_hash` is inserted, followed by one `zipapp_deps` row per wheel hash.

    :param pex_info: The PEX-INFO of the zipapp to record.
    """

    with _inserted_wheels(pex_info) as cursor:
        # N.B.: The column list order must match the order of the values tuple below. Listing
        # `code_hash` before `bootstrap_hash` here would store each hash in the other's column.
        cursor.execute(
            """
            INSERT OR IGNORE INTO zipapps (
                pex_hash,
                bootstrap_hash,
                code_hash
            ) VALUES (?, ?, ?)
            """,
            (pex_info.pex_hash, pex_info.bootstrap_hash, pex_info.code_hash),
        ).executemany(
            """
            INSERT OR IGNORE INTO zipapp_deps (
                pex_hash,
                wheel_hash
            ) VALUES (?, ?)
            """,
            tuple(
                (pex_info.pex_hash, wheel_hash) for wheel_hash in pex_info.distributions.values()
            ),
        ).close()


def record_zipapp_access(unzip_dir):
    # type: (str) -> None
    """Mark an unzipped zipapp directory as just used by touching its timestamps.

    :param unzip_dir: The path of the directory the zipapp was unzipped to.
    """
    # Passing `None` for the times sets both the access and modified times to the current time.
    os.utime(unzip_dir, None)


def record_venv_dependencies(
    copy_mode,  # type: CopyMode.Value
    pex_info,  # type: PexInfo
    venv_dir,  # type: VenvDir
    venv_hash,  # type: str
):
    # type: (...) -> None
    """Record a venv, and for symlinked venvs its wheel dependencies, in the cache database.

    A `venvs` row keyed by `venv_hash` is always inserted. Only when `copy_mode` is
    `CopyMode.SYMLINK` are the wheels in `pex_info.distributions` recorded as well, along with one
    `venv_deps` row per wheel hash.

    NOTE(review): presumably only symlinked venvs keep depending on the cached wheels after they
    are created -- confirm against the venv installer.
    """

    def insert_venv(executor):
        # type: (Union[sqlite3.Connection, sqlite3.Cursor]) -> sqlite3.Cursor
        # Both connections and cursors expose `execute`, so either can perform the insert.
        return executor.execute(
            """
            INSERT OR IGNORE INTO venvs (
                short_hash,
                pex_hash,
                contents_hash
            ) VALUES (?, ?, ?)
            """,
            (venv_hash, venv_dir.pex_hash, venv_dir.contents_hash),
        )

    if copy_mode is not CopyMode.SYMLINK:
        # No wheel dependencies to track; just record the venv itself.
        with db_connection() as conn:
            insert_venv(conn).close()
        return

    with _inserted_wheels(pex_info) as cursor:
        venv_cursor = insert_venv(cursor)
        venv_cursor.executemany(
            """
            INSERT OR IGNORE INTO venv_deps (
                venv_hash,
                wheel_hash
            ) VALUES (?, ?)
            """,
            [(venv_hash, wheel_hash) for wheel_hash in pex_info.distributions.values()],
        )
        venv_cursor.close()


def record_venv_access(venv_dir):
    # type: (VenvDir) -> None
    """Mark a venv as just used by touching the timestamps of its directory.

    :param venv_dir: The venv whose `path` should be touched.
    """
    venv_path = venv_dir.path
    # Passing `None` for the times sets both the access and modified times to the current time.
    os.utime(venv_path, None)
26 changes: 25 additions & 1 deletion pex/cache/dirs.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,37 @@
import os

from pex.enum import Enum
from pex.typing import TYPE_CHECKING
from pex.typing import TYPE_CHECKING, cast
from pex.variables import ENV, Variables

if TYPE_CHECKING:
from typing import Iterable, Iterator, Union


class VenvDir(str):
    """A venv directory path that also carries the hashes associated with the venv.

    Instances are `str` objects whose value is `path`, so they can be used anywhere a plain path
    string is accepted. The `path`, `pex_hash` and `contents_hash` given at construction are
    additionally exposed as attributes of the same names.
    """

    @staticmethod
    def __new__(
        cls,
        path,  # type: str
        pex_hash,  # type: str
        contents_hash,  # type: str
    ):
        # type: (...) -> VenvDir
        # Only `path` participates in the underlying `str` value; the hashes are stored as
        # attributes by `__init__`.
        # MyPy incorrectly flags the call to super(VenvDir, cls).__new__(cls, path) for Python 2.7.
        return cast(VenvDir, super(VenvDir, cls).__new__(cls, path))  # type: ignore[call-arg]

    def __init__(
        self,
        path,  # type: str
        pex_hash,  # type: str
        contents_hash,  # type: str
    ):
        # type: (...) -> None
        self.path = path
        self.pex_hash = pex_hash
        self.contents_hash = contents_hash

    def __getnewargs__(self):
        # type: () -> tuple
        # N.B.: `str.__getnewargs__` only supplies the string value, which would cause copying and
        # unpickling to call `__new__` without the required hash arguments and fail with a
        # TypeError. Supplying the full argument list keeps `copy` and `pickle` working.
        return self.path, self.pex_hash, self.contents_hash


class CacheDir(Enum["CacheDir.Value"]):
class Value(Enum.Value):
def __init__(
Expand Down
10 changes: 10 additions & 0 deletions pex/layout.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

from pex.atomic_directory import atomic_directory
from pex.cache import access as cache_access
from pex.cache.data import record_zipapp_access, record_zipapp_dependencies
from pex.cache.dirs import CacheDir
from pex.common import ZipFileEx, is_script, open_zip, safe_copy, safe_mkdir, safe_mkdtemp
from pex.enum import Enum
Expand Down Expand Up @@ -313,6 +314,8 @@ def _ensure_installed(
if not os.path.exists(install_to):
with ENV.patch(PEX_ROOT=pex_root):
cache_access.read_write()
else:
record_zipapp_access(unzip_dir=install_to)
with atomic_directory(install_to) as chroot:
if not chroot.is_finalized():
with ENV.patch(PEX_ROOT=pex_root), TRACER.timed(
Expand Down Expand Up @@ -367,6 +370,13 @@ def _ensure_installed(
layout.extract_pex_info(chroot.work_dir)
layout.extract_main(chroot.work_dir)
layout.record(chroot.work_dir)

with TRACER.timed(
"Recording zipapp install of {pex} {hash}".format(
pex=pex, hash=pex_info.pex_hash
)
):
record_zipapp_dependencies(pex_info)
return install_to


Expand Down
15 changes: 15 additions & 0 deletions pex/pex_bootstrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from pex import pex_warnings
from pex.atomic_directory import atomic_directory
from pex.cache import access as cache_access
from pex.cache.data import record_venv_access, record_venv_dependencies
from pex.cache.dirs import CacheDir
from pex.common import CopyMode, die, pluralize
from pex.environment import ResolveError
Expand Down Expand Up @@ -523,6 +524,8 @@ def ensure_venv(
if not os.path.exists(venv_dir):
with ENV.patch(PEX_ROOT=pex_info.pex_root):
cache_access.read_write()
else:
record_venv_access(venv_dir=venv_dir)
with atomic_directory(venv_dir) as venv:
if not venv.is_finalized():
from pex.venv.virtualenv import Virtualenv
Expand Down Expand Up @@ -594,6 +597,18 @@ def ensure_venv(
hermetic_scripts=pex_info.venv_hermetic_scripts,
)

with TRACER.timed(
"Recording venv install of {pex} {hash}".format(
pex=pex.path(), hash=pex_info.pex_hash
)
):
record_venv_dependencies(
copy_mode=copy_mode,
pex_info=pex_info,
venv_dir=venv_dir,
venv_hash=entropy,
)

# There are popular Linux distributions with shebang length limits
# (BINPRM_BUF_SIZE in /usr/include/linux/binfmts.h) set at 128 characters, so
# we warn in the _very_ unlikely case that our shortened shebang is longer than
Expand Down
7 changes: 4 additions & 3 deletions pex/pex_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
from typing import Collection # type: ignore[attr-defined]
from typing import Any, Dict, Iterable, Mapping, Optional, Text, Tuple, Union

from pex.cache.dirs import VenvDir
from pex.dist_metadata import Requirement

# N.B.: These are expensive imports and PexInfo is used during PEX bootstrapping which we want
Expand Down Expand Up @@ -275,7 +276,7 @@ def _venv_dir(
interpreter=None, # type: Optional[PythonInterpreter]
expand_pex_root=True, # type: bool
):
# type: (...) -> Optional[str]
# type: (...) -> Optional[VenvDir]
if not self.venv:
return None
if self.pex_hash is None:
Expand All @@ -295,15 +296,15 @@ def runtime_venv_dir(
pex_file, # type: str
interpreter=None, # type: Optional[PythonInterpreter]
):
# type: (...) -> Optional[str]
# type: (...) -> Optional[VenvDir]
return self._venv_dir(self.pex_root, pex_file, interpreter)

def raw_venv_dir(
self,
pex_file, # type: str
interpreter=None, # type: Optional[PythonInterpreter]
):
# type: (...) -> Optional[str]
# type: (...) -> Optional[VenvDir]
"""Distiguished from ``venv_dir`` by use of the raw_pex_root.
We don't expand the pex_root at build time in case the pex_root is not
writable or doesn't exist at build time.
Expand Down
1 change: 1 addition & 0 deletions pex/pip/installation.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ def _pip_installation(

isolated_pip_builder = PEXBuilder(path=chroot.work_dir)
isolated_pip_builder.info.venv = True
isolated_pip_builder.info.venv_site_packages_copies = True
for dist_location in iter_distribution_locations():
isolated_pip_builder.add_dist_location(dist=dist_location)
with named_temporary_file(prefix="", suffix=".py", mode="w") as fp:
Expand Down
2 changes: 1 addition & 1 deletion pex/sh_boot.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ def create_sh_boot_script(

venv_dir = pex_info.raw_venv_dir(pex_file=pex_name, interpreter=interpreter)
if venv_dir:
pex_installed_path = venv_dir
pex_installed_path = venv_dir.path
else:
pex_hash = pex_info.pex_hash
if pex_hash is None:
Expand Down
12 changes: 8 additions & 4 deletions pex/variables.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,11 @@
_O = TypeVar("_O")
_P = TypeVar("_P")

# N.B.: This is an expensive import and we only need it for type checking.
# N.B.: This import is circular, and we import lazily below as a result, but we also need the
# import eagerly for type checking.
from pex.cache.dirs import VenvDir # noqa

# N.B.: This is an expensive import, and we only need it for type checking.
from pex.interpreter import PythonInterpreter


Expand Down Expand Up @@ -823,10 +827,10 @@ def venv_dir(
pex_path=(), # type: Tuple[str, ...]
expand_pex_root=True, # type: bool
):
# type: (...) -> str
# type: (...) -> VenvDir

# N.B.: We need lazy import gymnastics here since CacheType uses Variables for PEX_ROOT.
from pex.cache.dirs import CacheDir
from pex.cache.dirs import CacheDir, VenvDir

# The venv contents are affected by which PEX files are in play as well as which interpreter
# is selected. The former is influenced via PEX_PATH and the latter is influenced by interpreter
Expand Down Expand Up @@ -944,4 +948,4 @@ def warn(message):
)
)

return venv_path
return VenvDir(path=venv_path, pex_hash=pex_hash, contents_hash=venv_contents_hash)

0 comments on commit 7df265b

Please sign in to comment.