Skip to content

Commit

Permalink
Add support for last access time and wheel and venv symlinks.
Browse files Browse the repository at this point in the history
  • Loading branch information
jsirois committed Oct 5, 2024
1 parent 042414d commit b91e18c
Show file tree
Hide file tree
Showing 7 changed files with 362 additions and 95 deletions.
28 changes: 27 additions & 1 deletion pex/cache/access.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,20 @@
from __future__ import absolute_import, print_function

import fcntl
import itertools
import os
import time
from contextlib import contextmanager
from datetime import datetime

from pex.common import safe_mkdir
from pex.typing import TYPE_CHECKING
from pex.variables import ENV

if TYPE_CHECKING:
from typing import Iterator, Optional, Tuple
from typing import Iterator, Optional, Tuple, Union

from pex.cache.dirs import UnzipDir, VenvDir


# N.B.: The lock file path is last in the lock state tuple to allow for a simple encoding scheme in
Expand Down Expand Up @@ -99,3 +104,24 @@ def await_delete_lock():
lock_file = _lock(exclusive=False)
yield lock_file
_lock(exclusive=True)


def record_access(
    pex_dir,  # type: Union[UnzipDir, VenvDir]
    last_access=None,  # type: Optional[float]
):
    # type: (...) -> None
    """Stamp `pex_dir.path` with an access time, leaving its modification time unchanged.

    :param pex_dir: The cache directory being accessed; only its `path` attribute is used.
    :param last_access: The access time to record, in seconds since the epoch. Defaults to the
                        current time.
    """

    # N.B.: We explicitly set atime and do not rely on the filesystem implicitly setting it when the
    # directory is read since filesystems may be mounted noatime, nodiratime or relatime on Linux
    # and similar toggles exist, at least in part, for some macOS file systems.
    atime = time.time() if last_access is None else last_access

    # `os.utime` always sets both timestamps; re-use the current mtime so only atime changes.
    mtime = os.stat(pex_dir.path).st_mtime
    os.utime(pex_dir.path, (atime, mtime))


def last_access_before(cutoff):
    # type: (datetime) -> Iterator[Union[UnzipDir, VenvDir]]
    """Yield the directories whose filesystem access time is earlier than `cutoff`.

    Candidates are everything produced by `UnzipDir.iter_all()` followed by everything produced by
    `VenvDir.iter_all()`. Each candidate's `st_atime` is converted with `datetime.fromtimestamp`,
    which produces a naive local-time value, so `cutoff` needs to be comparable with that.
    """
    # NOTE(review): UnzipDir and VenvDir look like they may only be imported under TYPE_CHECKING in
    # this module - confirm they are available at runtime.
    for pex_dir in itertools.chain(UnzipDir.iter_all(), VenvDir.iter_all()):
        last_access = datetime.fromtimestamp(os.stat(pex_dir.path).st_atime)
        if last_access < cutoff:
            yield pex_dir
177 changes: 132 additions & 45 deletions pex/cache/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,17 +5,24 @@

import os.path
import sqlite3
from contextlib import contextmanager
from contextlib import closing, contextmanager

from pex.atomic_directory import atomic_directory
from pex.cache.dirs import VenvDir
from pex.cache.dirs import (
AtomicCacheDir,
BootstrapDir,
InstalledWheelDir,
UnzipDir,
UserCodeDir,
VenvDir,
)
from pex.common import CopyMode
from pex.dist_metadata import ProjectNameAndVersion
from pex.typing import TYPE_CHECKING
from pex.typing import TYPE_CHECKING, cast
from pex.variables import ENV

if TYPE_CHECKING:
from typing import Iterator, List, Tuple, Union
from typing import Dict, Iterator, List, Optional, Union

from pex.pex_info import PexInfo

Expand All @@ -25,48 +32,46 @@
CREATE TABLE wheels (
name TEXT NOT NULL,
hash TEXT NOT NULL,
install_hash TEXT NOT NULL,
wheel_hash TEXT,
project_name TEXT NOT NULL,
version TEXT NOT NULL,
PRIMARY KEY (name ASC, hash ASC)
PRIMARY KEY (name ASC, install_hash ASC)
) WITHOUT ROWID;
CREATE UNIQUE INDEX wheels_idx_hash ON wheels (hash ASC);
CREATE INDEX wheels_idx_project_name ON wheels (project_name ASC);
CREATE INDEX wheels_idx_version ON wheels (version ASC);
CREATE UNIQUE INDEX wheels_idx_install_hash ON wheels (install_hash ASC);
CREATE INDEX wheels_idx_project_name_version ON wheels (project_name ASC, version ASC);
CREATE TABLE zipapps (
pex_hash TEXT PRIMARY KEY ASC,
bootstrap_hash TEXT NOT NULL,
code_hash TEXT NOT NULL
) WITHOUT ROWID;
CREATE INDEX zipapps_idx_bootstrap_hash ON zipapps (bootstrap_hash ASC);
CREATE INDEX zipapps_idx_code_hash ON zipapps (code_hash ASC);
CREATE TABLE zipapp_deps (
pex_hash TEXT NOT NULL REFERENCES zipapps(pex_hash) ON DELETE CASCADE,
wheel_hash TEXT NOT NULL REFERENCES wheels(hash) ON DELETE CASCADE
wheel_install_hash TEXT NOT NULL REFERENCES wheels(install_hash) ON DELETE CASCADE
);
CREATE INDEX zipapp_deps_idx_pex_hash ON zipapp_deps (pex_hash ASC);
CREATE INDEX zipapp_deps_idx_wheel_hash ON zipapp_deps (wheel_hash ASC);
CREATE INDEX zipapp_deps_idx_wheel_install_hash ON zipapp_deps (wheel_install_hash ASC);
CREATE TABLE venvs (
short_hash TEXT PRIMARY KEY ASC,
pex_hash TEXT NOT NULL,
contents_hash TEXT NOT NULL
) WITHOUT ROWID;
CREATE INDEX venvs_idx_pex_hash ON venvs (pex_hash ASC);
CREATE UNIQUE INDEX venvs_idx_pex_hash_contents_hash ON venvs (pex_hash ASC, contents_hash ASC);
CREATE TABLE venv_deps (
venv_hash TEXT NOT NULL REFERENCES venvs(short_hash) ON DELETE CASCADE,
wheel_hash TEXT NOT NULL REFERENCES wheels(hash) ON DELETE CASCADE
wheel_install_hash TEXT NOT NULL REFERENCES wheels(install_hash) ON DELETE CASCADE
);
CREATE INDEX venv_deps_idx_venv_hash ON venv_deps (venv_hash ASC);
CREATE INDEX venv_deps_idx_wheel_hash ON venv_deps (wheel_hash ASC);
CREATE INDEX venv_deps_idx_wheel_hash ON venv_deps (wheel_install_hash ASC);
"""


@contextmanager
def db_connection():
def _db_connection():
# type: () -> Iterator[sqlite3.Connection]
db_dir = os.path.join(ENV.PEX_ROOT, "data")
with atomic_directory(db_dir) as atomic_dir:
Expand All @@ -87,66 +92,71 @@ def db_connection():
def _inserted_wheels(pex_info):
# type: (PexInfo) -> Iterator[sqlite3.Cursor]

wheels = [] # type: List[Tuple[str, str, str, str]]
for wheel_name, wheel_hash in pex_info.distributions.items():
wheels = [] # type: List[Dict[str, Optional[str]]]
for wheel_name, install_hash in pex_info.distributions.items():
wheel_hash = None # type: Optional[str]
installed_wheel_dir = InstalledWheelDir.create(wheel_name, install_hash)
if os.path.islink(installed_wheel_dir):
wheel_hash_dir, _ = os.path.split(os.path.realpath(installed_wheel_dir))
wheel_hash = os.path.basename(wheel_hash_dir)

pnav = ProjectNameAndVersion.from_filename(wheel_name)
wheels.append(
(
wheel_name,
wheel_hash,
str(pnav.canonicalized_project_name),
str(pnav.canonicalized_version),
dict(
name=wheel_name,
install_hash=install_hash,
wheel_hash=wheel_hash,
project_name=str(pnav.canonicalized_project_name),
version=str(pnav.canonicalized_version),
)
)

with db_connection() as conn:
with _db_connection() as conn:
cursor = conn.executemany(
"""
INSERT OR IGNORE INTO wheels (
INSERT INTO wheels (
name,
hash,
install_hash,
wheel_hash,
project_name,
version
) VALUES (?, ?, ?, ?)
) VALUES (:name, :install_hash, :wheel_hash, :project_name, :version)
ON CONFLICT (name, install_hash) DO UPDATE SET wheel_hash = :wheel_hash
""",
wheels,
)
yield cursor
cursor.close()


def record_zipapp_dependencies(pex_info):
def record_zipapp_install(pex_info):
# type: (PexInfo) -> None

with _inserted_wheels(pex_info) as cursor:
cursor.execute(
"""
INSERT OR IGNORE INTO zipapps (
pex_hash,
code_hash,
bootstrap_hash
bootstrap_hash,
code_hash
) VALUES (?, ?, ?)
""",
(pex_info.pex_hash, pex_info.bootstrap_hash, pex_info.code_hash),
).executemany(
"""
INSERT OR IGNORE INTO zipapp_deps (
pex_hash,
wheel_hash
wheel_install_hash
) VALUES (?, ?)
""",
tuple(
(pex_info.pex_hash, wheel_hash) for wheel_hash in pex_info.distributions.values()
(pex_info.pex_hash, wheel_install_hash)
for wheel_install_hash in pex_info.distributions.values()
),
).close()


def record_zipapp_access(unzip_dir):
# type: (...) -> None
os.utime(unzip_dir, None)


def record_venv_dependencies(
def record_venv_install(
copy_mode, # type: CopyMode.Value
pex_info, # type: PexInfo
venv_dir, # type: VenvDir
Expand All @@ -173,16 +183,93 @@ def record_venv(coon_or_cursor):
"""
INSERT OR IGNORE INTO venv_deps (
venv_hash,
wheel_hash
wheel_install_hash
) VALUES (?, ?)
""",
tuple((venv_hash, wheel_hash) for wheel_hash in pex_info.distributions.values()),
tuple(
(venv_hash, wheel_install_hash)
for wheel_install_hash in pex_info.distributions.values()
),
).close()
else:
with db_connection() as conn:
with _db_connection() as conn:
record_venv(conn).close()


def record_venv_access(venv_dir):
# type: (VenvDir) -> None
os.utime(venv_dir.path, None)
def zipapp_deps(pex_dir):
    # type: (UnzipDir) -> Iterator[Union[BootstrapDir, UserCodeDir, str, InstalledWheelDir]]
    """Yield the cache entries recorded in the database as dependencies of an unzipped PEX.

    The bootstrap dir and user code dir come first, followed by the wheels linked to the PEX via
    the `zipapp_deps` table. For a wheel row with a `wheel_hash`, both an `InstalledWheelDir` for
    that hash and the path of the install-hash keyed directory are yielded; otherwise just the
    install-hash keyed `InstalledWheelDir` is yielded.

    Nothing is yielded if the PEX has no row in the `zipapps` table.
    """
    with _db_connection() as conn:
        with closing(
            conn.execute(
                "SELECT bootstrap_hash, code_hash FROM zipapps WHERE pex_hash = ?",
                [pex_dir.pex_hash],
            )
        ) as cursor:
            row = cursor.fetchone()
            if row is None:
                # No install was recorded for this PEX. The wheels query below joins against
                # `zipapps` on the same hash, so it could not return any rows either.
                return
            bootstrap_hash, code_hash = row
            yield BootstrapDir.create(bootstrap_hash)
            yield UserCodeDir.create(code_hash)

        with closing(
            conn.execute(
                """
                SELECT name, install_hash, wheel_hash FROM wheels
                JOIN zipapp_deps ON zipapp_deps.wheel_install_hash = wheels.install_hash
                JOIN zipapps ON zipapps.pex_hash = zipapp_deps.pex_hash
                WHERE zipapps.pex_hash = ?
                """,
                [pex_dir.pex_hash],
            )
        ) as cursor:
            for wheel_name, wheel_install_hash, wheel_hash in cursor:
                installed_wheel_dir = InstalledWheelDir.create(
                    wheel_name=wheel_name, wheel_hash=wheel_install_hash
                )
                if wheel_hash:
                    # presumably the install-hash dir is a symlink to the wheel-hash dir in this
                    # case, so it is reported as a plain path - verify against the recording code.
                    yield InstalledWheelDir.create(wheel_name=wheel_name, wheel_hash=wheel_hash)
                    yield installed_wheel_dir.path
                else:
                    yield installed_wheel_dir


def venv_deps(venv_dir):
    # type: (VenvDir) -> Iterator[Union[str, InstalledWheelDir]]
    """Yield the cache entries recorded in the database as dependencies of a venv.

    The venv's short path (looked up by its `pex_hash` and `contents_hash`) comes first, followed
    by the wheels linked to the venv via the `venv_deps` table. For a wheel row with a
    `wheel_hash`, both an `InstalledWheelDir` for that hash and the path of the install-hash keyed
    directory are yielded; otherwise just the install-hash keyed `InstalledWheelDir` is yielded.

    Nothing is yielded if the venv has no row in the `venvs` table.
    """
    with _db_connection() as conn:
        with closing(
            conn.execute(
                "SELECT short_hash FROM venvs WHERE pex_hash = ? AND contents_hash = ?",
                (venv_dir.pex_hash, venv_dir.contents_hash),
            )
        ) as cursor:
            row = cursor.fetchone()
            if row is None:
                # No install was recorded for this venv, so there is no short hash to look up
                # wheels with.
                return
            short_hash = cast(str, row[0])
            yield VenvDir.short_path(short_hash, include_symlink=True)

        with closing(
            conn.execute(
                """
                SELECT name, install_hash, wheel_hash FROM wheels
                JOIN venv_deps ON venv_deps.wheel_install_hash = wheels.install_hash
                JOIN venvs ON venvs.short_hash = venv_deps.venv_hash
                WHERE venvs.short_hash = ?
                """,
                [short_hash],
            )
        ) as cursor:
            for wheel_name, wheel_install_hash, wheel_hash in cursor:
                installed_wheel_dir = InstalledWheelDir.create(
                    wheel_name=wheel_name, wheel_hash=wheel_install_hash
                )
                if wheel_hash:
                    # presumably the install-hash dir is a symlink to the wheel-hash dir in this
                    # case, so it is reported as a plain path - verify against the recording code.
                    yield InstalledWheelDir.create(wheel_name=wheel_name, wheel_hash=wheel_hash)
                    yield installed_wheel_dir.path
                else:
                    yield installed_wheel_dir


def dir_dependencies(pex_dir):
    # type: (Union[UnzipDir, VenvDir]) -> Iterator[Union[str, AtomicCacheDir]]
    """Return the dependency iterator for `pex_dir`.

    An `UnzipDir` is handled by `zipapp_deps`; anything else is handed to `venv_deps`.
    """
    if isinstance(pex_dir, UnzipDir):
        return zipapp_deps(pex_dir)
    return venv_deps(pex_dir)


def delete(pex_dir):
    # type: (Union[UnzipDir, VenvDir]) -> None
    """Placeholder: currently does nothing with `pex_dir` and returns `None`."""
    return None
Loading

0 comments on commit b91e18c

Please sign in to comment.