Skip to content

Commit

Permalink
Tighten up user code handling.
Browse files Browse the repository at this point in the history
Now only user code is extracted, and only if there is any.
  • Loading branch information
jsirois committed Nov 3, 2024
1 parent a9aa11f commit 3210cfd
Show file tree
Hide file tree
Showing 6 changed files with 69 additions and 26 deletions.
7 changes: 4 additions & 3 deletions pex/hashing.py
Original file line number Diff line number Diff line change
Expand Up @@ -200,10 +200,11 @@ def dir_hash(
def iter_files():
# type: () -> Iterator[Text]
for root, dirs, files in os.walk(top, followlinks=True):
dirs[:] = [d for d in dirs if dir_filter(d)]
dirs[:] = [d for d in dirs if dir_filter(os.path.join(root, d))]
for f in files:
if file_filter(f):
yield os.path.join(root, f)
path = os.path.join(root, f)
if file_filter(path):
yield path

file_paths = sorted(iter_files())

Expand Down
50 changes: 35 additions & 15 deletions pex/layout.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,11 @@ def extract_main(self, dest_dir):
# type: (str) -> None
raise NotImplementedError()

@abstractmethod
def extract_import_hook(self, dest_dir):
    # type: (str) -> None
    """Extract the `__pex__/__init__.py` import hook into `dest_dir`.

    This is an abstract declaration: it always raises, so each concrete layout must
    supply its own implementation.

    :param dest_dir: The directory to extract the import hook into.
    :raises NotImplementedError: Always.
    """
    raise NotImplementedError()

def record(self, dest_dir):
# type: (str) -> None
self._layout.record(dest_dir)
Expand Down Expand Up @@ -334,11 +339,6 @@ def _ensure_installed(
bootstrap_cache = BootstrapDir.create(
pex_info.bootstrap_hash, pex_root=pex_info.pex_root
)
if pex_info.code_hash is None:
raise AssertionError(
"Expected code_hash to be populated for {}.".format(layout)
)
code_cache = UserCodeDir.create(pex_info.code_hash, pex_root=pex_info.pex_root)

with atomic_directory(
bootstrap_cache, source=layout.bootstrap_strip_prefix()
Expand All @@ -357,14 +357,18 @@ def _ensure_installed(
install_to=install_to,
)

with atomic_directory(code_cache) as code_chroot:
if not code_chroot.is_finalized():
layout.extract_code(code_chroot.work_dir)
for path in os.listdir(code_cache):
os.symlink(
os.path.join(os.path.relpath(code_cache, install_to), path),
os.path.join(chroot.work_dir, path),
if pex_info.code_hash:
code_cache = UserCodeDir.create(
pex_info.code_hash, pex_root=pex_info.pex_root
)
with atomic_directory(code_cache) as code_chroot:
if not code_chroot.is_finalized():
layout.extract_code(code_chroot.work_dir)
for path in os.listdir(code_cache):
os.symlink(
os.path.join(os.path.relpath(code_cache, install_to), path),
os.path.join(chroot.work_dir, path),
)

layout.extract_pex_info(chroot.work_dir)
layout.extract_main(chroot.work_dir)
Expand Down Expand Up @@ -485,7 +489,7 @@ def extract_code(self, dest_dir):
# type: (str) -> None
for name in self.names:
if name not in ("__main__.py", PEX_INFO_PATH) and not name.startswith(
(BOOTSTRAP_DIR, DEPS_DIR)
("__pex__", BOOTSTRAP_DIR, DEPS_DIR)
):
self.zfp.extract(name, dest_dir)

Expand All @@ -497,6 +501,10 @@ def extract_main(self, dest_dir):
# type: (str) -> None
self.zfp.extract("__main__.py", dest_dir)

def extract_import_hook(self, dest_dir):
    # type: (str) -> None
    """Extract the `__pex__/__init__.py` import hook from the zip into `dest_dir`.

    :param dest_dir: The directory the archive member is extracted under.
    """
    # Zip member names always use forward slashes, regardless of platform.
    hook_member = "__pex__/__init__.py"
    self.zfp.extract(hook_member, dest_dir)

def __str__(self):
return "PEX zipfile {}".format(self.path)

Expand Down Expand Up @@ -546,7 +554,7 @@ def extract_code(self, dest_dir):
for root, dirs, files in os.walk(self._path):
rel_root = os.path.relpath(root, self._path)
if root == self._path:
dirs[:] = [d for d in dirs if d != DEPS_DIR]
dirs[:] = [d for d in dirs if d not in ("__pex__", DEPS_DIR)]
files[:] = [
f for f in files if f not in ("__main__.py", PEX_INFO_PATH, BOOTSTRAP_DIR)
]
Expand All @@ -566,6 +574,12 @@ def extract_main(self, dest_dir):
# type: (str) -> None
safe_copy(os.path.join(self._path, "__main__.py"), os.path.join(dest_dir, "__main__.py"))

def extract_import_hook(self, dest_dir):
    # type: (str) -> None
    """Copy the `__pex__/__init__.py` import hook from this PEX directory into `dest_dir`.

    The `__pex__` package directory is created under `dest_dir` before the copy.

    :param dest_dir: The directory to place the `__pex__` package under.
    """
    # The hook lives at the same relative path in both the source and the destination.
    hook_rel_path = os.path.join("__pex__", "__init__.py")
    hook_dest = os.path.join(dest_dir, hook_rel_path)
    safe_mkdir(os.path.dirname(hook_dest))
    safe_copy(os.path.join(self._path, hook_rel_path), hook_dest)

def __str__(self):
return "Spread PEX directory {}".format(self._path)

Expand Down Expand Up @@ -617,7 +631,7 @@ def extract_code(self, dest_dir):
for root, dirs, files in os.walk(self._path):
rel_root = os.path.relpath(root, self._path)
if root == self._path:
dirs[:] = [d for d in dirs if d not in (DEPS_DIR, BOOTSTRAP_DIR)]
dirs[:] = [d for d in dirs if d not in ("__pex__", DEPS_DIR, BOOTSTRAP_DIR)]
files[:] = [f for f in files if f not in ("__main__.py", PEX_INFO_PATH)]
for d in dirs:
safe_mkdir(os.path.join(dest_dir, rel_root, d))
Expand All @@ -635,6 +649,12 @@ def extract_main(self, dest_dir):
# type: (str) -> None
safe_copy(os.path.join(self._path, "__main__.py"), os.path.join(dest_dir, "__main__.py"))

def extract_import_hook(self, dest_dir):
    # type: (str) -> None
    """Copy the `__pex__/__init__.py` import hook from this PEX directory into `dest_dir`.

    :param dest_dir: The directory to place the `__pex__` package under; the package
        directory is created first.
    """
    hook_parts = ("__pex__", "__init__.py")
    src_hook = os.path.join(self._path, *hook_parts)
    dst_hook = os.path.join(dest_dir, *hook_parts)

    # The destination package directory must be in place before the copy.
    safe_mkdir(os.path.dirname(dst_hook))
    safe_copy(src_hook, dst_hook)

def __str__(self):
return "Loose PEX directory {}".format(self._path)

Expand Down
6 changes: 5 additions & 1 deletion pex/pex_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -461,8 +461,12 @@ def _precompile_source(self):
self._chroot.touch(compiled, label="bytecode")

def _prepare_code(self):
chroot_path = self._chroot.path()
self._pex_info.code_hash = CacheHelper.pex_code_hash(
self._chroot.path(), exclude_dirs=(layout.BOOTSTRAP_DIR, layout.DEPS_DIR)
chroot_path,
exclude_dirs=tuple(
os.path.join(chroot_path, d) for d in (layout.BOOTSTRAP_DIR, layout.DEPS_DIR)
),
)
self._pex_info.pex_hash = hashlib.sha1(self._pex_info.dump().encode("utf-8")).hexdigest()
self._chroot.write(self._pex_info.dump().encode("utf-8"), PexInfo.PATH, label="manifest")
Expand Down
6 changes: 5 additions & 1 deletion pex/pip/vcs.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,10 @@ def digest_vcs_archive(
hashing.dir_hash(
directory=chroot,
digest=digest,
dir_filter=lambda dir_path: not is_pyc_dir(dir_path) and dir_path != vcs_control_dir,
dir_filter=(
lambda dir_path: (
not is_pyc_dir(dir_path) and os.path.basename(dir_path) != vcs_control_dir
)
),
file_filter=lambda f: not is_pyc_file(f),
)
22 changes: 17 additions & 5 deletions pex/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,8 @@ def access_zipped_assets(cls, static_module_name, static_path, dir_location=None


class CacheHelper(object):
_EMPTY_CODE_HASH = hashlib.sha1().hexdigest()

@classmethod
def hash(cls, path, digest=None, hasher=sha1):
# type: (Text, Optional[Hasher], Callable[[], Hasher]) -> str
Expand All @@ -84,18 +86,28 @@ def pex_code_hash(
cls,
directory,
exclude_dirs=(), # type: Container[str]
exclude_files=(), # type: Container[str]
):
# type: (...) -> str
"""Return a reproducible hash of the contents of a loose PEX; excluding all `.pyc` files."""
# type: (...) -> Optional[str]
"""Return a reproducible hash of the user code of a loose PEX; excluding all `.pyc` files.
If no code is found, `None` is returned.
"""
digest = hashlib.sha1()
hashing.dir_hash(
directory=directory,
digest=digest,
dir_filter=lambda d: not is_pyc_dir(d) and d not in exclude_dirs,
file_filter=lambda file_path: not is_pyc_file(file_path)
and not file_path.startswith("."),
file_filter=(
lambda f: (
not is_pyc_file(f)
and not os.path.basename(f).startswith(".")
and f not in exclude_files
)
),
)
return digest.hexdigest()
code_hash = digest.hexdigest()
return None if code_hash == cls._EMPTY_CODE_HASH else code_hash

@classmethod
def dir_hash(cls, directory, digest=None, hasher=sha1):
Expand Down
4 changes: 3 additions & 1 deletion tests/integration/cli/commands/test_cache_prune.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,9 @@ def test_installed_wheel_prune_run_time(
assert DiskUsage.collect(CacheDir.INSTALLED_WHEELS.path()).size > 0
assert DiskUsage.collect(CacheDir.UNZIPPED_PEXES.path()).size > 0
assert DiskUsage.collect(CacheDir.BOOTSTRAPS.path()).size > 0
assert DiskUsage.collect(CacheDir.USER_CODE.path()).size > 0
assert (
0 == DiskUsage.collect(CacheDir.USER_CODE.path()).size
), "There is no user code in the PEX."
assert (
pre_prune_du.size > pex_size
), "Expected the unzipped PEX to be larger than the zipped pex."
Expand Down

0 comments on commit 3210cfd

Please sign in to comment.