From aadfe9c203140d61c545a84160553397970ddde3 Mon Sep 17 00:00:00 2001 From: Huon Wilson Date: Fri, 10 Nov 2023 17:02:42 +1100 Subject: [PATCH] Exclude pyc dirs, not include, when hashing code (#2286) This fixes #2285 by fixing a minor typo in f9a9d949d95efd0153452dc255625656d628ee17 / #2263 that caused the computation of a PEX's code hash to not recur into the correct directories. I think this explains the symptoms in #2285 due to caching: if a PEX only contains files in subdirectories, the code hash will be `da39a3ee5e6b4b0d3255bfef95601890afd80709` (the SHA-1 hash of no data) and thus potentially the `venv create` might be referring to an incorrectly cached installation. --- pex/util.py | 2 +- tests/test_util.py | 37 ++++++++++++++++++++++++++----------- 2 files changed, 27 insertions(+), 12 deletions(-) diff --git a/pex/util.py b/pex/util.py index 82e6b4112..331af6e98 100644 --- a/pex/util.py +++ b/pex/util.py @@ -87,7 +87,7 @@ def pex_code_hash(cls, directory): hashing.dir_hash( directory=directory, digest=digest, - dir_filter=is_pyc_dir, + dir_filter=lambda d: not is_pyc_dir(d), file_filter=lambda file_path: not is_pyc_file(file_path) and not file_path.startswith("."), ) diff --git a/tests/test_util.py b/tests/test_util.py index 7e1532f43..900d05169 100644 --- a/tests/test_util.py +++ b/tests/test_util.py @@ -6,12 +6,17 @@ from hashlib import sha1 from textwrap import dedent +import pytest + from pex.common import safe_mkdir, safe_open, temporary_dir, touch from pex.pex import PEX from pex.pex_builder import PEXBuilder -from pex.typing import cast +from pex.typing import TYPE_CHECKING, cast from pex.util import CacheHelper, DistributionHelper, named_temporary_file +if TYPE_CHECKING: + from typing import Callable + try: from unittest import mock except ImportError: @@ -57,39 +62,49 @@ def test_hash(): assert hash_output == empty_hash.hexdigest() -def test_dir_hash(): - # type: () -> None +@pytest.mark.parametrize( + ("hasher", "includes_hidden_expected"), 
[(CacheHelper.dir_hash, True), (CacheHelper.pex_code_hash, False)], +) +def test_directory_hasher(hasher, includes_hidden_expected): + # type: (Callable[[str], str], bool) -> None with temporary_dir() as tmp_dir: safe_mkdir(os.path.join(tmp_dir, "a", "b")) with safe_open(os.path.join(tmp_dir, "c", "d", "e.py"), "w") as fp: fp.write("contents1") with safe_open(os.path.join(tmp_dir, "f.py"), "w") as fp: fp.write("contents2") - hash1 = CacheHelper.dir_hash(tmp_dir) + hash1 = hasher(tmp_dir) os.rename(os.path.join(tmp_dir, "c"), os.path.join(tmp_dir, "c-renamed")) - assert hash1 != CacheHelper.dir_hash(tmp_dir) + assert hash1 != hasher(tmp_dir) os.rename(os.path.join(tmp_dir, "c-renamed"), os.path.join(tmp_dir, "c")) - assert hash1 == CacheHelper.dir_hash(tmp_dir) + assert hash1 == hasher(tmp_dir) touch(os.path.join(tmp_dir, "c", "d", "e.pyc")) - assert hash1 == CacheHelper.dir_hash(tmp_dir) + assert hash1 == hasher(tmp_dir) touch(os.path.join(tmp_dir, "c", "d", "e.pyc.123456789")) - assert hash1 == CacheHelper.dir_hash(tmp_dir) + assert hash1 == hasher(tmp_dir) pycache_dir = os.path.join(tmp_dir, "__pycache__") safe_mkdir(pycache_dir) touch(os.path.join(pycache_dir, "f.pyc")) - assert hash1 == CacheHelper.dir_hash(tmp_dir) + assert hash1 == hasher(tmp_dir) touch(os.path.join(pycache_dir, "f.pyc.123456789")) - assert hash1 == CacheHelper.dir_hash(tmp_dir) + assert hash1 == hasher(tmp_dir) touch(os.path.join(pycache_dir, "f.py")) - assert hash1 == CacheHelper.dir_hash( + assert hash1 == hasher( tmp_dir ), "All content under __pycache__ directories should be ignored." + with safe_open(os.path.join(tmp_dir, ".hidden"), "w") as fp: + fp.write("contents3") + + includes_hidden = hash1 != hasher(tmp_dir) + assert includes_hidden == includes_hidden_expected + try: import __builtin__ as python_builtins # type: ignore[import]