Skip to content

Commit

Permalink
Exclude pyc dirs, not include, when hashing code (#2286)
Browse files Browse the repository at this point in the history
This fixes #2285 by fixing a minor typo in
f9a9d94 / #2263 that caused the
computation of a PEX's code hash to not recurse into the correct
directories.

I think this explains the symptoms in #2285 due to caching: if a PEX
only contains files in subdirectories, the code hash will be
`da39a3ee5e6b4b0d3255bfef95601890afd80709` (the SHA-1 hash of no data)
and thus `venv create` might potentially refer to an
incorrectly cached installation.
  • Loading branch information
huonw authored Nov 10, 2023
1 parent f90e6ff commit aadfe9c
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 12 deletions.
2 changes: 1 addition & 1 deletion pex/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ def pex_code_hash(cls, directory):
hashing.dir_hash(
directory=directory,
digest=digest,
dir_filter=is_pyc_dir,
dir_filter=lambda d: not is_pyc_dir(d),
file_filter=lambda file_path: not is_pyc_file(file_path)
and not file_path.startswith("."),
)
Expand Down
37 changes: 26 additions & 11 deletions tests/test_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,17 @@
from hashlib import sha1
from textwrap import dedent

import pytest

from pex.common import safe_mkdir, safe_open, temporary_dir, touch
from pex.pex import PEX
from pex.pex_builder import PEXBuilder
from pex.typing import cast
from pex.typing import TYPE_CHECKING, cast
from pex.util import CacheHelper, DistributionHelper, named_temporary_file

if TYPE_CHECKING:
from typing import Callable

try:
from unittest import mock
except ImportError:
Expand Down Expand Up @@ -57,39 +62,49 @@ def test_hash():
assert hash_output == empty_hash.hexdigest()


def test_dir_hash():
# type: () -> None
@pytest.mark.parametrize(
("hasher", "includes_hidden_expected"),
[(CacheHelper.dir_hash, True), (CacheHelper.pex_code_hash, False)],
)
def test_directory_hasher(hasher, includes_hidden_expected):
# type: (Callable[[str], str], bool) -> None
with temporary_dir() as tmp_dir:
safe_mkdir(os.path.join(tmp_dir, "a", "b"))
with safe_open(os.path.join(tmp_dir, "c", "d", "e.py"), "w") as fp:
fp.write("contents1")
with safe_open(os.path.join(tmp_dir, "f.py"), "w") as fp:
fp.write("contents2")
hash1 = CacheHelper.dir_hash(tmp_dir)
hash1 = hasher(tmp_dir)

os.rename(os.path.join(tmp_dir, "c"), os.path.join(tmp_dir, "c-renamed"))
assert hash1 != CacheHelper.dir_hash(tmp_dir)
assert hash1 != hasher(tmp_dir)

os.rename(os.path.join(tmp_dir, "c-renamed"), os.path.join(tmp_dir, "c"))
assert hash1 == CacheHelper.dir_hash(tmp_dir)
assert hash1 == hasher(tmp_dir)

touch(os.path.join(tmp_dir, "c", "d", "e.pyc"))
assert hash1 == CacheHelper.dir_hash(tmp_dir)
assert hash1 == hasher(tmp_dir)
touch(os.path.join(tmp_dir, "c", "d", "e.pyc.123456789"))
assert hash1 == CacheHelper.dir_hash(tmp_dir)
assert hash1 == hasher(tmp_dir)

pycache_dir = os.path.join(tmp_dir, "__pycache__")
safe_mkdir(pycache_dir)
touch(os.path.join(pycache_dir, "f.pyc"))
assert hash1 == CacheHelper.dir_hash(tmp_dir)
assert hash1 == hasher(tmp_dir)
touch(os.path.join(pycache_dir, "f.pyc.123456789"))
assert hash1 == CacheHelper.dir_hash(tmp_dir)
assert hash1 == hasher(tmp_dir)

touch(os.path.join(pycache_dir, "f.py"))
assert hash1 == CacheHelper.dir_hash(
assert hash1 == hasher(
tmp_dir
), "All content under __pycache__ directories should be ignored."

with safe_open(os.path.join(tmp_dir, ".hidden"), "w") as fp:
fp.write("contents3")

includes_hidden = hash1 != hasher(tmp_dir)
assert includes_hidden == includes_hidden_expected


try:
import __builtin__ as python_builtins # type: ignore[import]
Expand Down

0 comments on commit aadfe9c

Please sign in to comment.