Skip to content

Commit

Permalink
Restore AtomicDirectory non-locked good behavior. (#1974)
Browse files Browse the repository at this point in the history
This was broken by #1961 which did not account for the two unlocked
uses of AtomicDirectory by the pex.resolver for wheel building and
installing. Although those directories deserve a second look as
candidates for exclusive locking, we have seen no known issues with
those directories since introduction of AtomicDirectory up until #1961.
As such, this change just restores the prior behavior for those two
cases.

Fixes #1968
  • Loading branch information
jsirois authored Nov 3, 2022
1 parent 22e6515 commit 515093b
Show file tree
Hide file tree
Showing 4 changed files with 162 additions and 95 deletions.
49 changes: 45 additions & 4 deletions pex/atomic_directory.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import os
import threading
from contextlib import contextmanager
from uuid import uuid4

from pex import pex_warnings
from pex.common import safe_mkdir, safe_rmtree
Expand All @@ -19,10 +20,50 @@


class AtomicDirectory(object):
def __init__(self, target_dir):
# type: (str) -> None
"""A directory whose contents are populated atomically.
By default, an atomic directory allows racing processes to populate a directory atomically.
Each gets its own unique work directory to populate non-atomically, but the final target
directory is swapped to atomically from one of the racing work directories.
In order to lock the atomic directory so that only 1 process works to populate it, use the
`atomic_directory` context manager.
If the target directory will have immutable contents, either approach will do. If not, the
exclusively locked `atomic_directory` context manager should be used.
The common case for a non-obvious mutable directory in Python is any directory `.py` files are
populated to. Those files will later be bytecode compiled to adjacent `.pyc` files on the fly
by the Python interpreter and can go missing underneath a process looking at that directory if
AtomicDirectory is used directly. For the target directory `sys_path_entry` that failure mode
looks like:
Process A -> Starts work to create `sys_path_entry`.
Process B -> Starts work to create `sys_path_entry`.
Process A -> Atomically creates `sys_path_entry`.
Process C -> Sees `sys_path_entry` from Process A and starts running Python code in that dir.
Process D -> Sees `sys_path_entry` from Process A and starts running Python code in that dir.
Process D -> Succeeds importing `foo`.
Process C -> Starts to import `sys_path_entry/foo.py` and Python sees the corresponding .pyc
file already exists (Process D created it).
Process B -> Atomically creates `sys_path_entry`, replacing the result from Process A and
disappearing any `.pyc` files.
Process C -> Goes to import from the `.pyc` file it found and errors since it is gone.
The background facts in this case are that CPython reasonably does a check then act surrounding
.pyc files and uses no lock. It assumes Python source trees will not be disturbed during its
run. Without an exclusively locked `atomic_directory` Pex can allow the check-then-act window to
be observed by racing processes.
"""

def __init__(
self,
target_dir, # type: str
locked=False, # type: bool
):
# type: (...) -> None
self._target_dir = target_dir
self._work_dir = "{}.workdir".format(target_dir)
self._work_dir = "{}.{}.work".format(target_dir, "lck" if locked else uuid4().hex)

@property
def work_dir(self):
Expand Down Expand Up @@ -124,7 +165,7 @@ def atomic_directory(
# We use double-checked locking with the check being target_dir existence and the lock being an
# exclusive blocking file lock.

atomic_dir = AtomicDirectory(target_dir=target_dir)
atomic_dir = AtomicDirectory(target_dir=target_dir, locked=True)
if atomic_dir.is_finalized():
# Our work is already done for us so exit early.
yield atomic_dir
Expand Down
2 changes: 2 additions & 0 deletions pex/venv/pex.py
Original file line number Diff line number Diff line change
Expand Up @@ -426,6 +426,8 @@ def sys_executable_paths():
# These are used by Pex's Pip venv to implement universal locks.
"_PEX_PYTHON_VERSIONS_FILE",
"_PEX_TARGET_SYSTEMS_FILE",
# This is used as an experiment knob for atomic_directory locking.
"_PEX_FILE_LOCK_STYLE",
)
]
if ignored_pex_env_vars:
Expand Down
115 changes: 114 additions & 1 deletion tests/test_atomic_directory.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,24 @@
# Copyright 2022 Pants project contributors (see CONTRIBUTORS.md).
# Licensed under the Apache License, Version 2.0 (see LICENSE).

from pex.atomic_directory import FileLockStyle, _is_bsd_lock
import errno
import os
from contextlib import contextmanager

import pytest

from pex.atomic_directory import AtomicDirectory, FileLockStyle, _is_bsd_lock, atomic_directory
from pex.common import temporary_dir, touch
from pex.testing import environment_as
from pex.typing import TYPE_CHECKING

try:
from unittest import mock
except ImportError:
import mock # type: ignore[no-redef,import]

if TYPE_CHECKING:
from typing import Iterator, Optional, Type


def test_is_bsd_lock():
Expand All @@ -26,3 +42,100 @@ def test_is_bsd_lock():
), "Expected the default lock style to be taken from the environment when defined."
assert not _is_bsd_lock(lock_style=FileLockStyle.POSIX)
assert _is_bsd_lock(lock_style=FileLockStyle.BSD)


@contextmanager
def maybe_raises(exception=None):
# type: (Optional[Type[Exception]]) -> Iterator[None]
@contextmanager
def noop():
yield

context = noop() if exception is None else pytest.raises(exception)
with context:
yield


def atomic_directory_finalize_test(errno, expect_raises=None):
# type: (int, Optional[Type[Exception]]) -> None
with mock.patch("os.rename", spec_set=True, autospec=True) as mock_rename:
mock_rename.side_effect = OSError(errno, os.strerror(errno))
with maybe_raises(expect_raises):
AtomicDirectory("to.dir").finalize()


def test_atomic_directory_finalize_eexist():
# type: () -> None
atomic_directory_finalize_test(errno.EEXIST)


def test_atomic_directory_finalize_enotempty():
# type: () -> None
atomic_directory_finalize_test(errno.ENOTEMPTY)


def test_atomic_directory_finalize_eperm():
# type: () -> None
atomic_directory_finalize_test(errno.EPERM, expect_raises=OSError)


def test_atomic_directory_empty_workdir_finalize():
# type: () -> None
with temporary_dir() as sandbox:
target_dir = os.path.join(sandbox, "target_dir")
assert not os.path.exists(target_dir)

with atomic_directory(target_dir) as atomic_dir:
assert not atomic_dir.is_finalized()
assert target_dir == atomic_dir.target_dir
assert os.path.exists(atomic_dir.work_dir)
assert os.path.isdir(atomic_dir.work_dir)
assert [] == os.listdir(atomic_dir.work_dir)

touch(os.path.join(atomic_dir.work_dir, "created"))

assert not os.path.exists(target_dir)

assert not os.path.exists(atomic_dir.work_dir), "The work_dir should always be cleaned up."
assert os.path.exists(os.path.join(target_dir, "created"))


def test_atomic_directory_empty_workdir_failure():
# type: () -> None
class SimulatedRuntimeError(RuntimeError):
pass

with temporary_dir() as sandbox:
target_dir = os.path.join(sandbox, "target_dir")
with pytest.raises(SimulatedRuntimeError):
with atomic_directory(target_dir) as atomic_dir:
assert not atomic_dir.is_finalized()
touch(os.path.join(atomic_dir.work_dir, "created"))
raise SimulatedRuntimeError()

assert not os.path.exists( # type: ignore[unreachable]
atomic_dir.work_dir
), "The work_dir should always be cleaned up."
assert not os.path.exists(target_dir), (
"When the context raises the work_dir it was given should not be moved to the "
"target_dir."
)


def test_atomic_directory_empty_workdir_finalized():
# type: () -> None
with temporary_dir() as target_dir:
with atomic_directory(target_dir) as work_dir:
assert (
work_dir.is_finalized()
), "When the target_dir exists no work_dir should be created."


def test_atomic_directory_locked_mode():
# type: () -> None

assert AtomicDirectory("unlocked").work_dir != AtomicDirectory("unlocked").work_dir
assert (
AtomicDirectory("locked", locked=True).work_dir
== AtomicDirectory("locked", locked=True).work_dir
)
91 changes: 1 addition & 90 deletions tests/test_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,9 @@
import contextlib
import errno
import os
from contextlib import contextmanager

import pytest

from pex.atomic_directory import AtomicDirectory, atomic_directory
from pex.common import (
Chroot,
PermPreservingZipFile,
Expand All @@ -29,94 +27,7 @@
import mock # type: ignore[no-redef,import]

if TYPE_CHECKING:
from typing import Any, Iterator, Optional, Tuple, Type


@contextmanager
def maybe_raises(exception=None):
# type: (Optional[Type[Exception]]) -> Iterator[None]
@contextmanager
def noop():
yield

context = noop() if exception is None else pytest.raises(exception)
with context:
yield


def atomic_directory_finalize_test(errno, expect_raises=None):
# type: (int, Optional[Type[Exception]]) -> None
with mock.patch("os.rename", spec_set=True, autospec=True) as mock_rename:
mock_rename.side_effect = OSError(errno, os.strerror(errno))
with maybe_raises(expect_raises):
AtomicDirectory("to.dir").finalize()


def test_atomic_directory_finalize_eexist():
# type: () -> None
atomic_directory_finalize_test(errno.EEXIST)


def test_atomic_directory_finalize_enotempty():
# type: () -> None
atomic_directory_finalize_test(errno.ENOTEMPTY)


def test_atomic_directory_finalize_eperm():
# type: () -> None
atomic_directory_finalize_test(errno.EPERM, expect_raises=OSError)


def test_atomic_directory_empty_workdir_finalize():
# type: () -> None
with temporary_dir() as sandbox:
target_dir = os.path.join(sandbox, "target_dir")
assert not os.path.exists(target_dir)

with atomic_directory(target_dir) as atomic_dir:
assert not atomic_dir.is_finalized()
assert target_dir == atomic_dir.target_dir
assert os.path.exists(atomic_dir.work_dir)
assert os.path.isdir(atomic_dir.work_dir)
assert [] == os.listdir(atomic_dir.work_dir)

touch(os.path.join(atomic_dir.work_dir, "created"))

assert not os.path.exists(target_dir)

assert not os.path.exists(atomic_dir.work_dir), "The work_dir should always be cleaned up."
assert os.path.exists(os.path.join(target_dir, "created"))


def test_atomic_directory_empty_workdir_failure():
# type: () -> None
class SimulatedRuntimeError(RuntimeError):
pass

with temporary_dir() as sandbox:
target_dir = os.path.join(sandbox, "target_dir")
with pytest.raises(SimulatedRuntimeError):
with atomic_directory(target_dir) as atomic_dir:
assert not atomic_dir.is_finalized()
touch(os.path.join(atomic_dir.work_dir, "created"))
raise SimulatedRuntimeError()

assert not os.path.exists( # type: ignore[unreachable]
atomic_dir.work_dir
), "The work_dir should always be cleaned up."
assert not os.path.exists(target_dir), (
"When the context raises the work_dir it was given should not be moved to the "
"target_dir."
)


def test_atomic_directory_empty_workdir_finalized():
# type: () -> None
with temporary_dir() as target_dir:
with atomic_directory(target_dir) as work_dir:
assert (
work_dir.is_finalized()
), "When the target_dir exists no work_dir should be created."
from typing import Any, Iterator, Tuple


def extract_perms(path):
Expand Down

0 comments on commit 515093b

Please sign in to comment.