Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Restore AtomicDirectory non-locked good behavior. #1974

Merged
merged 5 commits into from
Nov 3, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 45 additions & 4 deletions pex/atomic_directory.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import os
import threading
from contextlib import contextmanager
from uuid import uuid4

from pex import pex_warnings
from pex.common import safe_mkdir, safe_rmtree
Expand All @@ -19,10 +20,50 @@


class AtomicDirectory(object):
def __init__(self, target_dir):
# type: (str) -> None
"""A directory whose contents are populated atomically.

By default, an atomic directory allows racing processes to populate a directory atomically.
Each gets its own unique work directory to populate non-atomically, but the final target
directory is swapped to atomically from one of the racing work directories.

In order to lock the atomic directory so that only 1 process works to populate it, use the
`atomic_directory` context manager.

If the target directory will have immutable contents, either approach will do. If not, the
exclusively locked `atomic_directory` context manager should be used.

The common case for a non-obvious mutable directory in Python is any directory `.py` files are
populated to. Those files will later be bytecode compiled to adjacent `.pyc` files on the fly
by the Python interpreter and can go missing underneath a process looking at that directory if
AtomicDirectory is used directly. For the target directory `sys_path_entry` that failure mode
looks like:

Process A -> Starts work to create `sys_path_entry`.
Process B -> Starts work to create `sys_path_entry`.
Process A -> Atomically creates `sys_path_entry`.
Process C -> Sees `sys_path_entry` from Process A and starts running Python code in that dir.
Process D -> Sees `sys_path_entry` from Process A and starts running Python code in that dir.
Process D -> Succeeds importing `foo`.
Process C -> Starts to import `sys_path_entry/foo.py` and Python sees the corresponding .pyc
file already exists (Process D created it).
Process B -> Atomically creates `sys_path_entry`, replacing the result from Process A and
disappearing any `.pyc` files.
Process C -> Goes to import from the `.pyc` file it found and errors since it is gone.

The background facts in this case are that CPython reasonably does a check then act surrounding
.pyc files and uses no lock. It assumes Python source trees will not be disturbed during its
run. Without an exclusively locked `atomic_directory` Pex can allow the check-then-act window to
be observed by racing processes.
"""

def __init__(
self,
target_dir, # type: str
locked=False, # type: bool
):
# type: (...) -> None
self._target_dir = target_dir
self._work_dir = "{}.workdir".format(target_dir)
self._work_dir = "{}.{}.work".format(target_dir, "lck" if locked else uuid4().hex)

@property
def work_dir(self):
Expand Down Expand Up @@ -124,7 +165,7 @@ def atomic_directory(
# We use double-checked locking with the check being target_dir existence and the lock being an
# exclusive blocking file lock.

atomic_dir = AtomicDirectory(target_dir=target_dir)
atomic_dir = AtomicDirectory(target_dir=target_dir, locked=True)
if atomic_dir.is_finalized():
# Our work is already done for us so exit early.
yield atomic_dir
Expand Down
2 changes: 2 additions & 0 deletions pex/venv/pex.py
Original file line number Diff line number Diff line change
Expand Up @@ -426,6 +426,8 @@ def sys_executable_paths():
# These are used by Pex's Pip venv to implement universal locks.
"_PEX_PYTHON_VERSIONS_FILE",
"_PEX_TARGET_SYSTEMS_FILE",
# This is used as an experiment knob for atomic_directory locking.
"_PEX_FILE_LOCK_STYLE",
)
]
if ignored_pex_env_vars:
Expand Down
115 changes: 114 additions & 1 deletion tests/test_atomic_directory.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,24 @@
# Copyright 2022 Pants project contributors (see CONTRIBUTORS.md).
# Licensed under the Apache License, Version 2.0 (see LICENSE).

from pex.atomic_directory import FileLockStyle, _is_bsd_lock
import errno
import os
from contextlib import contextmanager

import pytest

from pex.atomic_directory import AtomicDirectory, FileLockStyle, _is_bsd_lock, atomic_directory
from pex.common import temporary_dir, touch
from pex.testing import environment_as
from pex.typing import TYPE_CHECKING

try:
from unittest import mock
except ImportError:
import mock # type: ignore[no-redef,import]

if TYPE_CHECKING:
from typing import Iterator, Optional, Type


def test_is_bsd_lock():
Expand All @@ -26,3 +42,100 @@ def test_is_bsd_lock():
), "Expected the default lock style to be taken from the environment when defined."
assert not _is_bsd_lock(lock_style=FileLockStyle.POSIX)
assert _is_bsd_lock(lock_style=FileLockStyle.BSD)


@contextmanager
def maybe_raises(exception=None):
# type: (Optional[Type[Exception]]) -> Iterator[None]
@contextmanager
def noop():
yield

context = noop() if exception is None else pytest.raises(exception)
with context:
yield


def atomic_directory_finalize_test(errno, expect_raises=None):
# type: (int, Optional[Type[Exception]]) -> None
with mock.patch("os.rename", spec_set=True, autospec=True) as mock_rename:
mock_rename.side_effect = OSError(errno, os.strerror(errno))
with maybe_raises(expect_raises):
AtomicDirectory("to.dir").finalize()


def test_atomic_directory_finalize_eexist():
# type: () -> None
atomic_directory_finalize_test(errno.EEXIST)


def test_atomic_directory_finalize_enotempty():
# type: () -> None
atomic_directory_finalize_test(errno.ENOTEMPTY)


def test_atomic_directory_finalize_eperm():
# type: () -> None
atomic_directory_finalize_test(errno.EPERM, expect_raises=OSError)


def test_atomic_directory_empty_workdir_finalize():
# type: () -> None
with temporary_dir() as sandbox:
target_dir = os.path.join(sandbox, "target_dir")
assert not os.path.exists(target_dir)

with atomic_directory(target_dir) as atomic_dir:
assert not atomic_dir.is_finalized()
assert target_dir == atomic_dir.target_dir
assert os.path.exists(atomic_dir.work_dir)
assert os.path.isdir(atomic_dir.work_dir)
assert [] == os.listdir(atomic_dir.work_dir)

touch(os.path.join(atomic_dir.work_dir, "created"))

assert not os.path.exists(target_dir)

assert not os.path.exists(atomic_dir.work_dir), "The work_dir should always be cleaned up."
assert os.path.exists(os.path.join(target_dir, "created"))


def test_atomic_directory_empty_workdir_failure():
# type: () -> None
class SimulatedRuntimeError(RuntimeError):
pass

with temporary_dir() as sandbox:
target_dir = os.path.join(sandbox, "target_dir")
with pytest.raises(SimulatedRuntimeError):
with atomic_directory(target_dir) as atomic_dir:
assert not atomic_dir.is_finalized()
touch(os.path.join(atomic_dir.work_dir, "created"))
raise SimulatedRuntimeError()

assert not os.path.exists( # type: ignore[unreachable]
atomic_dir.work_dir
), "The work_dir should always be cleaned up."
assert not os.path.exists(target_dir), (
"When the context raises the work_dir it was given should not be moved to the "
"target_dir."
)


def test_atomic_directory_empty_workdir_finalized():
# type: () -> None
with temporary_dir() as target_dir:
with atomic_directory(target_dir) as work_dir:
assert (
work_dir.is_finalized()
), "When the target_dir exists no work_dir should be created."


def test_atomic_directory_locked_mode():
# type: () -> None

assert AtomicDirectory("unlocked").work_dir != AtomicDirectory("unlocked").work_dir
assert (
AtomicDirectory("locked", locked=True).work_dir
== AtomicDirectory("locked", locked=True).work_dir
)
91 changes: 1 addition & 90 deletions tests/test_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,9 @@
import contextlib
import errno
import os
from contextlib import contextmanager

import pytest

from pex.atomic_directory import AtomicDirectory, atomic_directory
from pex.common import (
Chroot,
PermPreservingZipFile,
Expand All @@ -29,94 +27,7 @@
import mock # type: ignore[no-redef,import]

if TYPE_CHECKING:
from typing import Any, Iterator, Optional, Tuple, Type


@contextmanager
def maybe_raises(exception=None):
# type: (Optional[Type[Exception]]) -> Iterator[None]
@contextmanager
def noop():
yield

context = noop() if exception is None else pytest.raises(exception)
with context:
yield


def atomic_directory_finalize_test(errno, expect_raises=None):
# type: (int, Optional[Type[Exception]]) -> None
with mock.patch("os.rename", spec_set=True, autospec=True) as mock_rename:
mock_rename.side_effect = OSError(errno, os.strerror(errno))
with maybe_raises(expect_raises):
AtomicDirectory("to.dir").finalize()


def test_atomic_directory_finalize_eexist():
# type: () -> None
atomic_directory_finalize_test(errno.EEXIST)


def test_atomic_directory_finalize_enotempty():
# type: () -> None
atomic_directory_finalize_test(errno.ENOTEMPTY)


def test_atomic_directory_finalize_eperm():
# type: () -> None
atomic_directory_finalize_test(errno.EPERM, expect_raises=OSError)


def test_atomic_directory_empty_workdir_finalize():
# type: () -> None
with temporary_dir() as sandbox:
target_dir = os.path.join(sandbox, "target_dir")
assert not os.path.exists(target_dir)

with atomic_directory(target_dir) as atomic_dir:
assert not atomic_dir.is_finalized()
assert target_dir == atomic_dir.target_dir
assert os.path.exists(atomic_dir.work_dir)
assert os.path.isdir(atomic_dir.work_dir)
assert [] == os.listdir(atomic_dir.work_dir)

touch(os.path.join(atomic_dir.work_dir, "created"))

assert not os.path.exists(target_dir)

assert not os.path.exists(atomic_dir.work_dir), "The work_dir should always be cleaned up."
assert os.path.exists(os.path.join(target_dir, "created"))


def test_atomic_directory_empty_workdir_failure():
# type: () -> None
class SimulatedRuntimeError(RuntimeError):
pass

with temporary_dir() as sandbox:
target_dir = os.path.join(sandbox, "target_dir")
with pytest.raises(SimulatedRuntimeError):
with atomic_directory(target_dir) as atomic_dir:
assert not atomic_dir.is_finalized()
touch(os.path.join(atomic_dir.work_dir, "created"))
raise SimulatedRuntimeError()

assert not os.path.exists( # type: ignore[unreachable]
atomic_dir.work_dir
), "The work_dir should always be cleaned up."
assert not os.path.exists(target_dir), (
"When the context raises the work_dir it was given should not be moved to the "
"target_dir."
)


def test_atomic_directory_empty_workdir_finalized():
# type: () -> None
with temporary_dir() as target_dir:
with atomic_directory(target_dir) as work_dir:
assert (
work_dir.is_finalized()
), "When the target_dir exists no work_dir should be created."
from typing import Any, Iterator, Tuple


def extract_perms(path):
Expand Down