Skip to content

Commit

Permalink
Improve PythonInterpreter venv support.
Browse files Browse the repository at this point in the history
This fixes binary canonicalization to handle virtual environments
created with virtualenv instead of pyvenv. It also adds support for
resolving the base interpreter used to build a virtual environment.

The ability to resolve a virtual environment interpreter will be used to
fix pex-tool#1031 where virtual environments created with
`--system-site-packages` leak those packages through as regular sys.path
entries otherwise undetectable by PEX.

Work towards pex-tool#962 and pex-tool#1115.
  • Loading branch information
jsirois committed Dec 11, 2020
1 parent a1b51bb commit a316163
Show file tree
Hide file tree
Showing 4 changed files with 204 additions and 21 deletions.
158 changes: 146 additions & 12 deletions pex/interpreter.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
from textwrap import dedent

from pex import third_party
from pex.common import safe_rmtree
from pex.common import is_exe, safe_rmtree
from pex.compatibility import string
from pex.executor import Executor
from pex.jobs import ErrorHandler, Job, Retain, SpawnedJob, execute_parallel
Expand All @@ -34,6 +34,7 @@
Dict,
Iterable,
Iterator,
List,
MutableMapping,
Optional,
Sequence,
Expand Down Expand Up @@ -95,6 +96,14 @@ def get(cls, binary=None):
preferred_tag = supported_tags[0]
return cls(
binary=binary or sys.executable,
prefix=sys.prefix,
base_prefix=(
# Old virtualenv (16 series and lower) sets `sys.real_prefix` in all cases.
getattr(sys, "real_prefix", None)
# Both pyvenv and virtualenv 20+ set `sys.base_prefix` as per
# https://www.python.org/dev/peps/pep-0405/.
or getattr(sys, "base_prefix", sys.prefix)
),
python_tag=preferred_tag.interpreter,
abi_tag=preferred_tag.abi,
platform_tag=preferred_tag.platform,
Expand All @@ -107,7 +116,7 @@ def get(cls, binary=None):
def decode(cls, encoded):
TRACER.log("creating PythonIdentity from encoded: %s" % encoded, V=9)
values = json.loads(encoded)
if len(values) != 7:
if len(values) != 9:
raise cls.InvalidError("Invalid interpreter identity: %s" % encoded)

supported_tags = values.pop("supported_tags")
Expand All @@ -126,13 +135,25 @@ def _find_interpreter_name(cls, python_tag):
raise ValueError("Unknown interpreter: {}".format(python_tag))

def __init__(
self, binary, python_tag, abi_tag, platform_tag, version, supported_tags, env_markers
self,
binary, # type: str
prefix, # type: str
base_prefix, # type: str
python_tag, # type: str
abi_tag, # type: str
platform_tag, # type: str
version, # type: Iterable[int]
supported_tags, # type: Iterable[tags.Tag]
env_markers, # type: Dict[str, str]
):
# type: (...) -> None
# N.B.: We keep this mapping to support historical values for `distribution` and `requirement`
# properties.
self._interpreter_name = self._find_interpreter_name(python_tag)

self._binary = binary
self._prefix = prefix
self._base_prefix = base_prefix
self._python_tag = python_tag
self._abi_tag = abi_tag
self._platform_tag = platform_tag
Expand All @@ -143,6 +164,8 @@ def __init__(
def encode(self):
values = dict(
binary=self._binary,
prefix=self._prefix,
base_prefix=self._base_prefix,
python_tag=self._python_tag,
abi_tag=self._abi_tag,
platform_tag=self._platform_tag,
Expand All @@ -158,6 +181,16 @@ def encode(self):
def binary(self):
return self._binary

@property
def prefix(self):
    # type: () -> str
    """Return the prefix recorded for the identified interpreter.

    When the identity is built by `get`, this is the interpreter's `sys.prefix`.
    """
    return self._prefix

@property
def base_prefix(self):
    # type: () -> str
    """Return the base prefix recorded for the identified interpreter.

    When the identity is built by `get`, this is `sys.real_prefix` if set (old virtualenv),
    otherwise `sys.base_prefix` (PEP-405 venvs and virtualenv 20+), falling back to
    `sys.prefix` when neither attribute exists.
    """
    return self._base_prefix

@property
def python_tag(self):
    """Return the python tag this identity was constructed with."""
    return self._python_tag
Expand Down Expand Up @@ -308,7 +341,7 @@ class PythonInterpreter(object):
_PYTHON_INTERPRETER_BY_NORMALIZED_PATH = {} # type: Dict

@staticmethod
def _read_pyvenv_home(path):
def _get_pyvenv_cfg(path):
# type: (str) -> Optional[str]
# See: https://www.python.org/dev/peps/pep-0405/#specification
pyvenv_cfg_path = os.path.join(path, "pyvenv.cfg")
Expand All @@ -317,11 +350,11 @@ def _read_pyvenv_home(path):
for line in fp:
name, _, value = line.partition("=")
if name.strip() == "home":
return value.strip()
return pyvenv_cfg_path
return None

@classmethod
def _find_pyvenv_home(cls, maybe_venv_python_binary):
def _find_pyvenv_cfg(cls, maybe_venv_python_binary):
# type: (str) -> Optional[str]
# A pyvenv is identified by a pyvenv.cfg file with a home key in one of the two following
# directory layouts:
Expand All @@ -340,11 +373,11 @@ def _find_pyvenv_home(cls, maybe_venv_python_binary):
#
# See: https://www.python.org/dev/peps/pep-0405/#specification
maybe_venv_bin_dir = os.path.dirname(maybe_venv_python_binary)
home_dir = cls._read_pyvenv_home(maybe_venv_bin_dir)
if not home_dir:
pyvenv_cfg = cls._get_pyvenv_cfg(maybe_venv_bin_dir)
if not pyvenv_cfg:
maybe_venv_dir = os.path.dirname(maybe_venv_bin_dir)
home_dir = cls._read_pyvenv_home(maybe_venv_dir)
return home_dir
pyvenv_cfg = cls._get_pyvenv_cfg(maybe_venv_dir)
return pyvenv_cfg

@classmethod
def _resolve_pyvenv_canonical_python_binary(
Expand All @@ -357,8 +390,8 @@ def _resolve_pyvenv_canonical_python_binary(
if not os.path.islink(maybe_venv_python_binary):
return None

home_dir = cls._find_pyvenv_home(maybe_venv_python_binary)
if os.path.dirname(real_binary) != home_dir:
pyvenv_cfg = cls._find_pyvenv_cfg(maybe_venv_python_binary)
if pyvenv_cfg is None:
return None

while os.path.islink(maybe_venv_python_binary):
Expand Down Expand Up @@ -798,8 +831,109 @@ def __init__(self, identity):

@property
def binary(self):
    # type: () -> str
    """Return the binary stored for this interpreter.

    NOTE(review): presumably the canonicalized path of the interpreter executable; the
    assignment of `self._binary` is not visible here -- confirm against `__init__`.
    """
    return self._binary

@property
def is_venv(self):
    # type: () -> bool
    """Return `True` if this interpreter is homed in a virtual environment."""
    # An interpreter is considered to be in a virtual environment exactly when the prefix
    # recorded in its identity differs from the recorded base prefix.
    return self._identity.prefix != self._identity.base_prefix

@property
def prefix(self):
    # type: () -> str
    """Return the `sys.prefix` of this interpreter.

    For virtual environments, this will be the virtual environment directory itself.
    """
    return self._identity.prefix

class BaseInterpreterResolutionError(Exception):
    """Indicates the base interpreter for a virtual environment could not be resolved.

    Raised by `resolve_base_interpreter` when the search of a virtual environment's base
    prefix finds no equivalent interpreter.
    """

def resolve_base_interpreter(self):
    # type: () -> PythonInterpreter
    """Resolve the system interpreter that a virtual environment was ultimately built from.

    Virtual environments created from other virtual environments are followed one level at a
    time until an interpreter that is not homed in a virtual environment is reached. An
    interpreter that is not in a virtual environment resolves to itself.

    :return: The first interpreter reached that is not a virtual environment interpreter.
    :raises BaseInterpreterResolutionError: If, at some level, no executable under
        `<base_prefix>/bin` identifies as an interpreter with the same version and ABI tag as
        this interpreter.
    """
    if not self.is_venv:
        return self

    # Why several basenames are probed, and why each hit is verified:
    #
    # For PyPy, <base_prefix>/bin may hold `pypy` (always a 2.7 series interpreter) next to
    # `pypy3` and, when more than one PyPy 3.x series is installed, `pypy3.6`, `pypy3.7`, etc.
    # In that last layout `pypy3` may be either 3.x series, so a basename alone does not pin
    # down the interpreter. We therefore probe from the most specific basename to the least
    # specific one and only accept a candidate that identifies as an equivalent interpreter.
    #
    # CPython goes through the same steps purely for uniformity: python<major>.<minor> appears
    # to always be present in <prefix>/bin, so the first probe hits and there is ~no overhead.

    wanted_version = self._identity.version
    wanted_abi_tag = self._identity.abi_tag

    stem = "pypy" if self._identity.interpreter == "PyPy" else "python"
    # Ordered most specific (major.minor) to least specific (bare name).
    basenames = (
        stem + "{}.{}".format(wanted_version[0], wanted_version[1]),
        stem + str(wanted_version[0]),
        stem,
    )

    def iter_base_candidate_binary_paths(venv_interpreter):
        # type: (PythonInterpreter) -> Iterator[str]
        base_bin_dir = os.path.join(venv_interpreter._identity.base_prefix, "bin")
        paths = (os.path.join(base_bin_dir, basename) for basename in basenames)
        return (path for path in paths if is_exe(path))

    def is_equivalent(candidate):
        # type: (PythonInterpreter) -> bool
        if candidate._identity.version != wanted_version:
            return False
        return candidate._identity.abi_tag == wanted_abi_tag

    visited = []  # type: List[str]
    current = self
    while current.is_venv:
        for candidate_path in iter_base_candidate_binary_paths(current):
            candidate = self.from_binary(candidate_path)
            if is_equivalent(candidate):
                break
        else:
            # No executable in this level's base prefix matched; report how far we got.
            lines = [
                "Failed to resolve the base interpreter for the virtual environment at "
                "{venv_dir}.".format(venv_dir=self._identity.prefix)
            ]
            if visited:
                lines.append("Resolved through {path}".format(path=" -> ".join(visited)))
            lines.append(
                "Search of base_prefix {} found no equivalent interpreter for {}".format(
                    current._identity.base_prefix, current._binary
                )
            )
            raise self.BaseInterpreterResolutionError("\n".join(lines))
        current = candidate
        visited.append(current.binary)
    return current

@property
def identity(self):
# type: () -> PythonIdentity
Expand Down
8 changes: 5 additions & 3 deletions tests/test_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -1922,7 +1922,9 @@ def add_to_path(entry):
assert "PEX_PYTHON_PATH" not in final_env
assert "_PEX_SHOULD_EXIT_BOOTSTRAP_REEXEC" not in final_env

expected_exec_chain = [os.path.realpath(i) for i in [sys.executable] + (exec_chain or [])]
expected_exec_chain = [
PythonInterpreter.from_binary(i).binary for i in [sys.executable] + (exec_chain or [])
]
assert expected_exec_chain == final_env["_PEX_EXEC_CHAIN"].split(os.pathsep)


Expand All @@ -1933,7 +1935,7 @@ def test_pex_no_reexec_no_constraints():

def test_pex_reexec_no_constraints_pythonpath_present():
# type: () -> None
_assert_exec_chain(exec_chain=[os.path.realpath(sys.executable)], pythonpath=["."])
_assert_exec_chain(exec_chain=[sys.executable], pythonpath=["."])


def test_pex_no_reexec_constraints_match_current():
Expand All @@ -1946,7 +1948,7 @@ def test_pex_reexec_constraints_match_current_pythonpath_present():
# type: () -> None
current_version = ".".join(str(component) for component in sys.version_info[0:3])
_assert_exec_chain(
exec_chain=[os.path.realpath(sys.executable)],
exec_chain=[sys.executable],
pythonpath=["."],
interpreter_constraints=["=={}".format(current_version)],
)
Expand Down
52 changes: 49 additions & 3 deletions tests/test_interpreter.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,15 @@
import pytest

from pex import interpreter
from pex.common import temporary_dir, touch
from pex.common import safe_mkdtemp, temporary_dir, touch
from pex.compatibility import PY3
from pex.executor import Executor
from pex.interpreter import PythonInterpreter
from pex.testing import (
PY27,
PY35,
PY36,
PY_VER,
ensure_python_distribution,
ensure_python_interpreter,
environment_as,
Expand All @@ -30,7 +31,7 @@
from unittest.mock import Mock, patch # type: ignore[misc,no-redef,import]

if TYPE_CHECKING:
from typing import Iterator, Tuple, Union, Any
from typing import Iterator, Tuple, Union, Any, List

InterpreterIdentificationError = Tuple[str, str]
InterpreterOrError = Union[PythonInterpreter, InterpreterIdentificationError]
Expand Down Expand Up @@ -255,7 +256,7 @@ def assert_chosen(expected_version, other_version):
assert_chosen(expected_version="3.6.1", other_version="3.6.0")


def test_pyvenv(tmpdir):
def test_detect_pyvenv(tmpdir):
# type: (Any) -> None
venv = str(tmpdir)
py35 = ensure_python_interpreter(PY35)
Expand Down Expand Up @@ -285,3 +286,48 @@ def test_pyvenv(tmpdir):
assert len(pythons) >= 2, "Expected at least two virtualenv python binaries, found: {}".format(
pythons
)


def check_resolve_venv(real_interpreter):
    # type: (PythonInterpreter) -> None
    """Assert that venvs built from `real_interpreter` resolve back to it.

    Creates a first-level virtual environment using `real_interpreter` and then, for each
    python binary found in it, a second-level virtual environment created from that binary.
    Every interpreter found at either level must report `is_venv`, must be distinct from the
    interpreter it was created from and must resolve its base interpreter to
    `real_interpreter`.

    :param real_interpreter: An interpreter that is not itself homed in a virtual environment.
    """
    tmpdir = safe_mkdtemp()

    def create_venv(
        interpreter,  # type: PythonInterpreter
        rel_path,  # type: str
    ):
        # type: (...) -> List[str]
        venv_dir = os.path.join(tmpdir, rel_path)
        interpreter.execute(["-m", "venv", venv_dir])
        pythons = glob.glob(os.path.join(venv_dir, "bin", "python*"))
        # Without at least one binary the loops below would pass without checking anything.
        assert pythons, "Expected at least one python binary in {}".format(venv_dir)
        return pythons

    assert not real_interpreter.is_venv
    assert real_interpreter is real_interpreter.resolve_base_interpreter()

    for index, python in enumerate(create_venv(real_interpreter, "first-level")):
        venv_interpreter = PythonInterpreter.from_binary(python)
        assert venv_interpreter.is_venv
        # Compare interpreter to interpreter; comparing against the `binary` string could
        # never fail since the two operands are of different types.
        assert venv_interpreter != real_interpreter
        assert real_interpreter == venv_interpreter.resolve_base_interpreter()

        for nested_python in create_venv(venv_interpreter, "second-level{}".format(index)):
            nested_venv_interpreter = PythonInterpreter.from_binary(nested_python)
            assert nested_venv_interpreter.is_venv
            assert nested_venv_interpreter != venv_interpreter
            assert nested_venv_interpreter != real_interpreter
            assert real_interpreter == nested_venv_interpreter.resolve_base_interpreter()


def test_resolve_venv():
    # type: () -> None
    """Check venv base interpreter resolution starting from the PY35 test interpreter."""
    py35_binary = ensure_python_interpreter(PY35)
    check_resolve_venv(PythonInterpreter.from_binary(py35_binary))


@pytest.mark.skipif(
    PY_VER < (3, 0), reason="Test requires the venv module which is not present in Python 2."
)
def test_resolve_venv_ambient():
    # type: () -> None
    """Check venv base interpreter resolution starting from the interpreter running the tests.

    The running interpreter may itself be in a virtual environment, so it is first resolved to
    its base interpreter before being used to create venvs.
    """
    current_interpreter = PythonInterpreter.get()
    check_resolve_venv(current_interpreter.resolve_base_interpreter())
7 changes: 4 additions & 3 deletions tests/test_pex_bootstrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ def find_interpreters(
constraints=None, # type: Optional[Iterable[str]]
preferred_interpreter=None, # type: Optional[PythonInterpreter]
):
# type: (...) -> List[AnyStr]
# type: (...) -> List[str]
return [
interp.binary
for interp in iter_compatible_interpreters(
Expand Down Expand Up @@ -163,8 +163,9 @@ def test_find_compatible_interpreters_with_valid_basenames_and_constraints():
def test_find_compatible_interpreters_bias_current():
# type: () -> None
py36 = ensure_python_interpreter(PY36)
assert [os.path.realpath(sys.executable), py36] == find_interpreters([py36, sys.executable])
assert [os.path.realpath(sys.executable), py36] == find_interpreters([sys.executable, py36])
current_interpreter = PythonInterpreter.get()
assert [current_interpreter.binary, py36] == find_interpreters([py36, sys.executable])
assert [current_interpreter.binary, py36] == find_interpreters([sys.executable, py36])


def test_find_compatible_interpreters_siblings_of_current_issues_1109():
Expand Down

0 comments on commit a316163

Please sign in to comment.