Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use symlinks to add dists in the Pex CLI. #1185

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion pex/bin/pex.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
from pex.orderedset import OrderedSet
from pex.pex import PEX
from pex.pex_bootstrapper import ensure_venv, iter_compatible_interpreters
from pex.pex_builder import PEXBuilder
from pex.pex_builder import CopyMode, PEXBuilder
from pex.pip import ResolverVersion
from pex.platforms import Platform
from pex.resolver import Unsatisfiable, parsed_platform, resolve_from_pex, resolve_multi
Expand Down Expand Up @@ -870,6 +870,7 @@ def to_python_interpreter(full_path_or_basename):
path=safe_mkdtemp(),
interpreter=interpreter,
preamble=preamble,
copy_mode=CopyMode.SYMLINK,
include_tools=options.include_tools or options.venv,
)

Expand Down
41 changes: 33 additions & 8 deletions pex/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -594,6 +594,20 @@ def link(self, src, dst, label=None):
safe_copy(abs_src, abs_dst, overwrite=False)
# TODO: Ensure the target and dest are the same if the file already exists.

def symlink(
self,
src, # type: str
dst, # type: str
label=None, # type: Optional[str]
):
# type: (...) -> None
dst = self._normalize(dst)
self._tag(dst, label)
self._ensure_parent(dst)
abs_src = src
abs_dst = os.path.join(self.chroot, dst)
os.symlink(abs_src, abs_dst)

def write(self, data, dst, label=None, mode="wb"):
"""Write data to ``chroot/dst`` with optional label.

Expand Down Expand Up @@ -640,11 +654,10 @@ def delete(self):
def zip(self, filename, mode="w", deterministic_timestamp=False):
with open_zip(filename, mode) as zf:

def write_entry(path):
full_path = os.path.join(self.chroot, path)
def write_entry(filename, arcname):
zip_entry = zf.zip_entry_from_file(
filename=full_path,
arcname=path,
filename=filename,
arcname=arcname,
date_time=DETERMINISTIC_DATETIME.timetuple()
if deterministic_timestamp
else None,
Expand All @@ -664,9 +677,21 @@ def maybe_write_parent_dirs(path):
if parent_dir is None or parent_dir in written_dirs:
return
maybe_write_parent_dirs(parent_dir)
write_entry(parent_dir)
write_entry(filename=os.path.join(self.chroot, parent_dir), arcname=parent_dir)
written_dirs.add(parent_dir)

for f in sorted(self.files()):
maybe_write_parent_dirs(f)
write_entry(f)
def iter_files():
for path in sorted(self.files()):
full_path = os.path.join(self.chroot, path)
if os.path.isfile(full_path):
yield full_path, path
continue
for root, _, files in os.walk(full_path):
for f in files:
abs_path = os.path.join(root, f)
rel_path = os.path.join(path, os.path.relpath(abs_path, full_path))
yield abs_path, rel_path

for filename, arcname in iter_files():
maybe_write_parent_dirs(arcname)
write_entry(filename, arcname)
99 changes: 73 additions & 26 deletions pex/pex_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,44 @@
from pex.interpreter import PythonInterpreter
from pex.pex_info import PexInfo
from pex.pip import get_pip
from pex.third_party.pkg_resources import DefaultProvider, ZipProvider, get_provider
from pex.third_party.pkg_resources import DefaultProvider, Distribution, ZipProvider, get_provider
from pex.tracer import TRACER
from pex.typing import TYPE_CHECKING
from pex.util import CacheHelper, DistributionHelper

if TYPE_CHECKING:
from typing import Optional, Dict


class CopyMode(object):
class Value(object):
def __init__(self, value):
# type: (str) -> None
self.value = value

def __repr__(self):
# type: () -> str
return repr(self.value)

COPY = Value("copy")
LINK = Value("link")
SYMLINK = Value("symlink")

values = COPY, LINK, SYMLINK

@classmethod
def for_value(cls, value):
# type: (str) -> CopyMode.Value
for v in cls.values:
if v.value == value:
return v
raise ValueError(
"{!r} of type {} must be one of {}".format(
value, type(value), ", ".join(map(repr, cls.values))
)
)


BOOTSTRAP_DIR = ".bootstrap"

BOOTSTRAP_ENVIRONMENT = """\
Expand Down Expand Up @@ -124,14 +158,15 @@ class InvalidExecutableSpecification(Error):

def __init__(
self,
path=None,
interpreter=None,
chroot=None,
pex_info=None,
preamble=None,
copy=False,
include_tools=False,
path=None, # type: Optional[str]
interpreter=None, # type: Optional[PythonInterpreter]
chroot=None, # type: Optional[Chroot]
pex_info=None, # type: Optional[PexInfo]
preamble=None, # type: Optional[str]
copy_mode=CopyMode.LINK, # type: CopyMode.Value
include_tools=False, # type: bool
):
# type: (...) -> None
"""Initialize a pex builder.

:keyword path: The path to write the PEX as it is built. If ``None`` is specified,
Expand All @@ -142,9 +177,7 @@ def __init__(
:keyword pex_info: A preexisting PexInfo to use to build the PEX.
:keyword preamble: If supplied, execute this code prior to bootstrapping this PEX
environment.
:type preamble: str
:keyword copy: If False, attempt to create the pex environment via hard-linking, falling
back to copying across devices. If True, always copy.
:keyword copy_mode: Create the pex environment using the given copy mode.
:keyword include_tools: If True, include runtime tools which can be executed by exporting
`PEX_TOOLS=1`.

Expand All @@ -156,13 +189,13 @@ def __init__(
self._chroot = chroot or Chroot(path or safe_mkdtemp())
self._pex_info = pex_info or PexInfo.default(self._interpreter)
self._preamble = preamble or ""
self._copy = copy
self._copy_mode = copy_mode
self._include_tools = include_tools

self._shebang = self._interpreter.identity.hashbang()
self._logger = logging.getLogger(__name__)
self._frozen = False
self._distributions = set()
self._distributions = {} # type: Dict[str, Distribution]

def _ensure_unfrozen(self, name="Operation"):
if self._frozen:
Expand Down Expand Up @@ -194,7 +227,7 @@ def clone(self, into=None):
interpreter=self._interpreter,
pex_info=self._pex_info.copy(),
preamble=self._preamble,
copy=self._copy,
copy_mode=self._copy_mode,
)
clone.set_shebang(self._shebang)
clone._distributions = self._distributions.copy()
Expand Down Expand Up @@ -326,14 +359,16 @@ def set_script(self, script):
"""

# check if 'script' is a console_script
dist, entry_point = get_entry_point_from_console_script(script, self._distributions)
dist, entry_point = get_entry_point_from_console_script(
script, self._distributions.values()
)
if entry_point:
self.set_entry_point(entry_point)
TRACER.log("Set entrypoint to console_script %r in %r" % (entry_point, dist))
return

# check if 'script' is an ordinary script
dist_script = get_script_from_distributions(script, self._distributions)
dist_script = get_script_from_distributions(script, self._distributions.values())
if dist_script:
if self._pex_info.entry_point:
raise self.InvalidExecutableSpecification(
Expand All @@ -345,7 +380,7 @@ def set_script(self, script):

raise self.InvalidExecutableSpecification(
"Could not find script %r in any distribution %s within PEX!"
% (script, ", ".join(str(d) for d in self._distributions))
% (script, ", ".join(str(d) for d in self._distributions.values()))
)

def set_entry_point(self, entry_point):
Expand Down Expand Up @@ -377,12 +412,16 @@ def set_shebang(self, shebang):
self._shebang = "#!%s" % shebang if not shebang.startswith("#!") else shebang

def _add_dist_dir(self, path, dist_name):
for root, _, files in os.walk(path):
for f in files:
filename = os.path.join(root, f)
relpath = os.path.relpath(filename, path)
target = os.path.join(self._pex_info.internal_cache, dist_name, relpath)
self._copy_or_link(filename, target)
target_dir = os.path.join(self._pex_info.internal_cache, dist_name)
if self._copy_mode == CopyMode.SYMLINK:
self._copy_or_link(path, target_dir)
else:
for root, _, files in os.walk(path):
for f in files:
filename = os.path.join(root, f)
relpath = os.path.relpath(filename, path)
target = os.path.join(target_dir, relpath)
self._copy_or_link(filename, target)
return CacheHelper.dir_hash(path)

def _add_dist_wheel_file(self, path, dist_name):
Expand All @@ -402,9 +441,14 @@ def add_distribution(self, dist, dist_name=None):
this will be inferred from the distribution itself should it be formatted in a standard way.
:type dist: :class:`pkg_resources.Distribution`
"""
if dist.location in self._distributions:
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The new SYMLINK copy_mode brought to light the fact we were doing over-writes here when populating the PEXBuilder with the results of multi-target resolves whenever a distribution applied to more than one target (say a universal wheel). This was a harmless time-waste previously.

TRACER.log(
"Skipping adding {} - already added from {}".format(dist, dist.location), V=9
)
return
self._ensure_unfrozen("Adding a distribution")
dist_name = dist_name or os.path.basename(dist.location)
self._distributions.add(dist)
self._distributions[dist.location] = dist

if os.path.isdir(dist.location):
dist_hash = self._add_dist_dir(dist.location, dist_name)
Expand Down Expand Up @@ -482,8 +526,10 @@ def _prepare_code(self):
def _copy_or_link(self, src, dst, label=None):
if src is None:
self._chroot.touch(dst, label)
elif self._copy:
elif self._copy_mode == CopyMode.COPY:
self._chroot.copy(src, dst, label)
elif self._copy_mode == CopyMode.SYMLINK:
self._chroot.symlink(src, dst, label)
else:
self._chroot.link(src, dst, label)

Expand Down Expand Up @@ -558,7 +604,8 @@ def build(self, filename, bytecode_compile=True, deterministic_timestamp=False):
with safe_open(tmp_zip, "ab") as pexfile:
assert os.path.getsize(pexfile.name) == 0
pexfile.write(to_bytes("{}\n".format(self._shebang)))
self._chroot.zip(tmp_zip, mode="a", deterministic_timestamp=deterministic_timestamp)
with TRACER.timed("Zipping PEX file."):
self._chroot.zip(tmp_zip, mode="a", deterministic_timestamp=deterministic_timestamp)
if os.path.exists(filename):
os.unlink(filename)
os.rename(tmp_zip, filename)
Expand Down
33 changes: 33 additions & 0 deletions tests/test_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,39 @@ def test_chroot_zip():
assert b"data" == zip.read("directory/subdirectory/file")


def test_chroot_zip_symlink():
# type: () -> None
with temporary_dir() as tmp:
chroot = Chroot(os.path.join(tmp, "chroot"))
chroot.write(b"data", "directory/subdirectory/file")
chroot.symlink(
os.path.join(chroot.path(), "directory/subdirectory/file"),
"directory/subdirectory/symlinked",
)
chroot.symlink(os.path.join(chroot.path(), "directory"), "symlinked")
zip_dst = os.path.join(tmp, "chroot.zip")
chroot.zip(zip_dst)
with open_zip(zip_dst) as zip:
assert [
"directory/",
"directory/subdirectory/",
"directory/subdirectory/file",
"directory/subdirectory/symlinked",
"symlinked/",
"symlinked/subdirectory/",
"symlinked/subdirectory/file",
"symlinked/subdirectory/symlinked",
] == sorted(zip.namelist())
assert b"" == zip.read("directory/")
assert b"" == zip.read("directory/subdirectory/")
assert b"data" == zip.read("directory/subdirectory/file")
assert b"data" == zip.read("directory/subdirectory/symlinked")
assert b"" == zip.read("symlinked/")
assert b"" == zip.read("symlinked/subdirectory/")
assert b"data" == zip.read("symlinked/subdirectory/file")
assert b"data" == zip.read("symlinked/subdirectory/symlinked")


def test_can_write_dir_writeable_perms():
# type: () -> None
with temporary_dir() as writeable:
Expand Down
24 changes: 15 additions & 9 deletions tests/test_pex_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from pex.common import temporary_dir
from pex.compatibility import WINDOWS, nested
from pex.pex import PEX
from pex.pex_builder import BOOTSTRAP_DIR, PEXBuilder
from pex.pex_builder import BOOTSTRAP_DIR, CopyMode, PEXBuilder
from pex.testing import make_bdist
from pex.testing import write_simple_pex as write_pex
from pex.typing import TYPE_CHECKING
Expand Down Expand Up @@ -155,14 +155,19 @@ def build_and_check(path, precompile):
@pytest.mark.skipif(WINDOWS, reason="No hardlinks on windows")
def test_pex_builder_copy_or_link():
# type: () -> None
with nested(temporary_dir(), temporary_dir(), temporary_dir()) as (td1, td2, td3):
with nested(temporary_dir(), temporary_dir(), temporary_dir(), temporary_dir()) as (
td1,
td2,
td3,
td4,
):
src = os.path.join(td1, "exe.py")
with open(src, "w") as fp:
fp.write(exe_main)

def build_and_check(path, copy):
# type: (str, bool) -> None
pb = PEXBuilder(path=path, copy=copy)
def build_and_check(path, copy_mode):
# type: (str, CopyMode.Value) -> None
pb = PEXBuilder(path=path, copy_mode=copy_mode)
pb.add_source(src, "exe.py")

path_clone = os.path.join(path, "__clone")
Expand All @@ -172,13 +177,14 @@ def build_and_check(path, copy):
s1 = os.stat(src)
s2 = os.stat(os.path.join(root, "exe.py"))
is_link = (s1[stat.ST_INO], s1[stat.ST_DEV]) == (s2[stat.ST_INO], s2[stat.ST_DEV])
if copy:
if copy_mode == CopyMode.COPY:
assert not is_link
else:
elif copy_mode == CopyMode.LINK:
assert is_link

build_and_check(td2, False)
build_and_check(td3, True)
build_and_check(td2, CopyMode.LINK)
build_and_check(td3, CopyMode.COPY)
build_and_check(td4, CopyMode.SYMLINK)


def test_pex_builder_deterministic_timestamp():
Expand Down