Skip to content

Commit

Permalink
Use symlinks to add dists in the Pex CLI. (#1185)
Browse files Browse the repository at this point in the history
This avoids copying dists from ~/.pex/installed_wheels to a PEXBuilder
tmpdir before finally zipping up the tmpdir. Instead, with just a single
symlink added to the PEXBuilder tmpdir for each dist, the zip is created
directly from the sources under ~/.pex/installed_wheels providing a
modest speed up when building large PEX files.
  • Loading branch information
jsirois authored Jan 21, 2021
1 parent 997c95c commit 552b926
Show file tree
Hide file tree
Showing 5 changed files with 156 additions and 44 deletions.
3 changes: 2 additions & 1 deletion pex/bin/pex.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
from pex.orderedset import OrderedSet
from pex.pex import PEX
from pex.pex_bootstrapper import ensure_venv, iter_compatible_interpreters
from pex.pex_builder import PEXBuilder
from pex.pex_builder import CopyMode, PEXBuilder
from pex.pip import ResolverVersion
from pex.platforms import Platform
from pex.resolver import Unsatisfiable, parsed_platform, resolve_from_pex, resolve_multi
Expand Down Expand Up @@ -870,6 +870,7 @@ def to_python_interpreter(full_path_or_basename):
path=safe_mkdtemp(),
interpreter=interpreter,
preamble=preamble,
copy_mode=CopyMode.SYMLINK,
include_tools=options.include_tools or options.venv,
)

Expand Down
41 changes: 33 additions & 8 deletions pex/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -594,6 +594,20 @@ def link(self, src, dst, label=None):
safe_copy(abs_src, abs_dst, overwrite=False)
# TODO: Ensure the target and dest are the same if the file already exists.

def symlink(
self,
src, # type: str
dst, # type: str
label=None, # type: Optional[str]
):
# type: (...) -> None
dst = self._normalize(dst)
self._tag(dst, label)
self._ensure_parent(dst)
abs_src = src
abs_dst = os.path.join(self.chroot, dst)
os.symlink(abs_src, abs_dst)

def write(self, data, dst, label=None, mode="wb"):
"""Write data to ``chroot/dst`` with optional label.
Expand Down Expand Up @@ -640,11 +654,10 @@ def delete(self):
def zip(self, filename, mode="w", deterministic_timestamp=False):
with open_zip(filename, mode) as zf:

def write_entry(path):
full_path = os.path.join(self.chroot, path)
def write_entry(filename, arcname):
zip_entry = zf.zip_entry_from_file(
filename=full_path,
arcname=path,
filename=filename,
arcname=arcname,
date_time=DETERMINISTIC_DATETIME.timetuple()
if deterministic_timestamp
else None,
Expand All @@ -664,9 +677,21 @@ def maybe_write_parent_dirs(path):
if parent_dir is None or parent_dir in written_dirs:
return
maybe_write_parent_dirs(parent_dir)
write_entry(parent_dir)
write_entry(filename=os.path.join(self.chroot, parent_dir), arcname=parent_dir)
written_dirs.add(parent_dir)

for f in sorted(self.files()):
maybe_write_parent_dirs(f)
write_entry(f)
def iter_files():
for path in sorted(self.files()):
full_path = os.path.join(self.chroot, path)
if os.path.isfile(full_path):
yield full_path, path
continue
for root, _, files in os.walk(full_path):
for f in files:
abs_path = os.path.join(root, f)
rel_path = os.path.join(path, os.path.relpath(abs_path, full_path))
yield abs_path, rel_path

for filename, arcname in iter_files():
maybe_write_parent_dirs(arcname)
write_entry(filename, arcname)
99 changes: 73 additions & 26 deletions pex/pex_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,44 @@
from pex.interpreter import PythonInterpreter
from pex.pex_info import PexInfo
from pex.pip import get_pip
from pex.third_party.pkg_resources import DefaultProvider, ZipProvider, get_provider
from pex.third_party.pkg_resources import DefaultProvider, Distribution, ZipProvider, get_provider
from pex.tracer import TRACER
from pex.typing import TYPE_CHECKING
from pex.util import CacheHelper, DistributionHelper

if TYPE_CHECKING:
from typing import Optional, Dict


class CopyMode(object):
class Value(object):
def __init__(self, value):
# type: (str) -> None
self.value = value

def __repr__(self):
# type: () -> str
return repr(self.value)

COPY = Value("copy")
LINK = Value("link")
SYMLINK = Value("symlink")

values = COPY, LINK, SYMLINK

@classmethod
def for_value(cls, value):
# type: (str) -> CopyMode.Value
for v in cls.values:
if v.value == value:
return v
raise ValueError(
"{!r} of type {} must be one of {}".format(
value, type(value), ", ".join(map(repr, cls.values))
)
)


BOOTSTRAP_DIR = ".bootstrap"

BOOTSTRAP_ENVIRONMENT = """\
Expand Down Expand Up @@ -124,14 +158,15 @@ class InvalidExecutableSpecification(Error):

def __init__(
self,
path=None,
interpreter=None,
chroot=None,
pex_info=None,
preamble=None,
copy=False,
include_tools=False,
path=None, # type: Optional[str]
interpreter=None, # type: Optional[PythonInterpreter]
chroot=None, # type: Optional[Chroot]
pex_info=None, # type: Optional[PexInfo]
preamble=None, # type: Optional[str]
copy_mode=CopyMode.LINK, # type: CopyMode.Value
include_tools=False, # type: bool
):
# type: (...) -> None
"""Initialize a pex builder.
:keyword path: The path to write the PEX as it is built. If ``None`` is specified,
Expand All @@ -142,9 +177,7 @@ def __init__(
:keyword pex_info: A preexisting PexInfo to use to build the PEX.
:keyword preamble: If supplied, execute this code prior to bootstrapping this PEX
environment.
:type preamble: str
:keyword copy: If False, attempt to create the pex environment via hard-linking, falling
back to copying across devices. If True, always copy.
:keyword copy_mode: Create the pex environment using the given copy mode.
:keyword include_tools: If True, include runtime tools which can be executed by exporting
`PEX_TOOLS=1`.
Expand All @@ -156,13 +189,13 @@ def __init__(
self._chroot = chroot or Chroot(path or safe_mkdtemp())
self._pex_info = pex_info or PexInfo.default(self._interpreter)
self._preamble = preamble or ""
self._copy = copy
self._copy_mode = copy_mode
self._include_tools = include_tools

self._shebang = self._interpreter.identity.hashbang()
self._logger = logging.getLogger(__name__)
self._frozen = False
self._distributions = set()
self._distributions = {} # type: Dict[str, Distribution]

def _ensure_unfrozen(self, name="Operation"):
if self._frozen:
Expand Down Expand Up @@ -194,7 +227,7 @@ def clone(self, into=None):
interpreter=self._interpreter,
pex_info=self._pex_info.copy(),
preamble=self._preamble,
copy=self._copy,
copy_mode=self._copy_mode,
)
clone.set_shebang(self._shebang)
clone._distributions = self._distributions.copy()
Expand Down Expand Up @@ -326,14 +359,16 @@ def set_script(self, script):
"""

# check if 'script' is a console_script
dist, entry_point = get_entry_point_from_console_script(script, self._distributions)
dist, entry_point = get_entry_point_from_console_script(
script, self._distributions.values()
)
if entry_point:
self.set_entry_point(entry_point)
TRACER.log("Set entrypoint to console_script %r in %r" % (entry_point, dist))
return

# check if 'script' is an ordinary script
dist_script = get_script_from_distributions(script, self._distributions)
dist_script = get_script_from_distributions(script, self._distributions.values())
if dist_script:
if self._pex_info.entry_point:
raise self.InvalidExecutableSpecification(
Expand All @@ -345,7 +380,7 @@ def set_script(self, script):

raise self.InvalidExecutableSpecification(
"Could not find script %r in any distribution %s within PEX!"
% (script, ", ".join(str(d) for d in self._distributions))
% (script, ", ".join(str(d) for d in self._distributions.values()))
)

def set_entry_point(self, entry_point):
Expand Down Expand Up @@ -377,12 +412,16 @@ def set_shebang(self, shebang):
self._shebang = "#!%s" % shebang if not shebang.startswith("#!") else shebang

def _add_dist_dir(self, path, dist_name):
for root, _, files in os.walk(path):
for f in files:
filename = os.path.join(root, f)
relpath = os.path.relpath(filename, path)
target = os.path.join(self._pex_info.internal_cache, dist_name, relpath)
self._copy_or_link(filename, target)
target_dir = os.path.join(self._pex_info.internal_cache, dist_name)
if self._copy_mode == CopyMode.SYMLINK:
self._copy_or_link(path, target_dir)
else:
for root, _, files in os.walk(path):
for f in files:
filename = os.path.join(root, f)
relpath = os.path.relpath(filename, path)
target = os.path.join(target_dir, relpath)
self._copy_or_link(filename, target)
return CacheHelper.dir_hash(path)

def _add_dist_wheel_file(self, path, dist_name):
Expand All @@ -402,9 +441,14 @@ def add_distribution(self, dist, dist_name=None):
this will be inferred from the distribution itself should it be formatted in a standard way.
:type dist: :class:`pkg_resources.Distribution`
"""
if dist.location in self._distributions:
TRACER.log(
"Skipping adding {} - already added from {}".format(dist, dist.location), V=9
)
return
self._ensure_unfrozen("Adding a distribution")
dist_name = dist_name or os.path.basename(dist.location)
self._distributions.add(dist)
self._distributions[dist.location] = dist

if os.path.isdir(dist.location):
dist_hash = self._add_dist_dir(dist.location, dist_name)
Expand Down Expand Up @@ -482,8 +526,10 @@ def _prepare_code(self):
def _copy_or_link(self, src, dst, label=None):
if src is None:
self._chroot.touch(dst, label)
elif self._copy:
elif self._copy_mode == CopyMode.COPY:
self._chroot.copy(src, dst, label)
elif self._copy_mode == CopyMode.SYMLINK:
self._chroot.symlink(src, dst, label)
else:
self._chroot.link(src, dst, label)

Expand Down Expand Up @@ -558,7 +604,8 @@ def build(self, filename, bytecode_compile=True, deterministic_timestamp=False):
with safe_open(tmp_zip, "ab") as pexfile:
assert os.path.getsize(pexfile.name) == 0
pexfile.write(to_bytes("{}\n".format(self._shebang)))
self._chroot.zip(tmp_zip, mode="a", deterministic_timestamp=deterministic_timestamp)
with TRACER.timed("Zipping PEX file."):
self._chroot.zip(tmp_zip, mode="a", deterministic_timestamp=deterministic_timestamp)
if os.path.exists(filename):
os.unlink(filename)
os.rename(tmp_zip, filename)
Expand Down
33 changes: 33 additions & 0 deletions tests/test_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,39 @@ def test_chroot_zip():
assert b"data" == zip.read("directory/subdirectory/file")


def test_chroot_zip_symlink():
# type: () -> None
with temporary_dir() as tmp:
chroot = Chroot(os.path.join(tmp, "chroot"))
chroot.write(b"data", "directory/subdirectory/file")
chroot.symlink(
os.path.join(chroot.path(), "directory/subdirectory/file"),
"directory/subdirectory/symlinked",
)
chroot.symlink(os.path.join(chroot.path(), "directory"), "symlinked")
zip_dst = os.path.join(tmp, "chroot.zip")
chroot.zip(zip_dst)
with open_zip(zip_dst) as zip:
assert [
"directory/",
"directory/subdirectory/",
"directory/subdirectory/file",
"directory/subdirectory/symlinked",
"symlinked/",
"symlinked/subdirectory/",
"symlinked/subdirectory/file",
"symlinked/subdirectory/symlinked",
] == sorted(zip.namelist())
assert b"" == zip.read("directory/")
assert b"" == zip.read("directory/subdirectory/")
assert b"data" == zip.read("directory/subdirectory/file")
assert b"data" == zip.read("directory/subdirectory/symlinked")
assert b"" == zip.read("symlinked/")
assert b"" == zip.read("symlinked/subdirectory/")
assert b"data" == zip.read("symlinked/subdirectory/file")
assert b"data" == zip.read("symlinked/subdirectory/symlinked")


def test_can_write_dir_writeable_perms():
# type: () -> None
with temporary_dir() as writeable:
Expand Down
24 changes: 15 additions & 9 deletions tests/test_pex_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from pex.common import temporary_dir
from pex.compatibility import WINDOWS, nested
from pex.pex import PEX
from pex.pex_builder import BOOTSTRAP_DIR, PEXBuilder
from pex.pex_builder import BOOTSTRAP_DIR, CopyMode, PEXBuilder
from pex.testing import make_bdist
from pex.testing import write_simple_pex as write_pex
from pex.typing import TYPE_CHECKING
Expand Down Expand Up @@ -155,14 +155,19 @@ def build_and_check(path, precompile):
@pytest.mark.skipif(WINDOWS, reason="No hardlinks on windows")
def test_pex_builder_copy_or_link():
# type: () -> None
with nested(temporary_dir(), temporary_dir(), temporary_dir()) as (td1, td2, td3):
with nested(temporary_dir(), temporary_dir(), temporary_dir(), temporary_dir()) as (
td1,
td2,
td3,
td4,
):
src = os.path.join(td1, "exe.py")
with open(src, "w") as fp:
fp.write(exe_main)

def build_and_check(path, copy):
# type: (str, bool) -> None
pb = PEXBuilder(path=path, copy=copy)
def build_and_check(path, copy_mode):
# type: (str, CopyMode.Value) -> None
pb = PEXBuilder(path=path, copy_mode=copy_mode)
pb.add_source(src, "exe.py")

path_clone = os.path.join(path, "__clone")
Expand All @@ -172,13 +177,14 @@ def build_and_check(path, copy):
s1 = os.stat(src)
s2 = os.stat(os.path.join(root, "exe.py"))
is_link = (s1[stat.ST_INO], s1[stat.ST_DEV]) == (s2[stat.ST_INO], s2[stat.ST_DEV])
if copy:
if copy_mode == CopyMode.COPY:
assert not is_link
else:
elif copy_mode == CopyMode.LINK:
assert is_link

build_and_check(td2, False)
build_and_check(td3, True)
build_and_check(td2, CopyMode.LINK)
build_and_check(td3, CopyMode.COPY)
build_and_check(td4, CopyMode.SYMLINK)


def test_pex_builder_deterministic_timestamp():
Expand Down

0 comments on commit 552b926

Please sign in to comment.