Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: do not copy libraries in site-packages #392

Closed
wants to merge 11 commits into from
12 changes: 12 additions & 0 deletions src/auditwheel/main_repair.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,17 @@ def configure_parser(sub_parsers):
help="Do not check for higher policy compatibility",
default=False,
)
p.add_argument(
"--no-copy-site-libs",
dest="COPY_SITE_LIBS",
action="store_false",
help=(
"Do not copy libraries located in the site-packages directory from "
"other packages (update rpath only). The developer will need to ensure "
"that the skipped libraries are listed the package install dependencies."
),
default=True,
)
mayeut marked this conversation as resolved.
Show resolved Hide resolved
p.set_defaults(func=execute)


Expand Down Expand Up @@ -170,6 +181,7 @@ def execute(args, p):
update_tags=args.UPDATE_TAGS,
patcher=patcher,
strip=args.STRIP,
copy_site_libs=args.COPY_SITE_LIBS,
)

if out_wheel is not None:
Expand Down
10 changes: 10 additions & 0 deletions src/auditwheel/patcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@ class ElfPatcher:
def replace_needed(self, file_name: str, *old_new_pairs: Tuple[str, str]) -> None:
raise NotImplementedError

def get_soname(self, file_name: str) -> str:
raise NotImplementedError

def set_soname(self, file_name: str, new_so_name: str) -> None:
raise NotImplementedError

Expand Down Expand Up @@ -54,6 +57,13 @@ def replace_needed(self, file_name: str, *old_new_pairs: Tuple[str, str]) -> Non
]
)

def get_soname(self, file_name: str) -> str:
return (
check_output(["patchelf", "--print-soname", file_name])
.decode("utf-8")
.strip()
)

def set_soname(self, file_name: str, new_so_name: str) -> None:
check_call(["patchelf", "--set-soname", new_so_name, file_name])

Expand Down
92 changes: 78 additions & 14 deletions src/auditwheel/repair.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ def repair_wheel(
update_tags: bool,
patcher: ElfPatcher,
strip: bool = False,
copy_site_libs: bool = True,
) -> Optional[str]:

external_refs_by_fn = get_wheel_elfdata(wheel_path)[1]
Expand All @@ -62,7 +63,10 @@ def repair_wheel(
dest_dir = match.group("name") + lib_sdir

if not exists(dest_dir):
keep_dest_dir = False
os.mkdir(dest_dir)
else:
keep_dest_dir = True

# here, fn is a path to a python extension library in
# the wheel, and v['libs'] contains its required libs
Expand All @@ -79,16 +83,32 @@ def repair_wheel(
% soname
)

if not copy_site_libs and "site-packages" in str(src_path).split(
Copy link
Member

@mayeut mayeut Oct 8, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There's no guarantee for the site-packages folder to have that name (in fact, it doesn't on debian python, its name is dist-packages).

os.path.sep
):
continue

new_soname, new_path = copylib(src_path, dest_dir, patcher)
soname_map[soname] = (new_soname, new_path)
replacements.append((soname, new_soname))

if replacements:
patcher.replace_needed(fn, *replacements)

if len(ext_libs) > 0:
new_rpath = os.path.relpath(dest_dir, os.path.dirname(fn))
new_rpath = os.path.join("$ORIGIN", new_rpath)
append_rpath_within_wheel(fn, new_rpath, ctx.name, patcher)
if len(soname_map) > 0:
new_rpath = os.path.relpath(dest_dir, os.path.dirname(fn))
new_rpath = os.path.join("$ORIGIN", new_rpath)
else:
# no new .so files are copied
new_rpath = None # type: ignore
append_rpath_within_wheel(
fn,
new_rpath,
ctx.name,
patcher,
copy_site_libs=copy_site_libs,
)

# we grafted in a bunch of libraries and modified their sonames, but
# they may have internal dependencies (DT_NEEDED) on one another, so
Expand All @@ -111,6 +131,9 @@ def repair_wheel(
extensions = external_refs_by_fn.keys()
strip_symbols(itertools.chain(libs_to_strip, extensions))

if not (len(soname_map) > 0 or keep_dest_dir):
shutil.rmtree(dest_dir, ignore_errors=True) # move unnecessary directory

return ctx.out_wheel


Expand Down Expand Up @@ -163,7 +186,12 @@ def copylib(src_path: str, dest_dir: str, patcher: ElfPatcher) -> Tuple[str, str


def append_rpath_within_wheel(
lib_name: str, rpath: str, wheel_base_dir: str, patcher: ElfPatcher
lib_name: str,
new_rpath: Optional[str],
wheel_base_dir: str,
patcher: ElfPatcher,
*,
copy_site_libs: bool = True,
) -> None:
"""Add a new rpath entry to a file while preserving as many existing
rpath entries as possible.
Expand All @@ -180,34 +208,70 @@ def append_rpath_within_wheel(
wheel_base_dir = abspath(wheel_base_dir)

def is_valid_rpath(rpath: str) -> bool:
return _is_valid_rpath(rpath, lib_dir, wheel_base_dir)
return _is_valid_rpath(
rpath, lib_dir, wheel_base_dir, copy_site_libs=copy_site_libs
)

site_packages = os.path.join("$ORIGIN", os.path.relpath(wheel_base_dir, lib_dir))

old_rpaths = patcher.get_rpath(lib_name)
rpaths = filter(is_valid_rpath, old_rpaths.split(":"))
rpaths = [rp for rp in old_rpaths.split(":") if is_valid_rpath(rp)]
rpaths = [
rp
if copy_site_libs or "site-packages" not in rp.split(os.path.sep)
else os.path.join(
site_packages,
rp.rpartition(os.path.sep + "site-packages" + os.path.sep)[-1],
)
for rp in rpaths
]
if new_rpath is not None:
rpaths.append(new_rpath)
# Remove duplicates while preserving ordering
# Fake an OrderedSet using OrderedDict
rpath_set = OrderedDict([(old_rpath, "") for old_rpath in rpaths])
rpath_set[rpath] = ""
rpaths = list(OrderedDict.fromkeys(rpaths))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can we just use dict instead here?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since Python 3.6, the dict keys are kept in insertion order. But I think explicitly using OrderedDict improves readability.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok for now


patcher.set_rpath(lib_name, ":".join(rpath_set))
patcher.set_rpath(lib_name, ":".join(rpaths))


def _is_valid_rpath(rpath: str, lib_dir: str, wheel_base_dir: str) -> bool:
def _is_valid_rpath(
rpath: str,
lib_dir: str,
wheel_base_dir: str,
*,
copy_site_libs: bool = True,
) -> bool:
full_rpath_entry = _resolve_rpath_tokens(rpath, lib_dir)

if not isabs(full_rpath_entry):
logger.debug(
f"rpath entry {rpath} could not be resolved to an "
"absolute path -- discarding it."
)
return False
elif not is_subdir(full_rpath_entry, wheel_base_dir):

if not is_subdir(full_rpath_entry, wheel_base_dir):
if not copy_site_libs and "site-packages" in full_rpath_entry.split(
os.path.sep
):
site_packages = os.path.join(
"$ORIGIN",
os.path.relpath(wheel_base_dir, lib_dir),
)
new_rpath = os.path.join(
site_packages,
rpath.rpartition(os.path.sep + "site-packages" + os.path.sep)[-1],
)
logger.debug(f"Preserved rpath entry {rpath} as {new_rpath}")
return True

logger.debug(
f"rpath entry {rpath} points outside the wheel -- " "discarding it."
)
return False
else:
logger.debug(f"Preserved rpath entry {rpath}")
return True

logger.debug(f"Preserved rpath entry {rpath}")
return True


def _resolve_rpath_tokens(rpath: str, lib_base_dir: str) -> str:
Expand Down