Skip to content

Commit

Permalink
Simplify codefile detection (#5040)
Browse files Browse the repository at this point in the history
* Deprecate is_string
* Deprecate is_codefile
* Deprecate codefile_type
* Add codefile_class unittest
  • Loading branch information
kenodegard authored Oct 19, 2023
1 parent f7cf445 commit 7c9e766
Show file tree
Hide file tree
Showing 13 changed files with 230 additions and 117 deletions.
18 changes: 10 additions & 8 deletions conda_build/inspect_pkg.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
get_package_obj_files,
get_untracked_obj_files,
)
from conda_build.os_utils.liefldd import codefile_type
from conda_build.os_utils.liefldd import codefile_class, machofile
from conda_build.os_utils.macho import get_rpaths, human_filetype
from conda_build.utils import (
comma_join,
Expand Down Expand Up @@ -354,14 +354,16 @@ def inspect_objects(packages, prefix=sys.prefix, groupby="package"):

info = []
for f in obj_files:
f_info = {}
path = join(prefix, f)
filetype = codefile_type(path)
if filetype == "machofile":
f_info["filetype"] = human_filetype(path, None)
f_info["rpath"] = ":".join(get_rpaths(path))
f_info["filename"] = f
info.append(f_info)
codefile = codefile_class(path)
if codefile == machofile:
info.append(
{
"filetype": human_filetype(path, None),
"rpath": ":".join(get_rpaths(path)),
"filename": f,
}
)

output_string += print_object_info(info, groupby)
if hasattr(output_string, "decode"):
Expand Down
11 changes: 3 additions & 8 deletions conda_build/os_utils/ldd.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,7 @@

from conda_build.conda_interface import linked_data, untracked
from conda_build.os_utils.macho import otool
from conda_build.os_utils.pyldd import (
codefile_class,
inspect_linkages,
is_codefile,
machofile,
)
from conda_build.os_utils.pyldd import codefile_class, inspect_linkages, machofile

LDD_RE = re.compile(r"\s*(.*?)\s*=>\s*(.*?)\s*\(.*\)")
LDD_NOT_FOUND_RE = re.compile(r"\s*(.*?)\s*=>\s*not found")
Expand Down Expand Up @@ -118,7 +113,7 @@ def get_package_obj_files(dist, prefix):
files = get_package_files(dist, prefix)
for f in files:
path = join(prefix, f)
if is_codefile(path):
if codefile_class(path):
res.append(f)

return res
Expand All @@ -130,7 +125,7 @@ def get_untracked_obj_files(prefix):
files = untracked(prefix)
for f in files:
path = join(prefix, f)
if is_codefile(path):
if codefile_class(path):
res.append(f)

return res
116 changes: 73 additions & 43 deletions conda_build/os_utils/liefldd.py
Original file line number Diff line number Diff line change
@@ -1,62 +1,58 @@
# Copyright (C) 2014 Anaconda, Inc
# SPDX-License-Identifier: BSD-3-Clause
try:
from collections.abc import Hashable
except ImportError:
from collections.abc import Hashable
from __future__ import annotations

import hashlib
import json
import os
import struct
import sys
import threading
from collections.abc import Hashable
from fnmatch import fnmatch
from functools import partial
from pathlib import Path
from subprocess import PIPE, Popen

from ..deprecations import deprecated
from .external import find_executable

# lief cannot handle files it doesn't know about gracefully
# TODO :: Remove all use of pyldd
# Currently we verify the output of each against the other
from .pyldd import codefile_type as codefile_type_pyldd
from .pyldd import DLLfile, EXEfile, elffile, machofile
from .pyldd import codefile_type as _codefile_type
from .pyldd import inspect_linkages as inspect_linkages_pyldd

codefile_type = codefile_type_pyldd
have_lief = False
try:
import lief

lief.logging.disable()
have_lief = True
except:
pass
except ImportError:
have_lief = False


@deprecated("3.28.0", "4.0.0", addendum="Use `isinstance(value, str)` instead.")
def is_string(s):
try:
return isinstance(s, basestring)
except NameError:
return isinstance(s, str)
return isinstance(s, str)


# Some functions can operate on either file names
# or an already loaded binary. Generally speaking
# these are to be avoided, or if not avoided they
# should be passed a binary when possible as that
# will prevent having to parse it multiple times.
def ensure_binary(file):
if not is_string(file):
def ensure_binary(file: str | os.PathLike | Path | lief.Binary) -> lief.Binary | None:
if isinstance(file, lief.Binary):
return file
else:
try:
if not os.path.exists(file):
return []
return lief.parse(file)
except:
print(f"WARNING: liefldd: failed to ensure_binary({file})")
return None
elif not Path(file).exists():
return None
try:
return lief.parse(str(file))
except BaseException:
print(f"WARNING: liefldd: failed to ensure_binary({file})")
return None


def nm(filename):
Expand All @@ -77,25 +73,57 @@ def nm(filename):
print("No symbols found")


def codefile_type_liefldd(file, skip_symlinks=True):
binary = ensure_binary(file)
result = None
if binary:
if binary.format == lief.EXE_FORMATS.PE:
if lief.PE.DLL_CHARACTERISTICS:
if binary.header.characteristics & lief.PE.HEADER_CHARACTERISTICS.DLL:
result = "DLLfile"
else:
result = "EXEfile"
if have_lief:

def codefile_class(
path: str | os.PathLike | Path,
skip_symlinks: bool = False,
) -> type[DLLfile | EXEfile | machofile | elffile] | None:
# same signature as conda_build.os_utils.pyldd.codefile_class
if not (binary := ensure_binary(path)):
return None
elif (
binary.format == lief.EXE_FORMATS.PE
and lief.PE.HEADER_CHARACTERISTICS.DLL in binary.header.characteristics_list
):
return DLLfile
elif binary.format == lief.EXE_FORMATS.PE:
return EXEfile
elif binary.format == lief.EXE_FORMATS.MACHO:
result = "machofile"
return machofile
elif binary.format == lief.EXE_FORMATS.ELF:
result = "elffile"
return result

return elffile
else:
return None

if have_lief:
codefile_type = codefile_type_liefldd
else:
from .pyldd import codefile_class


@deprecated(
"3.28.0",
"4.0.0",
addendum="Use `conda_build.os_utils.liefldd.codefile_class` instead.",
)
def codefile_type_liefldd(*args, **kwargs) -> str | None:
codefile = codefile_class(*args, **kwargs)
return codefile.__name__ if codefile else None


deprecated.constant(
"3.28.0",
"4.0.0",
"codefile_type_pyldd",
_codefile_type,
addendum="Use `conda_build.os_utils.pyldd.codefile_class` instead.",
)
deprecated.constant(
"3.28.0",
"4.0.0",
"codefile_type",
_codefile_type,
addendum="Use `conda_build.os_utils.liefldd.codefile_class` instead.",
)


def _trim_sysroot(sysroot):
Expand All @@ -111,7 +139,9 @@ def get_libraries(file):
if binary.format == lief.EXE_FORMATS.PE:
result = binary.libraries
else:
result = [lib if is_string(lib) else lib.name for lib in binary.libraries]
result = [
lib if isinstance(lib, str) else lib.name for lib in binary.libraries
]
# LIEF returns LC_ID_DYLIB name @rpath/libbz2.dylib in binary.libraries. Strip that.
binary_name = None
if binary.format == lief.EXE_FORMATS.MACHO:
Expand Down Expand Up @@ -505,7 +535,7 @@ def inspect_linkages_lief(
while tmp_filename:
if (
not parent_exe_dirname
and codefile_type(tmp_filename) == "EXEfile"
and codefile_class(tmp_filename) == EXEfile
):
parent_exe_dirname = os.path.dirname(tmp_filename)
tmp_filename = parents_by_filename[tmp_filename]
Expand Down Expand Up @@ -595,7 +625,7 @@ def get_linkages(
result_pyldd = []
debug = False
if not have_lief or debug:
if codefile_type(filename) not in ("DLLfile", "EXEfile"):
if codefile_class(filename) not in (DLLfile, EXEfile):
result_pyldd = inspect_linkages_pyldd(
filename,
resolve_filenames=resolve_filenames,
Expand All @@ -607,7 +637,7 @@ def get_linkages(
return result_pyldd
else:
print(
f"WARNING: failed to get_linkages, codefile_type('{filename}')={codefile_type(filename)}"
f"WARNING: failed to get_linkages, codefile_class('{filename}')={codefile_class(filename)}"
)
return {}
result_lief = inspect_linkages_lief(
Expand Down
81 changes: 50 additions & 31 deletions conda_build/os_utils/pyldd.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,20 @@
# Copyright (C) 2014 Anaconda, Inc
# SPDX-License-Identifier: BSD-3-Clause
from __future__ import annotations

import argparse
import glob
import logging
import os
import re
import struct
import sys
from pathlib import Path

from conda_build.utils import ensure_list, get_logger

from ..deprecations import deprecated

logging.basicConfig(level=logging.INFO)


Expand Down Expand Up @@ -1028,46 +1033,60 @@ def codefile(file, arch="any", initial_rpaths_transitive=[]):
return inscrutablefile(file, list(initial_rpaths_transitive))


def codefile_class(filename, skip_symlinks=False):
if os.path.islink(filename):
if skip_symlinks:
return None
else:
filename = os.path.realpath(filename)
if os.path.isdir(filename):
def codefile_class(
path: str | os.PathLike | Path,
skip_symlinks: bool = False,
) -> type[DLLfile | EXEfile | machofile | elffile] | None:
# same signature as conda_build.os_utils.liefldd.codefile_class
path = Path(path)
if skip_symlinks and path.is_symlink():
return None
if filename.endswith((".dll", ".pyd")):
path = path.resolve()

def _get_magic_bit(path: Path) -> bytes:
with path.open("rb") as handle:
bit = handle.read(4)
return struct.unpack(BIG_ENDIAN + "L", bit)[0]

if path.is_dir():
return None
elif path.suffix.lower() in (".dll", ".pyd"):
return DLLfile
if filename.endswith(".exe"):
elif path.suffix.lower() == ".exe":
return EXEfile
# Java .class files share 0xCAFEBABE with Mach-O FAT_MAGIC.
if filename.endswith(".class"):
elif path.suffix.lower() == ".class":
# Java .class files share 0xCAFEBABE with Mach-O FAT_MAGIC.
return None
if not os.path.exists(filename) or os.path.getsize(filename) < 4:
elif not path.exists() or path.stat().st_size < 4:
return None
elif (magic := _get_magic_bit(path)) == ELF_HDR:
return elffile
elif magic in (FAT_MAGIC, MH_MAGIC, MH_CIGAM, MH_CIGAM_64):
return machofile
else:
return None
with open(filename, "rb") as file:
(magic,) = struct.unpack(BIG_ENDIAN + "L", file.read(4))
file.seek(0)
if magic in (FAT_MAGIC, MH_MAGIC, MH_CIGAM, MH_CIGAM_64):
return machofile
elif magic == ELF_HDR:
return elffile
return None


def is_codefile(filename, skip_symlinks=True):
klass = codefile_class(filename, skip_symlinks=skip_symlinks)
if not klass:
return False
return True
@deprecated(
"3.28.0",
"4.0.0",
addendum="Use `conda_build.os_utils.pyldd.codefile_class` instead.",
)
def is_codefile(path: str | os.PathLike | Path, skip_symlinks: bool = True) -> bool:
return bool(codefile_class(path, skip_symlinks=skip_symlinks))


def codefile_type(filename, skip_symlinks=True):
"Returns None, 'machofile' or 'elffile'"
klass = codefile_class(filename, skip_symlinks=skip_symlinks)
if not klass:
return None
return klass.__name__
@deprecated(
"3.28.0",
"4.0.0",
addendum="Use `conda_build.os_utils.pyldd.codefile_class` instead.",
)
def codefile_type(
path: str | os.PathLike | Path,
skip_symlinks: bool = True,
) -> str | None:
codefile = codefile_class(path, skip_symlinks=skip_symlinks)
return codefile.__name__ if codefile else None


def _trim_sysroot(sysroot):
Expand Down
Loading

0 comments on commit 7c9e766

Please sign in to comment.