Skip to content

Commit

Permalink
Type annotation fixes
Browse files Browse the repository at this point in the history
Lots of changes to the types in the recent lief release.
* removed name from Binary
* no name for section/symbol of type str
  (lief-project/LIEF#965)
* created a proxy class Binary
* added pyright to check tests folder
  • Loading branch information
fzakaria committed Sep 11, 2023
1 parent c391445 commit 9af1341
Show file tree
Hide file tree
Showing 11 changed files with 100 additions and 55 deletions.
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ profile = "black"
addopts = ""

[tool.pyright]
include = ["sqlelf"]
include = ["sqlelf", "tests"]
exclude = ["**/__pycache__"]

reportMissingImports = true
Expand Down
13 changes: 7 additions & 6 deletions sqlelf/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,10 @@
import apsw
import apsw.bestpractice
import apsw.shell
import lief

from sqlelf import ldd

from .elf import dynamic, header, instruction, section, strings, symbol
from sqlelf.elf import dynamic, header, instruction, section, strings, symbol
from sqlelf.elf.binary import Binary


def start(args=sys.argv[1:], stdin=sys.stdin):
Expand Down Expand Up @@ -55,13 +54,15 @@ def start(args=sys.argv[1:], stdin=sys.stdin):
),
)
# Filter the list of filenames to those that are ELF files only
filenames = list(filter(lambda f: os.path.isfile(f) and lief.is_elf(f), filenames))
filenames = list(
filter(lambda f: os.path.isfile(f) and Binary.is_elf(f), filenames)
)

# If none of the inputs are valid files, simply return
if len(filenames) == 0:
return

binaries: list[lief.Binary] = [lief.parse(filename) for filename in filenames]
binaries: list[Binary] = [Binary(filename) for filename in filenames]

# If the recursive option is specified, load the shared libraries
# the binary would load as well.
Expand All @@ -76,7 +77,7 @@ def start(args=sys.argv[1:], stdin=sys.stdin):
for library in sub_list
]
)
binaries = binaries + [lief.parse(library) for library in shared_libraries]
binaries = binaries + [Binary(library) for library in shared_libraries]

# forward sqlite logs to logging module
apsw.bestpractice.apply(apsw.bestpractice.recommended)
Expand Down
33 changes: 33 additions & 0 deletions sqlelf/elf/binary.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# pyright: strict
from typing import TYPE_CHECKING, Any

import lief

# Let's make sure type checking works for this proxy class
# https://stackoverflow.com/questions/71365594/how-to-make-a-proxy-object-with-typing-as-underlying-object-in-python
if TYPE_CHECKING:
base = lief.ELF.Binary
else:
base = object


class Binary(base):
    """Proxy the lief.Binary object to add a path attribute.

    As of https://github.com/lief-project/LIEF/issues/839 the name
    attribute in lief.Binary was removed. Rather than passing around
    a (path, binary) tuple, this class remembers the path and forwards
    every other attribute lookup to the object returned by ``lief.parse``.

    For type checkers ``base`` is ``lief.ELF.Binary`` so the proxied
    attributes are known; at runtime ``base`` is ``object`` and the
    forwarding is done by ``__getattr__``.
    """

    def __init__(self, path: str):
        """Parse the file at *path* and wrap the resulting lief binary.

        Args:
            path: Filesystem path of the binary to parse.

        Raises:
            ValueError: If ``lief.parse`` returns ``None`` for *path*.
        """
        self.path = path
        parsed = lief.parse(path)
        # Fail fast: storing None would only surface later as a confusing
        # "'NoneType' object has no attribute ..." on the first proxied access.
        if parsed is None:
            raise ValueError(f"Unable to parse {path!r} as a binary")
        self.__binary = parsed

    if not TYPE_CHECKING:

        def __getattr__(self, attr: str) -> Any:
            """Forward lookups that fail on the proxy to the wrapped binary."""
            # __getattr__ only runs after normal lookup has failed. Fetch the
            # wrapped object without going through __getattr__ again, since an
            # instance created without __init__ (e.g. by copy or pickle) has
            # no wrapped object and would otherwise recurse until
            # RecursionError.
            try:
                wrapped = object.__getattribute__(self, "_Binary__binary")
            except AttributeError:
                raise AttributeError(
                    f"{type(self).__name__!r} object has no attribute {attr!r}"
                ) from None
            return getattr(wrapped, attr)

    @staticmethod
    def is_elf(path: str) -> bool:
        """Return whether lief identifies the file at *path* as an ELF."""
        return lief.is_elf(path)
17 changes: 11 additions & 6 deletions sqlelf/elf/dynamic.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,24 +5,29 @@

import apsw
import apsw.ext
import lief

from sqlelf.elf.binary import Binary


# This is effectively the .dynamic section but it is elevated as a table here
# since it is widely used and can benefit from simpler table access.
def elf_dynamic_entries(binaries: list[lief.Binary]):
def elf_dynamic_entries(binaries: list[Binary]):
def generator() -> Iterator[dict[str, Any]]:
for binary in binaries:
# super important that these accessors are pulled out of the tight loop
# as they can be costly
binary_name = binary.name
for entry in binary.dynamic_entries: # pyright: ignore
yield {"path": binary_name, "tag": entry.tag.name, "value": entry.value}
binary_path = binary.path
for entry in binary.dynamic_entries:
yield {
"path": binary_path,
"tag": entry.tag.__name__,
"value": entry.value,
}

return generator


def register(connection: apsw.Connection, binaries: list[lief.Binary]):
def register(connection: apsw.Connection, binaries: list[Binary]):
generator = elf_dynamic_entries(binaries)
# setup columns and access by providing an example of the first entry returned
generator.columns, generator.column_access = apsw.ext.get_column_names(
Expand Down
18 changes: 8 additions & 10 deletions sqlelf/elf/header.py
Original file line number Diff line number Diff line change
@@ -1,28 +1,26 @@
# Without this Python was complaining
from __future__ import annotations

from typing import Any, Iterator

import apsw
import apsw.ext
import lief

from sqlelf.elf.binary import Binary


def elf_headers(binaries: list[lief.Binary]):
def elf_headers(binaries: list[Binary]):
def generator() -> Iterator[dict[str, Any]]:
for binary in binaries:
yield {
"path": binary.name,
"type": binary.header.file_type.name,
"machine": binary.header.machine_type.name,
"version": binary.header.identity_version.name,
"path": binary.path,
"type": binary.header.file_type.__name__,
"machine": binary.header.machine_type.__name__,
"version": binary.header.identity_version.__name__,
"entry": binary.header.entrypoint,
}

return generator


def register(connection: apsw.Connection, binaries: list[lief.Binary]):
def register(connection: apsw.Connection, binaries: list[Binary]):
generator = elf_headers(binaries)
# setup columns and access by providing an example of the first entry returned
generator.columns, generator.column_access = apsw.ext.get_column_names(
Expand Down
18 changes: 10 additions & 8 deletions sqlelf/elf/instruction.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,15 @@
import capstone # pyright: ignore
import lief

from sqlelf.elf.binary import Binary

def elf_instructions(binaries: list[lief.Binary]):

def elf_instructions(binaries: list[Binary]):
def generator() -> Iterator[dict[str, Any]]:
for binary in binaries:
# super important that these accessors are pulled out of the tight loop
# as they can be costly
binary_name = binary.name
binary_path = binary.path

for section in binary.sections:
if section.has(lief.ELF.SECTION_FLAGS.EXECINSTR):
Expand All @@ -34,7 +36,7 @@ def generator() -> Iterator[dict[str, Any]]:
data, section.virtual_address
):
yield {
"path": binary_name,
"path": binary_path,
"section": section_name,
"mnemonic": mnemonic,
"address": address,
Expand All @@ -44,19 +46,19 @@ def generator() -> Iterator[dict[str, Any]]:
return generator


def mode(binary: lief.Binary) -> int:
def mode(binary: Binary) -> int:
if binary.header.identity_class == lief.ELF.ELF_CLASS.CLASS64:
return capstone.CS_MODE_64
raise Exception(f"Unknown mode for {binary.name}")
raise Exception(f"Unknown mode for {binary.path}")


def arch(binary: lief.Binary) -> int:
def arch(binary: Binary) -> int:
if binary.header.machine_type == lief.ELF.ARCH.x86_64:
return capstone.CS_ARCH_X86
raise Exception(f"Unknown machine type for {binary.name}")
raise Exception(f"Unknown machine type for {binary.path}")


def register(connection: apsw.Connection, binaries: list[lief.Binary]):
def register(connection: apsw.Connection, binaries: list[Binary]):
generator = elf_instructions(binaries)
# setup columns and access by providing an example of the first entry returned
generator.columns, generator.column_access = apsw.ext.get_column_names(
Expand Down
13 changes: 7 additions & 6 deletions sqlelf/elf/section.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,22 +5,23 @@

import apsw
import apsw.ext
import lief

from sqlelf.elf.binary import Binary

def elf_sections(binaries: list[lief.Binary]):

def elf_sections(binaries: list[Binary]):
def generator() -> Iterator[dict[str, Any]]:
for binary in binaries:
# super important that these accessors are pulled out of the tight loop
# as they can be costly
binary_name = binary.name
binary_path = binary.path
for section in binary.sections:
yield {
"path": binary_name,
"path": binary_path,
"name": section.name,
"offset": section.offset,
"size": section.size,
"type": section.type.name,
"type": section.type.__name__,
"content": bytes(section.content),
}

Expand All @@ -33,7 +34,7 @@ def section_name(name: str | None) -> str | None:
return name


def register(connection: apsw.Connection, binaries: list[lief.Binary]):
def register(connection: apsw.Connection, binaries: list[Binary]):
generator = elf_sections(binaries)
# setup columns and access by providing an example of the first entry returned
generator.columns, generator.column_access = apsw.ext.get_column_names(
Expand Down
10 changes: 6 additions & 4 deletions sqlelf/elf/strings.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,10 @@
import apsw.ext
import lief

from sqlelf.elf.binary import Binary

def elf_strings(binaries: list[lief.Binary]):

def elf_strings(binaries: list[Binary]):
def generator() -> Iterator[dict[str, Any]]:
for binary in binaries:
strtabs = [
Expand All @@ -18,19 +20,19 @@ def generator() -> Iterator[dict[str, Any]]:
]
# super important that these accessors are pulled out of the tight loop
# as they can be costly
binary_name = binary.name
binary_path = binary.path
for strtab in strtabs:
# The first byte is always the null byte in the STRTAB
# Python also treats the final null in the string by creating
# an empty item so we chop it off.
# https://stackoverflow.com/a/18970869
for string in str(strtab.content[1:-1], "utf-8").split("\x00"):
yield {"path": binary_name, "section": strtab.name, "value": string}
yield {"path": binary_path, "section": strtab.name, "value": string}

return generator


def register(connection: apsw.Connection, binaries: list[lief.Binary]):
def register(connection: apsw.Connection, binaries: list[Binary]):
generator = elf_strings(binaries)
# setup columns and access by providing an example of the first entry returned
generator.columns, generator.column_access = apsw.ext.get_column_names(
Expand Down
18 changes: 10 additions & 8 deletions sqlelf/elf/symbol.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,17 +5,17 @@

import apsw
import apsw.ext
import lief

from ..elf.section import section_name as elf_section_name
from sqlelf.elf.binary import Binary
from sqlelf.elf.section import section_name as elf_section_name


def elf_symbols(binaries: list[lief.Binary]):
def elf_symbols(binaries: list[Binary]):
def generator() -> Iterator[dict[str, Any]]:
for binary in binaries:
# super important that these accessors are pulled out of the tight loop
# as they can be costly
binary_name = binary.name
binary_path = binary.path
for symbol in binary.symbols:
# The section index can be special numbers like 65521 or 65522
# that refer to special sections so they can't be indexed
Expand All @@ -26,9 +26,10 @@ def generator() -> Iterator[dict[str, Any]]:
if shndx == symbol.shndx
),
None,
)
) # pyright: ignore (https://github.com/lief-project/LIEF/issues/965)

yield {
"path": binary_name,
"path": binary_path,
"name": symbol.name,
"demangled_name": symbol.demangled_name,
# A bit of detailed explanation here to explain these values.
Expand All @@ -47,14 +48,15 @@ def generator() -> Iterator[dict[str, Any]]:
# TODO(fzakaria): Better understand why is it auxiliary?
# this returns versions like GLIBC_2.2.5
"version": symbol.symbol_version.symbol_version_auxiliary.name
if symbol.symbol_version.symbol_version_auxiliary
if symbol.symbol_version
and symbol.symbol_version.symbol_version_auxiliary
else None,
}

return generator


def register(connection: apsw.Connection, binaries: list[lief.Binary]):
def register(connection: apsw.Connection, binaries: list[Binary]):
generator = elf_symbols(binaries)
# setup columns and access by providing an example of the first entry returned
generator.columns, generator.column_access = apsw.ext.get_column_names(
Expand Down
7 changes: 4 additions & 3 deletions sqlelf/ldd.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,15 @@
from collections import OrderedDict
from typing import Dict

import lief
import sh # pyright: ignore

from sqlelf.elf.binary import Binary

def libraries(binary: lief.Binary) -> Dict[str, str]:

def libraries(binary: Binary) -> Dict[str, str]:
"""Use the interpreter in a binary to determine the path of each linked library"""
interpreter = sh.Command(binary.interpreter) # pyright: ignore
resolution = interpreter("--list", binary.name)
resolution = interpreter("--list", binary.path)
result = OrderedDict()
# TODO: Figure out why `--list` and `ldd` produce different outcomes
# specifically for the interpreter.
Expand Down
6 changes: 3 additions & 3 deletions tests/test_ldd.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
from sqlelf import ldd
import lief
from sqlelf.elf.binary import Binary
from unittest.mock import patch

def test_simple_binary_real():
binary = lief.parse("/bin/ls")
binary = Binary("/bin/ls")
result = ldd.libraries(binary)
assert len(result) > 0


@patch("sh.Command")
def test_simple_binary_mocked(Command):
binary = lief.parse("/bin/ls")
binary = Binary("/bin/ls")
Command(binary.interpreter).return_value = """
linux-vdso.so.1 (0x00007ffc5d8ff000)
/lib/x86_64-linux-gnu/libnss_cache.so.2 (0x00007f6995d92000)
Expand Down

0 comments on commit 9af1341

Please sign in to comment.