From 0f854d2d5566ac7ac652fe0dd62f049837e0dcd0 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 13 Sep 2022 10:06:56 -0700 Subject: [PATCH 1/4] drgn.helpers.linux: remove unused "# type: ignore" python/mypy#1422 was fixed awhile ago. Signed-off-by: Omar Sandoval --- drgn/helpers/linux/__init__.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drgn/helpers/linux/__init__.py b/drgn/helpers/linux/__init__.py index c46a3a571..810ab7e25 100644 --- a/drgn/helpers/linux/__init__.py +++ b/drgn/helpers/linux/__init__.py @@ -35,10 +35,7 @@ from typing import List __all__: List[str] = [] -for _module_info in pkgutil.iter_modules( - __path__, # type: ignore[name-defined] # python/mypy#1422 - prefix=__name__ + ".", -): +for _module_info in pkgutil.iter_modules(__path__, prefix=__name__ + "."): _submodule = importlib.import_module(_module_info.name) _submodule_all = getattr(_submodule, "__all__", ()) __all__.extend(_submodule_all) From 4e86a9ae5639d20c03f04c08ed7eec2539dac2b2 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 13 Sep 2022 10:15:09 -0700 Subject: [PATCH 2/4] Create drgn.helpers.common package This will contain the new modules that Nhat is adding and be the new home for some of the stuff currently in the top-level drgn.helpers module. Signed-off-by: Omar Sandoval --- drgn/helpers/common/__init__.py | 30 ++++++++++++++++++++++++++++++ drgn/internal/cli.py | 10 ++++++---- 2 files changed, 36 insertions(+), 4 deletions(-) create mode 100644 drgn/helpers/common/__init__.py diff --git a/drgn/helpers/common/__init__.py b/drgn/helpers/common/__init__.py new file mode 100644 index 000000000..e5a3f1a4d --- /dev/null +++ b/drgn/helpers/common/__init__.py @@ -0,0 +1,30 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# SPDX-License-Identifier: GPL-3.0-or-later + +""" +Common +------ + +The ``drgn.helpers.common`` package provides helpers that can be used with any +program. 
The helpers are available from the individual modules in which they +are defined and from this top-level package. E.g., the following are both +valid: + +>>> from drgn.helpers.common.memory import identify_address +>>> from drgn.helpers.common import identify_address + +Some of these helpers may have additional program-specific behavior but are +otherwise generic. +""" + +import importlib +import pkgutil +from typing import List + +__all__: List[str] = [] +for _module_info in pkgutil.iter_modules(__path__, prefix=__name__ + "."): + _submodule = importlib.import_module(_module_info.name) + _submodule_all = getattr(_submodule, "__all__", ()) + __all__.extend(_submodule_all) + for _name in _submodule_all: + globals()[_name] = getattr(_submodule, _name) diff --git a/drgn/internal/cli.py b/drgn/internal/cli.py index 77536b09c..40d01d992 100644 --- a/drgn/internal/cli.py +++ b/drgn/internal/cli.py @@ -226,12 +226,14 @@ def write_history_file() -> None: sys.displayhook = displayhook - banner = """\ + banner = f"""\ For help, type help(drgn). >>> import drgn ->>> from drgn import """ + ", ".join( - drgn_globals - ) +>>> from drgn import {", ".join(drgn_globals)} +>>> from drgn.helpers.common import *""" + module = importlib.import_module("drgn.helpers.common") + for name in module.__dict__["__all__"]: + init_globals[name] = getattr(module, name) if prog.flags & drgn.ProgramFlags.IS_LINUX_KERNEL: banner += "\n>>> from drgn.helpers.linux import *" module = importlib.import_module("drgn.helpers.linux") From c2c75e5d177dbecf72ea5e01483ba4b36934caaf Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 13 Sep 2022 11:01:02 -0700 Subject: [PATCH 3/4] Move top-level drgn.helpers helpers into drgn.helpers.common submodules This is a better place to put all of these generic helpers now. I think these are all uncommon enough that it shouldn't be too much trouble to move them. 
Signed-off-by: Omar Sandoval --- drgn/helpers/__init__.py | 220 +----------------- drgn/helpers/common/format.py | 201 ++++++++++++++++ drgn/helpers/common/type.py | 40 ++++ drgn/helpers/linux/block.py | 2 +- drgn/helpers/linux/fs.py | 2 +- drgn/helpers/linux/mm.py | 2 +- drgn/helpers/linux/slab.py | 2 +- examples/linux/cgroup.py | 2 +- examples/linux/tcp_sock.py | 2 +- tests/helpers/common/__init__.py | 0 .../test_format.py} | 2 +- tools/bpf_inspect.py | 2 +- 12 files changed, 252 insertions(+), 225 deletions(-) create mode 100644 drgn/helpers/common/format.py create mode 100644 drgn/helpers/common/type.py create mode 100644 tests/helpers/common/__init__.py rename tests/helpers/{test_helpers.py => common/test_format.py} (97%) diff --git a/drgn/helpers/__init__.py b/drgn/helpers/__init__.py index 7ee37c99a..fc64a5ea0 100644 --- a/drgn/helpers/__init__.py +++ b/drgn/helpers/__init__.py @@ -6,228 +6,14 @@ ------- The ``drgn.helpers`` package contains subpackages which provide helpers for -working with particular types of programs. Currently, there are only helpers -for the Linux kernel. In the future, there may be helpers for, e.g., glibc and -libstdc++. - -Generic Helpers -=============== - -The top-level ``drgn.helpers`` module provides generic helpers that may be -useful for scripts or for implementing other helpers. +working with particular types of programs. Currently, there are common helpers +and helpers for the Linux kernel. In the future, there may be helpers for, +e.g., glibc and libstdc++. """ -import enum -import typing -from typing import Container, Iterable, Tuple - -from drgn import IntegerLike, Type - class ValidationError(Exception): """ Error raised by a :ref:`validator ` when an inconsistent or invalid state is detected. 
""" - - -def escape_ascii_character( - c: int, - escape_single_quote: bool = False, - escape_double_quote: bool = False, - escape_backslash: bool = False, -) -> str: - """ - Format an ASCII byte value as a character, possibly escaping it. - Non-printable characters are always escaped. Non-printable characters other - than ``\\0``, ``\\a``, ``\\b``, ``\\t``, ``\\n``, ``\\v``, ``\\f``, and - ``\\r`` are escaped in hexadecimal format (e.g., ``\\x7f``). By default, - printable characters are never escaped. - - :param c: The character to escape. - :param escape_single_quote: Whether to escape single quotes to ``\\'``. - :param escape_double_quote: Whether to escape double quotes to ``\\"``. - :param escape_backslash: Whether to escape backslashes to ``\\\\``. - """ - if c == 0: - return r"\0" - elif c == 7: - return r"\a" - elif c == 8: - return r"\b" - elif c == 9: - return r"\t" - elif c == 10: - return r"\n" - elif c == 11: - return r"\v" - elif c == 12: - return r"\f" - elif c == 13: - return r"\r" - elif escape_double_quote and c == 34: - return r"\"" - elif escape_single_quote and c == 39: - return r"\'" - elif escape_backslash and c == 92: - return r"\\" - elif 32 <= c <= 126: - return chr(c) - else: - return f"\\x{c:02x}" - - -def escape_ascii_string( - buffer: Iterable[int], - escape_single_quote: bool = False, - escape_double_quote: bool = False, - escape_backslash: bool = False, -) -> str: - """ - Escape an iterable of ASCII byte values (e.g., :class:`bytes` or - :class:`bytearray`). See :func:`escape_ascii_character()`. - - :param buffer: The byte array. 
- """ - return "".join( - escape_ascii_character( - c, - escape_single_quote=escape_single_quote, - escape_double_quote=escape_double_quote, - escape_backslash=escape_backslash, - ) - for c in buffer - ) - - -def enum_type_to_class( - type: Type, name: str, exclude: Container[str] = (), prefix: str = "" -) -> typing.Type[enum.IntEnum]: - """ - Get an :class:`enum.IntEnum` class from an enumerated :class:`drgn.Type`. - - :param type: The enumerated type to convert. - :param name: The name of the ``IntEnum`` type to create. - :param exclude: Container (e.g., list or set) of enumerator names to - exclude from the created ``IntEnum``. - :param prefix: Prefix to strip from the beginning of enumerator names. - """ - if type.enumerators is None: - raise TypeError("enum type is incomplete") - enumerators = [ - (name[len(prefix) :] if name.startswith(prefix) else name, value) - for (name, value) in type.enumerators - if name not in exclude - ] - return enum.IntEnum(name, enumerators) # type: ignore # python/mypy#4865 - - -def decode_flags( - value: IntegerLike, - flags: Iterable[Tuple[str, int]], - bit_numbers: bool = True, -) -> str: - """ - Get a human-readable representation of a bitmask of flags. - - By default, flags are specified by their bit number: - - >>> decode_flags(2, [("BOLD", 0), ("ITALIC", 1), ("UNDERLINE", 2)]) - 'ITALIC' - - They can also be specified by their value: - - >>> decode_flags(2, [("BOLD", 1), ("ITALIC", 2), ("UNDERLINE", 4)], - ... 
bit_numbers=False) - 'ITALIC' - - Multiple flags are combined with "|": - - >>> decode_flags(5, [("BOLD", 0), ("ITALIC", 1), ("UNDERLINE", 2)]) - 'BOLD|UNDERLINE' - - If there are multiple names for the same bit, they are all included: - - >>> decode_flags(2, [("SMALL", 0), ("BIG", 1), ("LARGE", 1)]) - 'BIG|LARGE' - - If there are any unknown bits, their raw value is included: - - >>> decode_flags(27, [("BOLD", 0), ("ITALIC", 1), ("UNDERLINE", 2)]) - 'BOLD|ITALIC|0x18' - - Zero is returned verbatim: - - >>> decode_flags(0, [("BOLD", 0), ("ITALIC", 1), ("UNDERLINE", 2)]) - '0' - - :param value: Bitmask to decode. - :param flags: List of flag names and their bit numbers or values. - :param bit_numbers: Whether *flags* specifies the bit numbers (where 0 is - the least significant bit) or values of the flags. - """ - value = value.__index__() - if value == 0: - return "0" - - parts = [] - mask = 0 - for name, flag in flags: - if bit_numbers: - flag = 1 << flag - if value & flag: - parts.append(name) - mask |= flag - - if value & ~mask: - parts.append(hex(value & ~mask)) - - return "|".join(parts) - - -def decode_enum_type_flags( - value: IntegerLike, - type: Type, - bit_numbers: bool = True, -) -> str: - """ - Get a human-readable representation of a bitmask of flags where the flags - are specified by an enumerated :class:`drgn.Type`. - - This supports enums where the values are bit numbers: - - >>> print(bits_enum) - enum style_bits { - BOLD = 0, - ITALIC = 1, - UNDERLINE = 2, - } - >>> decode_enum_type_flags(5, bits_enum) - 'BOLD|UNDERLINE' - - Or the values of the flags: - - >>> print(flags_enum) - enum style_flags { - BOLD = 1, - ITALIC = 2, - UNDERLINE = 4, - } - >>> decode_enum_type_flags(5, flags_enum, bit_numbers=False) - 'BOLD|UNDERLINE' - - See :func:`decode_flags()`. - - :param value: Bitmask to decode. - :param type: Enumerated type with bit numbers for enumerators. 
- :param bit_numbers: Whether the enumerator values specify the bit numbers - or values of the flags. - """ - enumerators = type.enumerators - if enumerators is None: - raise TypeError("cannot decode incomplete enumerated type") - return decode_flags( - value, - enumerators, # type: ignore # python/mypy#592 - bit_numbers, - ) diff --git a/drgn/helpers/common/format.py b/drgn/helpers/common/format.py new file mode 100644 index 000000000..e47c62905 --- /dev/null +++ b/drgn/helpers/common/format.py @@ -0,0 +1,201 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# SPDX-License-Identifier: GPL-3.0-or-later + +""" +Formatting +---------- + +The ``drgn.helpers.common.format`` module provides generic helpers for +formatting different things as text. +""" + +from typing import Iterable, Tuple + +from drgn import IntegerLike, Type + +__all__ = ( + "escape_ascii_character", + "escape_ascii_string", + "decode_flags", + "decode_enum_type_flags", +) + + +def escape_ascii_character( + c: int, + escape_single_quote: bool = False, + escape_double_quote: bool = False, + escape_backslash: bool = False, +) -> str: + """ + Format an ASCII byte value as a character, possibly escaping it. + Non-printable characters are always escaped. Non-printable characters other + than ``\\0``, ``\\a``, ``\\b``, ``\\t``, ``\\n``, ``\\v``, ``\\f``, and + ``\\r`` are escaped in hexadecimal format (e.g., ``\\x7f``). By default, + printable characters are never escaped. + + :param c: Character to escape. + :param escape_single_quote: Whether to escape single quotes to ``\\'``. + :param escape_double_quote: Whether to escape double quotes to ``\\"``. + :param escape_backslash: Whether to escape backslashes to ``\\\\``. 
+ """ + if c == 0: + return r"\0" + elif c == 7: + return r"\a" + elif c == 8: + return r"\b" + elif c == 9: + return r"\t" + elif c == 10: + return r"\n" + elif c == 11: + return r"\v" + elif c == 12: + return r"\f" + elif c == 13: + return r"\r" + elif escape_double_quote and c == 34: + return r"\"" + elif escape_single_quote and c == 39: + return r"\'" + elif escape_backslash and c == 92: + return r"\\" + elif 32 <= c <= 126: + return chr(c) + else: + return f"\\x{c:02x}" + + +def escape_ascii_string( + buffer: Iterable[int], + escape_single_quote: bool = False, + escape_double_quote: bool = False, + escape_backslash: bool = False, +) -> str: + """ + Escape an iterable of ASCII byte values (e.g., :class:`bytes` or + :class:`bytearray`). See :func:`escape_ascii_character()`. + + :param buffer: Byte array to escape. + """ + return "".join( + escape_ascii_character( + c, + escape_single_quote=escape_single_quote, + escape_double_quote=escape_double_quote, + escape_backslash=escape_backslash, + ) + for c in buffer + ) + + +def decode_flags( + value: IntegerLike, + flags: Iterable[Tuple[str, int]], + bit_numbers: bool = True, +) -> str: + """ + Get a human-readable representation of a bitmask of flags. + + By default, flags are specified by their bit number: + + >>> decode_flags(2, [("BOLD", 0), ("ITALIC", 1), ("UNDERLINE", 2)]) + 'ITALIC' + + They can also be specified by their value: + + >>> decode_flags(2, [("BOLD", 1), ("ITALIC", 2), ("UNDERLINE", 4)], + ... 
bit_numbers=False) + 'ITALIC' + + Multiple flags are combined with "|": + + >>> decode_flags(5, [("BOLD", 0), ("ITALIC", 1), ("UNDERLINE", 2)]) + 'BOLD|UNDERLINE' + + If there are multiple names for the same bit, they are all included: + + >>> decode_flags(2, [("SMALL", 0), ("BIG", 1), ("LARGE", 1)]) + 'BIG|LARGE' + + If there are any unknown bits, their raw value is included: + + >>> decode_flags(27, [("BOLD", 0), ("ITALIC", 1), ("UNDERLINE", 2)]) + 'BOLD|ITALIC|0x18' + + Zero is returned verbatim: + + >>> decode_flags(0, [("BOLD", 0), ("ITALIC", 1), ("UNDERLINE", 2)]) + '0' + + :param value: Bitmask to decode. + :param flags: List of flag names and their bit numbers or values. + :param bit_numbers: Whether *flags* specifies the bit numbers (where 0 is + the least significant bit) or values of the flags. + """ + value = value.__index__() + if value == 0: + return "0" + + parts = [] + mask = 0 + for name, flag in flags: + if bit_numbers: + flag = 1 << flag + if value & flag: + parts.append(name) + mask |= flag + + if value & ~mask: + parts.append(hex(value & ~mask)) + + return "|".join(parts) + + +def decode_enum_type_flags( + value: IntegerLike, + type: Type, + bit_numbers: bool = True, +) -> str: + """ + Get a human-readable representation of a bitmask of flags where the flags + are specified by an enumerated :class:`drgn.Type`. + + This supports enums where the values are bit numbers: + + >>> print(bits_enum) + enum style_bits { + BOLD = 0, + ITALIC = 1, + UNDERLINE = 2, + } + >>> decode_enum_type_flags(5, bits_enum) + 'BOLD|UNDERLINE' + + Or the values of the flags: + + >>> print(flags_enum) + enum style_flags { + BOLD = 1, + ITALIC = 2, + UNDERLINE = 4, + } + >>> decode_enum_type_flags(5, flags_enum, bit_numbers=False) + 'BOLD|UNDERLINE' + + See :func:`decode_flags()`. + + :param value: Bitmask to decode. + :param type: Enumerated type with bit numbers for enumerators. 
+ :param bit_numbers: Whether the enumerator values specify the bit numbers + or values of the flags. + """ + enumerators = type.enumerators + if enumerators is None: + raise TypeError("cannot decode incomplete enumerated type") + return decode_flags( + value, + enumerators, # type: ignore # python/mypy#592 + bit_numbers, + ) diff --git a/drgn/helpers/common/type.py b/drgn/helpers/common/type.py new file mode 100644 index 000000000..27197588b --- /dev/null +++ b/drgn/helpers/common/type.py @@ -0,0 +1,40 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# SPDX-License-Identifier: GPL-3.0-or-later + +""" +Types +----- + +The ``drgn.helpers.common.type`` module provides generic helpers for working +with types in ways that aren't provided by the core drgn library. +""" + +import enum +import typing +from typing import Container + +from drgn import Type + +__all__ = ("enum_type_to_class",) + + +def enum_type_to_class( + type: Type, name: str, exclude: Container[str] = (), prefix: str = "" +) -> typing.Type[enum.IntEnum]: + """ + Get an :class:`enum.IntEnum` class from an enumerated :class:`drgn.Type`. + + :param type: Enumerated type to convert. + :param name: Name of the ``IntEnum`` type to create. + :param exclude: Container (e.g., list or set) of enumerator names to + exclude from the created ``IntEnum``. + :param prefix: Prefix to strip from the beginning of enumerator names. 
+ """ + if type.enumerators is None: + raise TypeError("enum type is incomplete") + enumerators = [ + (name[len(prefix) :] if name.startswith(prefix) else name, value) + for (name, value) in type.enumerators + if name not in exclude + ] + return enum.IntEnum(name, enumerators) # type: ignore # python/mypy#4865 diff --git a/drgn/helpers/linux/block.py b/drgn/helpers/linux/block.py index 0bbdc4f69..7f1e10a9f 100644 --- a/drgn/helpers/linux/block.py +++ b/drgn/helpers/linux/block.py @@ -15,7 +15,7 @@ from typing import Iterator from drgn import Object, Program, container_of -from drgn.helpers import escape_ascii_string +from drgn.helpers.common.format import escape_ascii_string from drgn.helpers.linux.device import MAJOR, MINOR, MKDEV from drgn.helpers.linux.list import list_for_each_entry diff --git a/drgn/helpers/linux/fs.py b/drgn/helpers/linux/fs.py index 71cbfc361..12d15d972 100644 --- a/drgn/helpers/linux/fs.py +++ b/drgn/helpers/linux/fs.py @@ -13,7 +13,7 @@ from typing import Iterator, Optional, Tuple, Union, overload from drgn import IntegerLike, Object, Path, Program, container_of, sizeof -from drgn.helpers import escape_ascii_string +from drgn.helpers.common.format import escape_ascii_string from drgn.helpers.linux.list import ( hlist_empty, hlist_for_each_entry, diff --git a/drgn/helpers/linux/mm.py b/drgn/helpers/linux/mm.py index 50aa68e4d..8452bd5a1 100644 --- a/drgn/helpers/linux/mm.py +++ b/drgn/helpers/linux/mm.py @@ -15,7 +15,7 @@ from _drgn import _linux_helper_direct_mapping_offset, _linux_helper_read_vm from drgn import IntegerLike, Object, Program, cast -from drgn.helpers import decode_enum_type_flags +from drgn.helpers.common.format import decode_enum_type_flags __all__ = ( "PFN_PHYS", diff --git a/drgn/helpers/linux/slab.py b/drgn/helpers/linux/slab.py index c16a6b14c..9defa5110 100644 --- a/drgn/helpers/linux/slab.py +++ b/drgn/helpers/linux/slab.py @@ -19,7 +19,7 @@ from typing import Iterator, Optional, Set, Union, overload from drgn import 
NULL, FaultError, IntegerLike, Object, Program, Type, cast, sizeof -from drgn.helpers import escape_ascii_string +from drgn.helpers.common.format import escape_ascii_string from drgn.helpers.linux.cpumask import for_each_online_cpu from drgn.helpers.linux.list import list_for_each_entry from drgn.helpers.linux.mm import ( diff --git a/examples/linux/cgroup.py b/examples/linux/cgroup.py index d0de71ad9..c0b1b1887 100755 --- a/examples/linux/cgroup.py +++ b/examples/linux/cgroup.py @@ -9,7 +9,7 @@ import sys from drgn import cast -from drgn.helpers import enum_type_to_class +from drgn.helpers.common.type import enum_type_to_class from drgn.helpers.linux import ( cgroup_bpf_prog_for_each, cgroup_path, diff --git a/examples/linux/tcp_sock.py b/examples/linux/tcp_sock.py index e077df72c..d2372c1e0 100755 --- a/examples/linux/tcp_sock.py +++ b/examples/linux/tcp_sock.py @@ -9,7 +9,7 @@ import struct from drgn import cast, container_of -from drgn.helpers import enum_type_to_class +from drgn.helpers.common.type import enum_type_to_class from drgn.helpers.linux import ( cgroup_path, hlist_for_each, diff --git a/tests/helpers/common/__init__.py b/tests/helpers/common/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/helpers/test_helpers.py b/tests/helpers/common/test_format.py similarity index 97% rename from tests/helpers/test_helpers.py rename to tests/helpers/common/test_format.py index 567d4b66c..b196f8a76 100644 --- a/tests/helpers/test_helpers.py +++ b/tests/helpers/common/test_format.py @@ -2,7 +2,7 @@ # SPDX-License-Identifier: GPL-3.0-or-later from drgn import Program, TypeEnumerator -from drgn.helpers import decode_enum_type_flags, decode_flags +from drgn.helpers.common.format import decode_enum_type_flags, decode_flags from tests import MOCK_PLATFORM, TestCase diff --git a/tools/bpf_inspect.py b/tools/bpf_inspect.py index 0ce7959f8..ec3c3fc54 100755 --- a/tools/bpf_inspect.py +++ b/tools/bpf_inspect.py @@ -4,7 +4,7 @@ import argparse 
-from drgn.helpers import enum_type_to_class +from drgn.helpers.common.type import enum_type_to_class from drgn.helpers.linux import bpf_map_for_each, bpf_prog_for_each, hlist_for_each_entry BpfMapType = enum_type_to_class(prog.type("enum bpf_map_type"), "BpfMapType") From 15cc473ec2909c41e90d9e42af7691f79ca54479 Mon Sep 17 00:00:00 2001 From: Nhat Pham Date: Tue, 16 Aug 2022 15:34:44 -0700 Subject: [PATCH 4/4] drgn.helpers.common.memory: add identify_address helper function This helper function identifies the type of the address (slab allocated or symbol) and returns a string representation of the address accordingly. This will be useful for another helper function which prints the stack trace with more information about each item on the stack. Signed-off-by: Nhat Pham --- drgn/helpers/common/memory.py | 95 +++++++++++++++++++++++ tests/linux_kernel/helpers/test_common.py | 50 ++++++++++++ 2 files changed, 145 insertions(+) create mode 100644 drgn/helpers/common/memory.py create mode 100644 tests/linux_kernel/helpers/test_common.py diff --git a/drgn/helpers/common/memory.py b/drgn/helpers/common/memory.py new file mode 100644 index 000000000..6550ce4da --- /dev/null +++ b/drgn/helpers/common/memory.py @@ -0,0 +1,95 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# SPDX-License-Identifier: GPL-3.0-or-later + +""" +Memory +------ + +The ``drgn.helpers.common.memory`` module provides helpers for working with memory and addresses. +""" + +import operator +from typing import Optional, Union, overload + +import drgn +from drgn import IntegerLike, Object, Program, SymbolKind +from drgn.helpers.common.format import escape_ascii_string +from drgn.helpers.linux.slab import find_containing_slab_cache + +__all__ = ("identify_address",) + + +_SYMBOL_KIND_STR = { + SymbolKind.OBJECT: "object symbol", + SymbolKind.FUNC: "function symbol", +} + + +@overload +def identify_address(addr: Object) -> Optional[str]: + """""" + ... 
+ + +@overload +def identify_address(prog: Program, addr: IntegerLike) -> Optional[str]: + """ + Try to identify what an address refers to. + + For all programs, this will identify addresses as follows: + + * Object symbols (e.g., addresses in global variables): + ``object symbol: {symbol_name}+{hex_offset}``. + * Function symbols (i.e., addresses in functions): + ``function symbol: {symbol_name}+{hex_offset}``. + * Other symbols: ``symbol: {symbol_name}+{hex_offset}``. + + Additionally, for the Linux kernel, this will identify: + + * Slab objects: ``slab object: {slab_cache_name}``. + + This may recognize other types of addresses in the future. + + The address can be given as an :class:`~drgn.Object` or as a + :class:`~drgn.Program` and an integer. + + :param addr: ``void *`` + :return: Identity as string, or ``None`` if the address is unrecognized. + """ + ... + + +def identify_address( # type: ignore # Need positional-only arguments. + prog_or_addr: Union[Program, Object], addr: Optional[IntegerLike] = None +) -> Optional[str]: + if addr is None: + assert isinstance(prog_or_addr, Object) + prog = prog_or_addr.prog_ + addr = prog_or_addr + else: + assert isinstance(prog_or_addr, Program) + prog = prog_or_addr + addr = operator.index(addr) + + if prog.flags & drgn.ProgramFlags.IS_LINUX_KERNEL: + # Linux kernel-specific identification: + slab_cache = find_containing_slab_cache(prog, addr) + + if slab_cache: + # address is slab allocated + cache_name = escape_ascii_string( + slab_cache.name.string_(), escape_backslash=True + ) + return f"slab object: {cache_name}" + + # Check if address is of a symbol: + try: + symbol = prog.symbol(addr) + except LookupError: # not a symbol + # Unrecognized address + return None + + offset = hex(addr - symbol.address) + symbol_kind = _SYMBOL_KIND_STR.get(symbol.kind, "symbol") + + return f"{symbol_kind}: {symbol.name}+{offset}" diff --git a/tests/linux_kernel/helpers/test_common.py b/tests/linux_kernel/helpers/test_common.py new 
file mode 100644 index 000000000..a29715380 --- /dev/null +++ b/tests/linux_kernel/helpers/test_common.py @@ -0,0 +1,50 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# SPDX-License-Identifier: GPL-3.0-or-later + +from drgn.helpers.common import identify_address +from drgn.helpers.linux.mm import pfn_to_virt +from tests.linux_kernel import ( + LinuxKernelTestCase, + skip_unless_have_full_mm_support, + skip_unless_have_test_kmod, +) + + +class TestIdentifyAddress(LinuxKernelTestCase): + def test_identify_symbol(self): + symbol = self.prog.symbol("__schedule") + + self.assertIn( + identify_address(self.prog, symbol.address), + ("symbol: __sched_text_start+0x0", "function symbol: __schedule+0x0"), + ) + + self.assertEqual( + identify_address(self.prog, symbol.address + 1), + "function symbol: __schedule+0x1", + ) + + @skip_unless_have_full_mm_support + @skip_unless_have_test_kmod + def test_identify_slab_cache(self): + for size in ("small", "big"): + with self.subTest(size=size): + objects = self.prog[f"drgn_test_{size}_slab_objects"] + + if self.prog["drgn_test_slob"]: + for obj in objects: + self.assertIsNone(identify_address(obj)) + else: + for obj in objects: + self.assertEqual( + identify_address(obj), + f"slab object: drgn_test_{size}", + ) + + def test_identify_unrecognized(self): + start_addr = (pfn_to_virt(self.prog["min_low_pfn"])).value_() + end_addr = (pfn_to_virt(self.prog["max_pfn"]) + self.prog["PAGE_SIZE"]).value_() + + self.assertIsNone(identify_address(self.prog, start_addr - 1)) + self.assertIsNone(identify_address(self.prog, end_addr)) + self.assertIsNone(identify_address(self.prog, self.prog["drgn_test_va"]))