From 2dbac05716947d44aab7646721184422f8b9fd17 Mon Sep 17 00:00:00 2001 From: Mike Hunhoff Date: Fri, 15 Dec 2023 17:23:19 -0700 Subject: [PATCH] ghidra: fix IndexError exception (#1879) * ghidra: fix IndexError exception --- capa/features/extractors/ghidra/insn.py | 33 ++++++++++++++----------- scripts/capa2yara.py | 17 ++++++++++++- 2 files changed, 35 insertions(+), 15 deletions(-) diff --git a/capa/features/extractors/ghidra/insn.py b/capa/features/extractors/ghidra/insn.py index 2404207ce..61a96154d 100644 --- a/capa/features/extractors/ghidra/insn.py +++ b/capa/features/extractors/ghidra/insn.py @@ -195,20 +195,25 @@ def extract_insn_offset_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandl if insn.getMnemonicString().startswith("LEA"): return - # ignore any stack references - if not capa.features.extractors.ghidra.helpers.is_stack_referenced(insn): - # Ghidra stores operands in 2D arrays if they contain offsets - for i in range(insn.getNumOperands()): - if insn.getOperandType(i) == OperandType.DYNAMIC: # e.g. [esi + 4] - # manual extraction, since the default api calls only work on the 1st dimension of the array - op_objs = insn.getOpObjects(i) - if isinstance(op_objs[-1], ghidra.program.model.scalar.Scalar): - op_off = op_objs[-1].getValue() - yield Offset(op_off), ih.address - yield OperandOffset(i, op_off), ih.address - else: - yield Offset(0), ih.address - yield OperandOffset(i, 0), ih.address + if capa.features.extractors.ghidra.helpers.is_stack_referenced(insn): + # ignore stack references + return + + # Ghidra stores operands in 2D arrays if they contain offsets + for i in range(insn.getNumOperands()): + if insn.getOperandType(i) == OperandType.DYNAMIC: # e.g. [esi + 4] + # manual extraction, since the default api calls only work on the 1st dimension of the array + op_objs = insn.getOpObjects(i) + if not op_objs: + continue + + if isinstance(op_objs[-1], ghidra.program.model.scalar.Scalar): + op_off = op_objs[-1].getValue() + else: + op_off = 0 + + yield Offset(op_off), ih.address + yield OperandOffset(i, op_off), ih.address def extract_insn_bytes_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]: diff --git a/scripts/capa2yara.py b/scripts/capa2yara.py index a146892d9..5fe5c0849 100644 --- a/scripts/capa2yara.py +++ b/scripts/capa2yara.py @@ -61,7 +61,22 @@ # this have to be the internal names used by capa.py which are sometimes different to the ones written out in the rules, e.g. "2 or more" is "Some", count is Range -unsupported = ["characteristic", "mnemonic", "offset", "subscope", "Range", "os", "property", "format", "class", "operand[0].number", "operand[1].number", "substring", "arch", "namespace"] +unsupported = [ + "characteristic", + "mnemonic", + "offset", + "subscope", + "Range", + "os", + "property", + "format", + "class", + "operand[0].number", + "operand[1].number", + "substring", + "arch", + "namespace", +] # further idea: shorten this list, possible stuff: # - 2 or more strings: e.g. # -- https://github.com/mandiant/capa-rules/blob/master/collection/file-managers/gather-direct-ftp-information.yml