From d6f7d2180f47dbe3c3ae267374f84b36bb910a31 Mon Sep 17 00:00:00 2001 From: Mike Hunhoff Date: Thu, 7 Dec 2023 14:06:54 -0700 Subject: [PATCH] dotnet: combine dnfile_.py and dotnetfile.py (#1895) --- capa/features/extractors/dnfile_.py | 158 ---------------------------- capa/helpers.py | 4 +- capa/main.py | 3 +- tests/fixtures.py | 13 +-- tests/test_dotnet_features.py | 33 ------ 5 files changed, 10 insertions(+), 201 deletions(-) delete mode 100644 capa/features/extractors/dnfile_.py delete mode 100644 tests/test_dotnet_features.py diff --git a/capa/features/extractors/dnfile_.py b/capa/features/extractors/dnfile_.py deleted file mode 100644 index 72dc9b7e7..000000000 --- a/capa/features/extractors/dnfile_.py +++ /dev/null @@ -1,158 +0,0 @@ -# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at: [package root]/LICENSE.txt -# Unless required by applicable law or agreed to in writing, software distributed under the License -# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and limitations under the License. -import logging -from typing import Tuple, Iterator -from pathlib import Path - -import dnfile -import pefile - -from capa.features.common import ( - OS, - OS_ANY, - ARCH_ANY, - ARCH_I386, - FORMAT_PE, - ARCH_AMD64, - FORMAT_DOTNET, - Arch, - Format, - Feature, -) -from capa.features.address import NO_ADDRESS, Address, AbsoluteVirtualAddress -from capa.features.extractors.base_extractor import SampleHashes, StaticFeatureExtractor - -logger = logging.getLogger(__name__) - - -def extract_file_format(**kwargs) -> Iterator[Tuple[Feature, Address]]: - yield Format(FORMAT_PE), NO_ADDRESS - yield Format(FORMAT_DOTNET), NO_ADDRESS - - -def extract_file_os(**kwargs) -> Iterator[Tuple[Feature, Address]]: - yield OS(OS_ANY), NO_ADDRESS - - -def extract_file_arch(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Feature, Address]]: - # to distinguish in more detail, see https://stackoverflow.com/a/23614024/10548020 - # .NET 4.5 added option: any CPU, 32-bit preferred - assert pe.net is not None - assert pe.net.Flags is not None - - if pe.net.Flags.CLR_32BITREQUIRED and pe.PE_TYPE == pefile.OPTIONAL_HEADER_MAGIC_PE: - yield Arch(ARCH_I386), NO_ADDRESS - elif not pe.net.Flags.CLR_32BITREQUIRED and pe.PE_TYPE == pefile.OPTIONAL_HEADER_MAGIC_PE_PLUS: - yield Arch(ARCH_AMD64), NO_ADDRESS - else: - yield Arch(ARCH_ANY), NO_ADDRESS - - -def extract_file_features(pe: dnfile.dnPE) -> Iterator[Tuple[Feature, Address]]: - for file_handler in FILE_HANDLERS: - for feature, address in file_handler(pe=pe): - yield feature, address - - -FILE_HANDLERS = ( - # extract_file_export_names, - # extract_file_import_names, - # extract_file_section_names, - # extract_file_strings, - # extract_file_function_names, - extract_file_format, -) - - -def extract_global_features(pe: dnfile.dnPE) -> Iterator[Tuple[Feature, Address]]: - for handler in GLOBAL_HANDLERS: - for feature, addr in handler(pe=pe): # type: ignore - yield feature, addr - - -GLOBAL_HANDLERS = ( - extract_file_os, - extract_file_arch, -) - - -class DnfileFeatureExtractor(StaticFeatureExtractor): - def __init__(self, path: Path): - super().__init__(hashes=SampleHashes.from_bytes(path.read_bytes())) - self.path: Path = path - self.pe: dnfile.dnPE = dnfile.dnPE(str(path)) - - def get_base_address(self) -> AbsoluteVirtualAddress: - return AbsoluteVirtualAddress(0x0) - - def get_entry_point(self) -> int: - # self.pe.net.Flags.CLT_NATIVE_ENTRYPOINT - # True: native EP: Token - # False: managed EP: RVA - assert self.pe.net is not None - assert self.pe.net.struct is not None - - return self.pe.net.struct.EntryPointTokenOrRva - - def extract_global_features(self): - yield from extract_global_features(self.pe) - - def extract_file_features(self): - yield from extract_file_features(self.pe) - - def is_dotnet_file(self) -> bool: - return bool(self.pe.net) - - def is_mixed_mode(self) -> bool: - assert self.pe is not None - assert self.pe.net is not None - assert self.pe.net.Flags is not None - - return not bool(self.pe.net.Flags.CLR_ILONLY) - - def get_runtime_version(self) -> Tuple[int, int]: - assert self.pe is not None - assert self.pe.net is not None - assert self.pe.net.struct is not None - - return self.pe.net.struct.MajorRuntimeVersion, self.pe.net.struct.MinorRuntimeVersion - - def get_meta_version_string(self) -> str: - assert self.pe.net is not None - assert self.pe.net.metadata is not None - assert self.pe.net.metadata.struct is not None - assert self.pe.net.metadata.struct.Version is not None - - vbuf = self.pe.net.metadata.struct.Version - assert isinstance(vbuf, bytes) - - return vbuf.rstrip(b"\x00").decode("utf-8") - - def get_functions(self): - raise NotImplementedError("DnfileFeatureExtractor can only be used to extract file features") - - def extract_function_features(self, f): - raise NotImplementedError("DnfileFeatureExtractor can only be used to extract file features") - - def get_basic_blocks(self, f): - raise NotImplementedError("DnfileFeatureExtractor can only be used to extract file features") - - def extract_basic_block_features(self, f, bb): - raise NotImplementedError("DnfileFeatureExtractor can only be used to extract file features") - - def get_instructions(self, f, bb): - raise NotImplementedError("DnfileFeatureExtractor can only be used to extract file features") - - def extract_insn_features(self, f, bb, insn): - raise NotImplementedError("DnfileFeatureExtractor can only be used to extract file features") - - def is_library_function(self, va): - raise NotImplementedError("DnfileFeatureExtractor can only be used to extract file features") - - def get_function_name(self, va): - raise NotImplementedError("DnfileFeatureExtractor can only be used to extract file features") diff --git a/capa/helpers.py b/capa/helpers.py index 45fac5bfe..fa7efc604 100644 --- a/capa/helpers.py +++ b/capa/helpers.py @@ -96,13 +96,13 @@ def get_auto_format(path: Path) -> str: def get_format(sample: Path) -> str: # imported locally to avoid import cycle from capa.features.extractors.common import extract_format - from capa.features.extractors.dnfile_ import DnfileFeatureExtractor + from capa.features.extractors.dotnetfile import DotnetFileFeatureExtractor buf = sample.read_bytes() for feature, _ in extract_format(buf): if feature == Format(FORMAT_PE): - dnfile_extractor = DnfileFeatureExtractor(sample) + dnfile_extractor = DotnetFileFeatureExtractor(sample) if dnfile_extractor.is_dotnet_file(): feature = Format(FORMAT_DOTNET) diff --git a/capa/main.py b/capa/main.py index 5bcacbe5c..706c442a4 100644 --- a/capa/main.py +++ b/capa/main.py @@ -44,7 +44,6 @@ import capa.render.result_document as rdoc import capa.features.extractors.common import capa.features.extractors.pefile -import capa.features.extractors.dnfile_ import capa.features.extractors.elffile import capa.features.extractors.dotnetfile import capa.features.extractors.base_extractor @@ -370,7 +369,7 @@ def get_file_extractors(sample: Path, format_: str) -> List[FeatureExtractor]: elif format_ == FORMAT_DOTNET: file_extractors.append(capa.features.extractors.pefile.PefileFeatureExtractor(sample)) - file_extractors.append(capa.features.extractors.dnfile_.DnfileFeatureExtractor(sample)) + file_extractors.append(capa.features.extractors.dotnetfile.DotnetFileFeatureExtractor(sample)) elif format_ == capa.features.common.FORMAT_ELF: file_extractors.append(capa.features.extractors.elffile.ElfFeatureExtractor(sample)) diff --git a/tests/fixtures.py b/tests/fixtures.py index 2f8eac15a..950c439a8 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -141,10 +141,11 @@ def get_pefile_extractor(path: Path): return extractor -def get_dotnetfile_extractor(path: Path): - import capa.features.extractors.dotnetfile +@lru_cache(maxsize=1) +def get_dnfile_extractor(path: Path): + import capa.features.extractors.dnfile.extractor - extractor = capa.features.extractors.dotnetfile.DotnetFileFeatureExtractor(path) + extractor = capa.features.extractors.dnfile.extractor.DnfileFeatureExtractor(path) # overload the extractor so that the fixture exposes `extractor.path` setattr(extractor, "path", path.as_posix()) @@ -153,10 +154,10 @@ def get_dotnetfile_extractor(path: Path): @lru_cache(maxsize=1) -def get_dnfile_extractor(path: Path): - import capa.features.extractors.dnfile.extractor +def get_dotnetfile_extractor(path: Path): + import capa.features.extractors.dotnetfile - extractor = capa.features.extractors.dnfile.extractor.DnfileFeatureExtractor(path) + extractor = capa.features.extractors.dotnetfile.DotnetFileFeatureExtractor(path) # overload the extractor so that the fixture exposes `extractor.path` setattr(extractor, "path", path.as_posix()) diff --git a/tests/test_dotnet_features.py b/tests/test_dotnet_features.py deleted file mode 100644 index 51ce66b03..000000000 --- a/tests/test_dotnet_features.py +++ /dev/null @@ -1,33 +0,0 @@ -# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at: [package root]/LICENSE.txt -# Unless required by applicable law or agreed to in writing, software distributed under the License -# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and limitations under the License. -import fixtures - - -@fixtures.parametrize( - "sample,scope,feature,expected", - fixtures.FEATURE_PRESENCE_TESTS_DOTNET, - indirect=["sample", "scope"], -) -def test_dnfile_features(sample, scope, feature, expected): - fixtures.do_test_feature_presence(fixtures.get_dnfile_extractor, sample, scope, feature, expected) - - -@fixtures.parametrize( - "extractor,function,expected", - [ - ("b9f5b_dotnetfile_extractor", "is_dotnet_file", True), - ("b9f5b_dotnetfile_extractor", "is_mixed_mode", False), - ("mixed_mode_64_dotnetfile_extractor", "is_mixed_mode", True), - ("b9f5b_dotnetfile_extractor", "get_entry_point", 0x6000007), - ("b9f5b_dotnetfile_extractor", "get_runtime_version", (2, 5)), - ("b9f5b_dotnetfile_extractor", "get_meta_version_string", "v2.0.50727"), - ], -) -def test_dnfile_extractor(request, extractor, function, expected): - extractor_function = getattr(request.getfixturevalue(extractor), function) - assert extractor_function() == expected