Merge branch 'master' of personal.github.com:mandiant/capa

mandiant · Mar 23, 2023 · ddc52fa · ddc52fa
2 parents 986e2e6 + 793057c
commit ddc52fa
Show file tree

Hide file tree

Showing 17 changed files with 119 additions and 42 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -5,10 +5,11 @@
 ### New Features
 
 - extractor: add Binary Ninja feature extractor @xusheng6
+- new cli flag `--os` to override auto-detected operating system for a sample @captainGeech42
 
 ### Breaking Changes
 
-### New Rules (20)
+### New Rules (22)
 
 - persistence/scheduled-tasks/schedule-task-via-at joren485
 - data-manipulation/prng/generate-random-numbers-via-rtlgenrandom [email protected]
@@ -30,6 +31,8 @@
 - nursery/hash-data-using-ripemd256 [email protected]
 - nursery/hash-data-using-ripemd320 [email protected]
 - nursery/set-web-proxy-in-dotnet [email protected]
+- nursery/check-for-windows-sandbox-via-subdirectory [email protected]
+- nursery/enumerate-pe-sections-in-dotnet @mr-tz
 -
 
 ### Bug Fixes

diff --git a/README.md b/README.md
@@ -2,7 +2,7 @@
 
 [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/flare-capa)](https://pypi.org/project/flare-capa)
 [![Last release](https://img.shields.io/github/v/release/mandiant/capa)](https://github.com/mandiant/capa/releases)
-[![Number of rules](https://img.shields.io/badge/rules-787-blue.svg)](https://github.com/mandiant/capa-rules)
+[![Number of rules](https://img.shields.io/badge/rules-789-blue.svg)](https://github.com/mandiant/capa-rules)
 [![CI status](https://github.com/mandiant/capa/workflows/CI/badge.svg)](https://github.com/mandiant/capa/actions?query=workflow%3ACI+event%3Apush+branch%3Amaster)
 [![Downloads](https://img.shields.io/github/downloads/mandiant/capa/total)](https://github.com/mandiant/capa/releases)
 [![License](https://img.shields.io/badge/license-Apache--2.0-green.svg)](LICENSE.txt)

diff --git a/capa/features/common.py b/capa/features/common.py
@@ -417,6 +417,8 @@ def __init__(self, value: str, description=None):
 OS_ANY = "any"
 VALID_OS = {os.value for os in capa.features.extractors.elf.OS}
 VALID_OS.update({OS_WINDOWS, OS_LINUX, OS_MACOS, OS_ANY})
+# internal only, not to be used in rules
+OS_AUTO = "auto"
 
 
 class OS(Feature):

diff --git a/capa/features/extractors/common.py b/capa/features/extractors/common.py
@@ -10,7 +10,18 @@
 import capa.features.extractors.elf
 import capa.features.extractors.pefile
 import capa.features.extractors.strings
-from capa.features.common import OS, FORMAT_PE, FORMAT_ELF, OS_WINDOWS, FORMAT_FREEZE, Arch, Format, String, Feature
+from capa.features.common import (
+    OS,
+    OS_AUTO,
+    FORMAT_PE,
+    FORMAT_ELF,
+    OS_WINDOWS,
+    FORMAT_FREEZE,
+    Arch,
+    Format,
+    String,
+    Feature,
+)
 from capa.features.freeze import is_freeze
 from capa.features.address import NO_ADDRESS, Address, FileOffsetAddress
 
@@ -73,7 +84,10 @@ def extract_arch(buf) -> Iterator[Tuple[Feature, Address]]:
         return
 
 
-def extract_os(buf) -> Iterator[Tuple[Feature, Address]]:
+def extract_os(buf, os=OS_AUTO) -> Iterator[Tuple[Feature, Address]]:
+    if os != OS_AUTO:
+        yield OS(os), NO_ADDRESS
+
     if buf.startswith(b"MZ"):
         yield OS(OS_WINDOWS), NO_ADDRESS
     elif buf.startswith(b"\x7fELF"):
@@ -92,8 +106,6 @@ def extract_os(buf) -> Iterator[Tuple[Feature, Address]]:
         #  2. handling a new file format (e.g. macho)
         #
         # for (1) we can't do much - it's shellcode and all bets are off.
-        # we could maybe accept a further CLI argument to specify the OS,
-        # but i think this would be rarely used.
         # rules that rely on OS conditions will fail to match on shellcode.
         #
         # for (2), this logic will need to be updated as the format is implemented.

diff --git a/capa/features/extractors/viv/extractor.py b/capa/features/extractors/viv/extractor.py
@@ -25,7 +25,7 @@
 
 
 class VivisectFeatureExtractor(FeatureExtractor):
-    def __init__(self, vw, path):
+    def __init__(self, vw, path, os):
         super().__init__()
         self.vw = vw
         self.path = path
@@ -35,7 +35,7 @@ def __init__(self, vw, path):
         # pre-compute these because we'll yield them at *every* scope.
         self.global_features: List[Tuple[Feature, Address]] = []
         self.global_features.extend(capa.features.extractors.viv.file.extract_file_format(self.buf))
-        self.global_features.extend(capa.features.extractors.common.extract_os(self.buf))
+        self.global_features.extend(capa.features.extractors.common.extract_os(self.buf, os))
         self.global_features.extend(capa.features.extractors.viv.global_.extract_arch(self.vw))
 
     def get_base_address(self):

diff --git a/capa/features/freeze/__init__.py b/capa/features/freeze/__init__.py
@@ -385,14 +385,14 @@ def main(argv=None):
         argv = sys.argv[1:]
 
     parser = argparse.ArgumentParser(description="save capa features to a file")
-    capa.main.install_common_args(parser, {"sample", "format", "backend", "signatures"})
+    capa.main.install_common_args(parser, {"sample", "format", "backend", "os", "signatures"})
     parser.add_argument("output", type=str, help="Path to output file")
     args = parser.parse_args(args=argv)
     capa.main.handle_common_args(args)
 
     sigpaths = capa.main.get_signatures(args.signatures)
 
-    extractor = capa.main.get_extractor(args.sample, args.format, args.backend, sigpaths, False)
+    extractor = capa.main.get_extractor(args.sample, args.format, args.os, args.backend, sigpaths, False)
 
     with open(args.output, "wb") as f:
         f.write(dump(extractor))

diff --git a/capa/ida/plugin/__init__.py b/capa/ida/plugin/__init__.py
@@ -67,7 +67,16 @@ def run(self, arg):
           arg (int): bitflag. Setting LSB enables automatic analysis upon
           loading. The other bits are currently undefined. See `form.Options`.
         """
-        self.form = CapaExplorerForm(self.PLUGIN_NAME, arg)
+        if not self.form:
+            self.form = CapaExplorerForm(self.PLUGIN_NAME, arg)
+        else:
+            widget = idaapi.find_widget(self.form.form_title)
+            if widget:
+                idaapi.activate_widget(widget, True)
+            else:
+                self.form.Show()
+                self.form.load_capa_results(False, True)
+
         return True
 
 

diff --git a/capa/main.py b/capa/main.py
@@ -58,8 +58,12 @@
 )
 from capa.exceptions import UnsupportedOSError, UnsupportedArchError, UnsupportedFormatError, UnsupportedRuntimeError
 from capa.features.common import (
+    OS_AUTO,
+    OS_LINUX,
+    OS_MACOS,
     FORMAT_PE,
     FORMAT_ELF,
+    OS_WINDOWS,
     FORMAT_AUTO,
     FORMAT_SC32,
     FORMAT_SC64,
@@ -491,7 +495,13 @@ def get_workspace(path, format_, sigpaths):
 
 # TODO get_extractors -> List[FeatureExtractor]?
 def get_extractor(
-    path: str, format_: str, backend: str, sigpaths: List[str], should_save_workspace=False, disable_progress=False
+    path: str,
+    format_: str,
+    os: str,
+    backend: str,
+    sigpaths: List[str],
+    should_save_workspace=False,
+    disable_progress=False,
 ) -> FeatureExtractor:
     """
     raises:
@@ -506,7 +516,7 @@ def get_extractor(
         if not is_supported_arch(path):
             raise UnsupportedArchError()
 
-        if not is_supported_os(path):
+        if os == OS_AUTO and not is_supported_os(path):
             raise UnsupportedOSError()
 
     if format_ == FORMAT_DOTNET:
@@ -558,7 +568,7 @@ def get_extractor(
             else:
                 logger.debug("CAPA_SAVE_WORKSPACE unset, not saving workspace")
 
-        return capa.features.extractors.viv.extractor.VivisectFeatureExtractor(vw, path)
+        return capa.features.extractors.viv.extractor.VivisectFeatureExtractor(vw, path, os)
 
 
 def get_file_extractors(sample: str, format_: str) -> List[FeatureExtractor]:
@@ -717,6 +727,8 @@ def get_signatures(sigs_path):
 def collect_metadata(
     argv: List[str],
     sample_path: str,
+    format_: str,
+    os_: str,
     rules_path: List[str],
     extractor: capa.features.extractors.base_extractor.FeatureExtractor,
 ):
@@ -734,9 +746,9 @@ def collect_metadata(
     if rules_path != [RULES_PATH_DEFAULT_STRING]:
         rules_path = [os.path.abspath(os.path.normpath(r)) for r in rules_path]
 
-    format_ = get_format(sample_path)
+    format_ = get_format(sample_path) if format_ == FORMAT_AUTO else format_
     arch = get_arch(sample_path)
-    os_ = get_os(sample_path)
+    os_ = get_os(sample_path) if os_ == OS_AUTO else os_
 
     return {
         "timestamp": datetime.datetime.now().isoformat(),
@@ -818,6 +830,7 @@ def install_common_args(parser, wanted=None):
       wanted (Set[str]): collection of arguments to opt-into, including:
         - "sample": required positional argument to input file.
         - "format": flag to override file format.
+        - "os": flag to override file operating system.
         - "backend": flag to override analysis backend.
         - "rules": flag to override path to capa rules.
         - "tag": flag to override/specify which rules to match.
@@ -851,6 +864,7 @@ def install_common_args(parser, wanted=None):
     #
     #   - sample
     #   - format
+    #   - os
     #   - rules
     #   - tag
     #
@@ -891,6 +905,21 @@ def install_common_args(parser, wanted=None):
             default=BACKEND_VIV,
         )
 
+    if "os" in wanted:
+        oses = [
+            (OS_AUTO, "detect OS automatically - default"),
+            (OS_LINUX,),
+            (OS_MACOS,),
+            (OS_WINDOWS,),
+        ]
+        os_help = ", ".join(["%s (%s)" % (o[0], o[1]) if len(o) == 2 else o[0] for o in oses])
+        parser.add_argument(
+            "--os",
+            choices=[o[0] for o in oses],
+            default=OS_AUTO,
+            help="select sample OS: %s" % os_help,
+        )
+
     if "rules" in wanted:
         parser.add_argument(
             "-r",
@@ -1054,7 +1083,7 @@ def main(argv=None):
     parser = argparse.ArgumentParser(
         description=desc, epilog=epilog, formatter_class=argparse.RawDescriptionHelpFormatter
     )
-    install_common_args(parser, {"sample", "format", "backend", "signatures", "rules", "tag"})
+    install_common_args(parser, {"sample", "format", "backend", "os", "signatures", "rules", "tag"})
     parser.add_argument("-j", "--json", action="store_true", help="emit JSON instead of text")
     args = parser.parse_args(args=argv)
     ret = handle_common_args(args)
@@ -1170,7 +1199,13 @@ def main(argv=None):
 
         try:
             extractor = get_extractor(
-                args.sample, format_, args.backend, sig_paths, should_save_workspace, disable_progress=args.quiet
+                args.sample,
+                format_,
+                args.os,
+                args.backend,
+                sig_paths,
+                should_save_workspace,
+                disable_progress=args.quiet,
             )
         except UnsupportedFormatError:
             log_unsupported_format_error()
@@ -1182,7 +1217,7 @@ def main(argv=None):
             log_unsupported_os_error()
             return E_INVALID_FILE_OS
 
-    meta = collect_metadata(argv, args.sample, args.rules, extractor)
+    meta = collect_metadata(argv, args.sample, args.format, args.os, args.rules, extractor)
 
     capabilities, counts = find_capabilities(rules, extractor, disable_progress=args.quiet)
     meta["analysis"].update(counts)

diff --git a/rules b/rules
diff --git a/scripts/bulk-process.py b/scripts/bulk-process.py
@@ -69,6 +69,7 @@
 import capa.rules
 import capa.render.json
 import capa.render.result_document as rd
+from capa.features.common import OS_AUTO
 
 logger = logging.getLogger("capa")
 
@@ -81,6 +82,7 @@ def get_capa_results(args):
       rules (capa.rules.RuleSet): the rules to match
       signatures (List[str]): list of file system paths to signature files
       format (str): the name of the sample file format
+      os (str): the name of the operating system
       path (str): the file system path to the sample to process
 
     args is a tuple because i'm not quite sure how to unpack multiple arguments using `map`.
@@ -96,12 +98,12 @@ def get_capa_results(args):
       meta (dict): the meta analysis results
       capabilities (dict): the matched capabilities and their result objects
     """
-    rules, sigpaths, format, path = args
+    rules, sigpaths, format, os_, path = args
     should_save_workspace = os.environ.get("CAPA_SAVE_WORKSPACE") not in ("0", "no", "NO", "n", None)
     logger.info("computing capa results for: %s", path)
     try:
         extractor = capa.main.get_extractor(
-            path, format, capa.main.BACKEND_VIV, sigpaths, should_save_workspace, disable_progress=True
+            path, format, os_, capa.main.BACKEND_VIV, sigpaths, should_save_workspace, disable_progress=True
         )
     except capa.main.UnsupportedFormatError:
         # i'm not 100% sure if multiprocessing will reliably raise exceptions across process boundaries.
@@ -127,7 +129,7 @@ def get_capa_results(args):
             "error": f"unexpected error: {e}",
         }
 
-    meta = capa.main.collect_metadata([], path, [], extractor)
+    meta = capa.main.collect_metadata([], path, format, os_, [], extractor)
     capabilities, counts = capa.main.find_capabilities(rules, extractor, disable_progress=True)
     meta["analysis"].update(counts)
     meta["analysis"]["layout"] = capa.main.compute_layout(rules, extractor, capabilities)
@@ -142,7 +144,7 @@ def main(argv=None):
         argv = sys.argv[1:]
 
         parser = argparse.ArgumentParser(description="detect capabilities in programs.")
-        capa.main.install_common_args(parser, wanted={"rules", "signatures"})
+        capa.main.install_common_args(parser, wanted={"rules", "signatures", "format", "os"})
         parser.add_argument("input", type=str, help="Path to directory of files to recursively analyze")
         parser.add_argument(
             "-n", "--parallelism", type=int, default=multiprocessing.cpu_count(), help="parallelism factor"
@@ -195,7 +197,9 @@ def map(f, args, parallelism=None):
 
         results = {}
         for result in mapper(
-            get_capa_results, [(rules, sig_paths, "pe", sample) for sample in samples], parallelism=args.parallelism
+            get_capa_results,
+            [(rules, sig_paths, "pe", OS_AUTO, sample) for sample in samples],
+            parallelism=args.parallelism,
         ):
             if result["status"] == "error":
                 logger.warning(result["error"])

diff --git a/scripts/capa_as_library.py b/scripts/capa_as_library.py
@@ -14,6 +14,7 @@
 import capa.render.result_document as rd
 import capa.features.freeze.features as frzf
 from capa.engine import *
+from capa.features.common import OS_AUTO, FORMAT_AUTO
 
 
 # == Render dictionary helpers
@@ -164,11 +165,13 @@ def capa_details(rules_path, file_path, output_format="dictionary"):
     rules = capa.main.get_rules([rules_path])
 
     # extract features and find capabilities
-    extractor = capa.main.get_extractor(file_path, "auto", capa.main.BACKEND_VIV, [], False, disable_progress=True)
+    extractor = capa.main.get_extractor(
+        file_path, FORMAT_AUTO, OS_AUTO, capa.main.BACKEND_VIV, [], False, disable_progress=True
+    )
     capabilities, counts = capa.main.find_capabilities(rules, extractor, disable_progress=True)
 
     # collect metadata (used only to make rendering more complete)
-    meta = capa.main.collect_metadata([], file_path, rules_path, extractor)
+    meta = capa.main.collect_metadata([], file_path, FORMAT_AUTO, OS_AUTO, rules_path, extractor)
     meta["analysis"].update(counts)
     meta["analysis"]["layout"] = capa.main.compute_layout(rules, extractor, capabilities)
 

diff --git a/scripts/lint.py b/scripts/lint.py
@@ -45,7 +45,7 @@
 import capa.helpers
 import capa.features.insn
 from capa.rules import Rule, RuleSet
-from capa.features.common import FORMAT_PE, FORMAT_DOTNET, String, Feature, Substring
+from capa.features.common import OS_AUTO, FORMAT_PE, FORMAT_DOTNET, String, Feature, Substring
 from capa.render.result_document import RuleMetadata
 
 logger = logging.getLogger("lint")
@@ -310,7 +310,9 @@ def get_sample_capabilities(ctx: Context, path: Path) -> Set[str]:
         format_ = capa.main.get_auto_format(nice_path)
 
     logger.debug("analyzing sample: %s", nice_path)
-    extractor = capa.main.get_extractor(nice_path, format_, "", DEFAULT_SIGNATURES, False, disable_progress=True)
+    extractor = capa.main.get_extractor(
+        nice_path, format_, OS_AUTO, "", DEFAULT_SIGNATURES, False, disable_progress=True
+    )
 
     capabilities, _ = capa.main.find_capabilities(ctx.rules, extractor, disable_progress=True)
     # mypy doesn't seem to be happy with the MatchResults type alias & set(...keys())?
+1 −1		README.md
+15 −9		anti-analysis/anti-vm/vm-detection/check-for-sandbox-username-or-hostname.yml
+36 −34		anti-analysis/reference-analysis-tools-strings.yml
+6 −1		host-interaction/clipboard/read-clipboard-data.yml
+22 −0		nursery/check-for-windows-sandbox-via-subdirectory.yml
+19 −0		nursery/enumerate-pe-sections-in-dotnet.yml