Skip to content

Commit

Permalink
Merge branch 'master' of personal.github.com:mandiant/capa
Browse files Browse the repository at this point in the history
  • Loading branch information
williballenthin committed Mar 23, 2023
2 parents 986e2e6 + 793057c commit ddc52fa
Show file tree
Hide file tree
Showing 17 changed files with 119 additions and 42 deletions.
5 changes: 4 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,11 @@
### New Features

- extractor: add Binary Ninja feature extractor @xusheng6
- new cli flag `--os` to override auto-detected operating system for a sample @captainGeech42

### Breaking Changes

### New Rules (20)
### New Rules (22)

- persistence/scheduled-tasks/schedule-task-via-at joren485
- data-manipulation/prng/generate-random-numbers-via-rtlgenrandom [email protected]
Expand All @@ -30,6 +31,8 @@
- nursery/hash-data-using-ripemd256 [email protected]
- nursery/hash-data-using-ripemd320 [email protected]
- nursery/set-web-proxy-in-dotnet [email protected]
- nursery/check-for-windows-sandbox-via-subdirectory [email protected]
- nursery/enumerate-pe-sections-in-dotnet @mr-tz
-

### Bug Fixes
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

[![PyPI - Python Version](https://img.shields.io/pypi/pyversions/flare-capa)](https://pypi.org/project/flare-capa)
[![Last release](https://img.shields.io/github/v/release/mandiant/capa)](https://github.com/mandiant/capa/releases)
[![Number of rules](https://img.shields.io/badge/rules-787-blue.svg)](https://github.com/mandiant/capa-rules)
[![Number of rules](https://img.shields.io/badge/rules-789-blue.svg)](https://github.com/mandiant/capa-rules)
[![CI status](https://github.com/mandiant/capa/workflows/CI/badge.svg)](https://github.com/mandiant/capa/actions?query=workflow%3ACI+event%3Apush+branch%3Amaster)
[![Downloads](https://img.shields.io/github/downloads/mandiant/capa/total)](https://github.com/mandiant/capa/releases)
[![License](https://img.shields.io/badge/license-Apache--2.0-green.svg)](LICENSE.txt)
Expand Down
2 changes: 2 additions & 0 deletions capa/features/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -417,6 +417,8 @@ def __init__(self, value: str, description=None):
OS_ANY = "any"
VALID_OS = {os.value for os in capa.features.extractors.elf.OS}
VALID_OS.update({OS_WINDOWS, OS_LINUX, OS_MACOS, OS_ANY})
# internal only, not to be used in rules
OS_AUTO = "auto"


class OS(Feature):
Expand Down
20 changes: 16 additions & 4 deletions capa/features/extractors/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,18 @@
import capa.features.extractors.elf
import capa.features.extractors.pefile
import capa.features.extractors.strings
from capa.features.common import OS, FORMAT_PE, FORMAT_ELF, OS_WINDOWS, FORMAT_FREEZE, Arch, Format, String, Feature
from capa.features.common import (
OS,
OS_AUTO,
FORMAT_PE,
FORMAT_ELF,
OS_WINDOWS,
FORMAT_FREEZE,
Arch,
Format,
String,
Feature,
)
from capa.features.freeze import is_freeze
from capa.features.address import NO_ADDRESS, Address, FileOffsetAddress

Expand Down Expand Up @@ -73,7 +84,10 @@ def extract_arch(buf) -> Iterator[Tuple[Feature, Address]]:
return


def extract_os(buf) -> Iterator[Tuple[Feature, Address]]:
def extract_os(buf, os=OS_AUTO) -> Iterator[Tuple[Feature, Address]]:
if os != OS_AUTO:
yield OS(os), NO_ADDRESS

if buf.startswith(b"MZ"):
yield OS(OS_WINDOWS), NO_ADDRESS
elif buf.startswith(b"\x7fELF"):
Expand All @@ -92,8 +106,6 @@ def extract_os(buf) -> Iterator[Tuple[Feature, Address]]:
# 2. handling a new file format (e.g. macho)
#
# for (1) we can't do much - it's shellcode and all bets are off.
# we could maybe accept a further CLI argument to specify the OS,
# but i think this would be rarely used.
# rules that rely on OS conditions will fail to match on shellcode.
#
# for (2), this logic will need to be updated as the format is implemented.
Expand Down
4 changes: 2 additions & 2 deletions capa/features/extractors/viv/extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@


class VivisectFeatureExtractor(FeatureExtractor):
def __init__(self, vw, path):
def __init__(self, vw, path, os):
super().__init__()
self.vw = vw
self.path = path
Expand All @@ -35,7 +35,7 @@ def __init__(self, vw, path):
# pre-compute these because we'll yield them at *every* scope.
self.global_features: List[Tuple[Feature, Address]] = []
self.global_features.extend(capa.features.extractors.viv.file.extract_file_format(self.buf))
self.global_features.extend(capa.features.extractors.common.extract_os(self.buf))
self.global_features.extend(capa.features.extractors.common.extract_os(self.buf, os))
self.global_features.extend(capa.features.extractors.viv.global_.extract_arch(self.vw))

def get_base_address(self):
Expand Down
4 changes: 2 additions & 2 deletions capa/features/freeze/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -385,14 +385,14 @@ def main(argv=None):
argv = sys.argv[1:]

parser = argparse.ArgumentParser(description="save capa features to a file")
capa.main.install_common_args(parser, {"sample", "format", "backend", "signatures"})
capa.main.install_common_args(parser, {"sample", "format", "backend", "os", "signatures"})
parser.add_argument("output", type=str, help="Path to output file")
args = parser.parse_args(args=argv)
capa.main.handle_common_args(args)

sigpaths = capa.main.get_signatures(args.signatures)

extractor = capa.main.get_extractor(args.sample, args.format, args.backend, sigpaths, False)
extractor = capa.main.get_extractor(args.sample, args.format, args.os, args.backend, sigpaths, False)

with open(args.output, "wb") as f:
f.write(dump(extractor))
Expand Down
11 changes: 10 additions & 1 deletion capa/ida/plugin/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,16 @@ def run(self, arg):
arg (int): bitflag. Setting LSB enables automatic analysis upon
loading. The other bits are currently undefined. See `form.Options`.
"""
self.form = CapaExplorerForm(self.PLUGIN_NAME, arg)
if not self.form:
self.form = CapaExplorerForm(self.PLUGIN_NAME, arg)
else:
widget = idaapi.find_widget(self.form.form_title)
if widget:
idaapi.activate_widget(widget, True)
else:
self.form.Show()
self.form.load_capa_results(False, True)

return True


Expand Down
51 changes: 43 additions & 8 deletions capa/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,12 @@
)
from capa.exceptions import UnsupportedOSError, UnsupportedArchError, UnsupportedFormatError, UnsupportedRuntimeError
from capa.features.common import (
OS_AUTO,
OS_LINUX,
OS_MACOS,
FORMAT_PE,
FORMAT_ELF,
OS_WINDOWS,
FORMAT_AUTO,
FORMAT_SC32,
FORMAT_SC64,
Expand Down Expand Up @@ -491,7 +495,13 @@ def get_workspace(path, format_, sigpaths):

# TODO get_extractors -> List[FeatureExtractor]?
def get_extractor(
path: str, format_: str, backend: str, sigpaths: List[str], should_save_workspace=False, disable_progress=False
path: str,
format_: str,
os: str,
backend: str,
sigpaths: List[str],
should_save_workspace=False,
disable_progress=False,
) -> FeatureExtractor:
"""
raises:
Expand All @@ -506,7 +516,7 @@ def get_extractor(
if not is_supported_arch(path):
raise UnsupportedArchError()

if not is_supported_os(path):
if os == OS_AUTO and not is_supported_os(path):
raise UnsupportedOSError()

if format_ == FORMAT_DOTNET:
Expand Down Expand Up @@ -558,7 +568,7 @@ def get_extractor(
else:
logger.debug("CAPA_SAVE_WORKSPACE unset, not saving workspace")

return capa.features.extractors.viv.extractor.VivisectFeatureExtractor(vw, path)
return capa.features.extractors.viv.extractor.VivisectFeatureExtractor(vw, path, os)


def get_file_extractors(sample: str, format_: str) -> List[FeatureExtractor]:
Expand Down Expand Up @@ -717,6 +727,8 @@ def get_signatures(sigs_path):
def collect_metadata(
argv: List[str],
sample_path: str,
format_: str,
os_: str,
rules_path: List[str],
extractor: capa.features.extractors.base_extractor.FeatureExtractor,
):
Expand All @@ -734,9 +746,9 @@ def collect_metadata(
if rules_path != [RULES_PATH_DEFAULT_STRING]:
rules_path = [os.path.abspath(os.path.normpath(r)) for r in rules_path]

format_ = get_format(sample_path)
format_ = get_format(sample_path) if format_ == FORMAT_AUTO else format_
arch = get_arch(sample_path)
os_ = get_os(sample_path)
os_ = get_os(sample_path) if os_ == OS_AUTO else os_

return {
"timestamp": datetime.datetime.now().isoformat(),
Expand Down Expand Up @@ -818,6 +830,7 @@ def install_common_args(parser, wanted=None):
wanted (Set[str]): collection of arguments to opt-into, including:
- "sample": required positional argument to input file.
- "format": flag to override file format.
- "os": flag to override file operating system.
- "backend": flag to override analysis backend.
- "rules": flag to override path to capa rules.
- "tag": flag to override/specify which rules to match.
Expand Down Expand Up @@ -851,6 +864,7 @@ def install_common_args(parser, wanted=None):
#
# - sample
# - format
# - os
# - rules
# - tag
#
Expand Down Expand Up @@ -891,6 +905,21 @@ def install_common_args(parser, wanted=None):
default=BACKEND_VIV,
)

if "os" in wanted:
oses = [
(OS_AUTO, "detect OS automatically - default"),
(OS_LINUX,),
(OS_MACOS,),
(OS_WINDOWS,),
]
os_help = ", ".join(["%s (%s)" % (o[0], o[1]) if len(o) == 2 else o[0] for o in oses])
parser.add_argument(
"--os",
choices=[o[0] for o in oses],
default=OS_AUTO,
help="select sample OS: %s" % os_help,
)

if "rules" in wanted:
parser.add_argument(
"-r",
Expand Down Expand Up @@ -1054,7 +1083,7 @@ def main(argv=None):
parser = argparse.ArgumentParser(
description=desc, epilog=epilog, formatter_class=argparse.RawDescriptionHelpFormatter
)
install_common_args(parser, {"sample", "format", "backend", "signatures", "rules", "tag"})
install_common_args(parser, {"sample", "format", "backend", "os", "signatures", "rules", "tag"})
parser.add_argument("-j", "--json", action="store_true", help="emit JSON instead of text")
args = parser.parse_args(args=argv)
ret = handle_common_args(args)
Expand Down Expand Up @@ -1170,7 +1199,13 @@ def main(argv=None):

try:
extractor = get_extractor(
args.sample, format_, args.backend, sig_paths, should_save_workspace, disable_progress=args.quiet
args.sample,
format_,
args.os,
args.backend,
sig_paths,
should_save_workspace,
disable_progress=args.quiet,
)
except UnsupportedFormatError:
log_unsupported_format_error()
Expand All @@ -1182,7 +1217,7 @@ def main(argv=None):
log_unsupported_os_error()
return E_INVALID_FILE_OS

meta = collect_metadata(argv, args.sample, args.rules, extractor)
meta = collect_metadata(argv, args.sample, args.format, args.os, args.rules, extractor)

capabilities, counts = find_capabilities(rules, extractor, disable_progress=args.quiet)
meta["analysis"].update(counts)
Expand Down
14 changes: 9 additions & 5 deletions scripts/bulk-process.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@
import capa.rules
import capa.render.json
import capa.render.result_document as rd
from capa.features.common import OS_AUTO

logger = logging.getLogger("capa")

Expand All @@ -81,6 +82,7 @@ def get_capa_results(args):
rules (capa.rules.RuleSet): the rules to match
signatures (List[str]): list of file system paths to signature files
format (str): the name of the sample file format
os (str): the name of the operating system
path (str): the file system path to the sample to process
args is a tuple because i'm not quite sure how to unpack multiple arguments using `map`.
Expand All @@ -96,12 +98,12 @@ def get_capa_results(args):
meta (dict): the meta analysis results
capabilities (dict): the matched capabilities and their result objects
"""
rules, sigpaths, format, path = args
rules, sigpaths, format, os_, path = args
should_save_workspace = os.environ.get("CAPA_SAVE_WORKSPACE") not in ("0", "no", "NO", "n", None)
logger.info("computing capa results for: %s", path)
try:
extractor = capa.main.get_extractor(
path, format, capa.main.BACKEND_VIV, sigpaths, should_save_workspace, disable_progress=True
path, format, os_, capa.main.BACKEND_VIV, sigpaths, should_save_workspace, disable_progress=True
)
except capa.main.UnsupportedFormatError:
# i'm not 100% sure if multiprocessing will reliably raise exceptions across process boundaries.
Expand All @@ -127,7 +129,7 @@ def get_capa_results(args):
"error": f"unexpected error: {e}",
}

meta = capa.main.collect_metadata([], path, [], extractor)
meta = capa.main.collect_metadata([], path, format, os_, [], extractor)
capabilities, counts = capa.main.find_capabilities(rules, extractor, disable_progress=True)
meta["analysis"].update(counts)
meta["analysis"]["layout"] = capa.main.compute_layout(rules, extractor, capabilities)
Expand All @@ -142,7 +144,7 @@ def main(argv=None):
argv = sys.argv[1:]

parser = argparse.ArgumentParser(description="detect capabilities in programs.")
capa.main.install_common_args(parser, wanted={"rules", "signatures"})
capa.main.install_common_args(parser, wanted={"rules", "signatures", "format", "os"})
parser.add_argument("input", type=str, help="Path to directory of files to recursively analyze")
parser.add_argument(
"-n", "--parallelism", type=int, default=multiprocessing.cpu_count(), help="parallelism factor"
Expand Down Expand Up @@ -195,7 +197,9 @@ def map(f, args, parallelism=None):

results = {}
for result in mapper(
get_capa_results, [(rules, sig_paths, "pe", sample) for sample in samples], parallelism=args.parallelism
get_capa_results,
[(rules, sig_paths, "pe", OS_AUTO, sample) for sample in samples],
parallelism=args.parallelism,
):
if result["status"] == "error":
logger.warning(result["error"])
Expand Down
7 changes: 5 additions & 2 deletions scripts/capa_as_library.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import capa.render.result_document as rd
import capa.features.freeze.features as frzf
from capa.engine import *
from capa.features.common import OS_AUTO, FORMAT_AUTO


# == Render dictionary helpers
Expand Down Expand Up @@ -164,11 +165,13 @@ def capa_details(rules_path, file_path, output_format="dictionary"):
rules = capa.main.get_rules([rules_path])

# extract features and find capabilities
extractor = capa.main.get_extractor(file_path, "auto", capa.main.BACKEND_VIV, [], False, disable_progress=True)
extractor = capa.main.get_extractor(
file_path, FORMAT_AUTO, OS_AUTO, capa.main.BACKEND_VIV, [], False, disable_progress=True
)
capabilities, counts = capa.main.find_capabilities(rules, extractor, disable_progress=True)

# collect metadata (used only to make rendering more complete)
meta = capa.main.collect_metadata([], file_path, rules_path, extractor)
meta = capa.main.collect_metadata([], file_path, FORMAT_AUTO, OS_AUTO, rules_path, extractor)
meta["analysis"].update(counts)
meta["analysis"]["layout"] = capa.main.compute_layout(rules, extractor, capabilities)

Expand Down
6 changes: 4 additions & 2 deletions scripts/lint.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@
import capa.helpers
import capa.features.insn
from capa.rules import Rule, RuleSet
from capa.features.common import FORMAT_PE, FORMAT_DOTNET, String, Feature, Substring
from capa.features.common import OS_AUTO, FORMAT_PE, FORMAT_DOTNET, String, Feature, Substring
from capa.render.result_document import RuleMetadata

logger = logging.getLogger("lint")
Expand Down Expand Up @@ -310,7 +310,9 @@ def get_sample_capabilities(ctx: Context, path: Path) -> Set[str]:
format_ = capa.main.get_auto_format(nice_path)

logger.debug("analyzing sample: %s", nice_path)
extractor = capa.main.get_extractor(nice_path, format_, "", DEFAULT_SIGNATURES, False, disable_progress=True)
extractor = capa.main.get_extractor(
nice_path, format_, OS_AUTO, "", DEFAULT_SIGNATURES, False, disable_progress=True
)

capabilities, _ = capa.main.find_capabilities(ctx.rules, extractor, disable_progress=True)
# mypy doesn't seem to be happy with the MatchResults type alias & set(...keys())?
Expand Down
Loading

0 comments on commit ddc52fa

Please sign in to comment.