diff --git a/CHANGELOG.md b/CHANGELOG.md index ef4a6af51..354aa3661 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,10 +5,11 @@ ### New Features - extractor: add Binary Ninja feature extractor @xusheng6 +- new cli flag `--os` to override auto-detected operating system for a sample @captainGeech42 ### Breaking Changes -### New Rules (20) +### New Rules (22) - persistence/scheduled-tasks/schedule-task-via-at joren485 - data-manipulation/prng/generate-random-numbers-via-rtlgenrandom william.ballenthin@mandiant.com @@ -30,6 +31,8 @@ - nursery/hash-data-using-ripemd256 raymond.leong@mandiant.com - nursery/hash-data-using-ripemd320 raymond.leong@mandiant.com - nursery/set-web-proxy-in-dotnet michael.hunhoff@mandiant.com +- nursery/check-for-windows-sandbox-via-subdirectory echernofsky@google.com +- nursery/enumerate-pe-sections-in-dotnet @mr-tz - ### Bug Fixes diff --git a/README.md b/README.md index 6e5db2acf..996467b2e 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/flare-capa)](https://pypi.org/project/flare-capa) [![Last release](https://img.shields.io/github/v/release/mandiant/capa)](https://github.com/mandiant/capa/releases) -[![Number of rules](https://img.shields.io/badge/rules-787-blue.svg)](https://github.com/mandiant/capa-rules) +[![Number of rules](https://img.shields.io/badge/rules-789-blue.svg)](https://github.com/mandiant/capa-rules) [![CI status](https://github.com/mandiant/capa/workflows/CI/badge.svg)](https://github.com/mandiant/capa/actions?query=workflow%3ACI+event%3Apush+branch%3Amaster) [![Downloads](https://img.shields.io/github/downloads/mandiant/capa/total)](https://github.com/mandiant/capa/releases) [![License](https://img.shields.io/badge/license-Apache--2.0-green.svg)](LICENSE.txt) diff --git a/capa/features/common.py b/capa/features/common.py index d2f1a4ff3..062c27fa2 100644 --- a/capa/features/common.py +++ b/capa/features/common.py @@ -417,6 +417,8 @@ def __init__(self, value: str, description=None): OS_ANY = "any" VALID_OS = {os.value for os in capa.features.extractors.elf.OS} VALID_OS.update({OS_WINDOWS, OS_LINUX, OS_MACOS, OS_ANY}) +# internal only, not to be used in rules +OS_AUTO = "auto" class OS(Feature): diff --git a/capa/features/extractors/common.py b/capa/features/extractors/common.py index d72fcefd0..3c1145ce3 100644 --- a/capa/features/extractors/common.py +++ b/capa/features/extractors/common.py @@ -10,7 +10,18 @@ import capa.features.extractors.elf import capa.features.extractors.pefile import capa.features.extractors.strings -from capa.features.common import OS, FORMAT_PE, FORMAT_ELF, OS_WINDOWS, FORMAT_FREEZE, Arch, Format, String, Feature +from capa.features.common import ( + OS, + OS_AUTO, + FORMAT_PE, + FORMAT_ELF, + OS_WINDOWS, + FORMAT_FREEZE, + Arch, + Format, + String, + Feature, +) from capa.features.freeze import is_freeze from capa.features.address import NO_ADDRESS, Address, FileOffsetAddress @@ -73,7 +84,10 @@ def extract_arch(buf) -> Iterator[Tuple[Feature, Address]]: return -def extract_os(buf) -> Iterator[Tuple[Feature, Address]]: +def extract_os(buf, os=OS_AUTO) -> Iterator[Tuple[Feature, Address]]: + if os != OS_AUTO: + yield OS(os), NO_ADDRESS + if buf.startswith(b"MZ"): yield OS(OS_WINDOWS), NO_ADDRESS elif buf.startswith(b"\x7fELF"): @@ -92,8 +106,6 @@ def extract_os(buf) -> Iterator[Tuple[Feature, Address]]: # 2. handling a new file format (e.g. macho) # # for (1) we can't do much - its shellcode and all bets are off. - # we could maybe accept a further CLI argument to specify the OS, - # but i think this would be rarely used. # rules that rely on OS conditions will fail to match on shellcode. # # for (2), this logic will need to be updated as the format is implemented. diff --git a/capa/features/extractors/viv/extractor.py b/capa/features/extractors/viv/extractor.py index e2d0fb1e1..4d877ab27 100644 --- a/capa/features/extractors/viv/extractor.py +++ b/capa/features/extractors/viv/extractor.py @@ -25,7 +25,7 @@ class VivisectFeatureExtractor(FeatureExtractor): - def __init__(self, vw, path): + def __init__(self, vw, path, os): super().__init__() self.vw = vw self.path = path @@ -35,7 +35,7 @@ def __init__(self, vw, path): # pre-compute these because we'll yield them at *every* scope. self.global_features: List[Tuple[Feature, Address]] = [] self.global_features.extend(capa.features.extractors.viv.file.extract_file_format(self.buf)) - self.global_features.extend(capa.features.extractors.common.extract_os(self.buf)) + self.global_features.extend(capa.features.extractors.common.extract_os(self.buf, os)) self.global_features.extend(capa.features.extractors.viv.global_.extract_arch(self.vw)) def get_base_address(self): diff --git a/capa/features/freeze/__init__.py b/capa/features/freeze/__init__.py index e6911c5fd..9ff86b5d0 100644 --- a/capa/features/freeze/__init__.py +++ b/capa/features/freeze/__init__.py @@ -385,14 +385,14 @@ def main(argv=None): argv = sys.argv[1:] parser = argparse.ArgumentParser(description="save capa features to a file") - capa.main.install_common_args(parser, {"sample", "format", "backend", "signatures"}) + capa.main.install_common_args(parser, {"sample", "format", "backend", "os", "signatures"}) parser.add_argument("output", type=str, help="Path to output file") args = parser.parse_args(args=argv) capa.main.handle_common_args(args) sigpaths = capa.main.get_signatures(args.signatures) - extractor = capa.main.get_extractor(args.sample, args.format, args.backend, sigpaths, False) + extractor = capa.main.get_extractor(args.sample, args.format, args.os, args.backend, sigpaths, False) with open(args.output, "wb") as f: f.write(dump(extractor)) diff --git a/capa/ida/plugin/__init__.py b/capa/ida/plugin/__init__.py index 8030a8003..a24e7d38f 100644 --- a/capa/ida/plugin/__init__.py +++ b/capa/ida/plugin/__init__.py @@ -67,7 +67,16 @@ def run(self, arg): arg (int): bitflag. Setting LSB enables automatic analysis upon loading. The other bits are currently undefined. See `form.Options`. """ - self.form = CapaExplorerForm(self.PLUGIN_NAME, arg) + if not self.form: + self.form = CapaExplorerForm(self.PLUGIN_NAME, arg) + else: + widget = idaapi.find_widget(self.form.form_title) + if widget: + idaapi.activate_widget(widget, True) + else: + self.form.Show() + self.form.load_capa_results(False, True) + return True diff --git a/capa/main.py b/capa/main.py index 6a9f15bdd..f5f1178a2 100644 --- a/capa/main.py +++ b/capa/main.py @@ -58,8 +58,12 @@ ) from capa.exceptions import UnsupportedOSError, UnsupportedArchError, UnsupportedFormatError, UnsupportedRuntimeError from capa.features.common import ( + OS_AUTO, + OS_LINUX, + OS_MACOS, FORMAT_PE, FORMAT_ELF, + OS_WINDOWS, FORMAT_AUTO, FORMAT_SC32, FORMAT_SC64, @@ -491,7 +495,13 @@ def get_workspace(path, format_, sigpaths): # TODO get_extractors -> List[FeatureExtractor]? def get_extractor( - path: str, format_: str, backend: str, sigpaths: List[str], should_save_workspace=False, disable_progress=False + path: str, + format_: str, + os: str, + backend: str, + sigpaths: List[str], + should_save_workspace=False, + disable_progress=False, ) -> FeatureExtractor: """ raises: @@ -506,7 +516,7 @@ def get_extractor( if not is_supported_arch(path): raise UnsupportedArchError() - if not is_supported_os(path): + if os == OS_AUTO and not is_supported_os(path): raise UnsupportedOSError() if format_ == FORMAT_DOTNET: @@ -558,7 +568,7 @@ def get_extractor( else: logger.debug("CAPA_SAVE_WORKSPACE unset, not saving workspace") - return capa.features.extractors.viv.extractor.VivisectFeatureExtractor(vw, path) + return capa.features.extractors.viv.extractor.VivisectFeatureExtractor(vw, path, os) def get_file_extractors(sample: str, format_: str) -> List[FeatureExtractor]: @@ -717,6 +727,8 @@ def get_signatures(sigs_path): def collect_metadata( argv: List[str], sample_path: str, + format_: str, + os_: str, rules_path: List[str], extractor: capa.features.extractors.base_extractor.FeatureExtractor, ): @@ -734,9 +746,9 @@ def collect_metadata( if rules_path != [RULES_PATH_DEFAULT_STRING]: rules_path = [os.path.abspath(os.path.normpath(r)) for r in rules_path] - format_ = get_format(sample_path) + format_ = get_format(sample_path) if format_ == FORMAT_AUTO else format_ arch = get_arch(sample_path) - os_ = get_os(sample_path) + os_ = get_os(sample_path) if os_ == OS_AUTO else os_ return { "timestamp": datetime.datetime.now().isoformat(), @@ -818,6 +830,7 @@ def install_common_args(parser, wanted=None): wanted (Set[str]): collection of arguments to opt-into, including: - "sample": required positional argument to input file. - "format": flag to override file format. + - "os": flag to override file operating system. - "backend": flag to override analysis backend. - "rules": flag to override path to capa rules. - "tag": flag to override/specify which rules to match. @@ -851,6 +864,7 @@ def install_common_args(parser, wanted=None): # # - sample # - format + # - os # - rules # - tag # @@ -891,6 +905,21 @@ def install_common_args(parser, wanted=None): default=BACKEND_VIV, ) + if "os" in wanted: + oses = [ + (OS_AUTO, "detect OS automatically - default"), + (OS_LINUX,), + (OS_MACOS,), + (OS_WINDOWS,), + ] + os_help = ", ".join(["%s (%s)" % (o[0], o[1]) if len(o) == 2 else o[0] for o in oses]) + parser.add_argument( + "--os", + choices=[o[0] for o in oses], + default=OS_AUTO, + help="select sample OS: %s" % os_help, + ) + if "rules" in wanted: parser.add_argument( "-r", @@ -1054,7 +1083,7 @@ def main(argv=None): parser = argparse.ArgumentParser( description=desc, epilog=epilog, formatter_class=argparse.RawDescriptionHelpFormatter ) - install_common_args(parser, {"sample", "format", "backend", "signatures", "rules", "tag"}) + install_common_args(parser, {"sample", "format", "backend", "os", "signatures", "rules", "tag"}) parser.add_argument("-j", "--json", action="store_true", help="emit JSON instead of text") args = parser.parse_args(args=argv) ret = handle_common_args(args) @@ -1170,7 +1199,13 @@ def main(argv=None): try: extractor = get_extractor( - args.sample, format_, args.backend, sig_paths, should_save_workspace, disable_progress=args.quiet + args.sample, + format_, + args.os, + args.backend, + sig_paths, + should_save_workspace, + disable_progress=args.quiet, ) except UnsupportedFormatError: log_unsupported_format_error() @@ -1182,7 +1217,7 @@ def main(argv=None): log_unsupported_os_error() return E_INVALID_FILE_OS - meta = collect_metadata(argv, args.sample, args.rules, extractor) + meta = collect_metadata(argv, args.sample, args.format, args.os, args.rules, extractor) capabilities, counts = find_capabilities(rules, extractor, disable_progress=args.quiet) meta["analysis"].update(counts) diff --git a/rules b/rules index aa2dc1137..232af1ca4 160000 --- a/rules +++ b/rules @@ -1 +1 @@ -Subproject commit aa2dc1137dca05215f71a48926c56345cc462173 +Subproject commit 232af1ca4cd8ad61c6d878dd2b893c830fdb4c8c diff --git a/scripts/bulk-process.py b/scripts/bulk-process.py index f22c55e01..51834a9ac 100644 --- a/scripts/bulk-process.py +++ b/scripts/bulk-process.py @@ -69,6 +69,7 @@ import capa.rules import capa.render.json import capa.render.result_document as rd +from capa.features.common import OS_AUTO logger = logging.getLogger("capa") @@ -81,6 +82,7 @@ def get_capa_results(args): rules (capa.rules.RuleSet): the rules to match signatures (List[str]): list of file system paths to signature files format (str): the name of the sample file format + os (str): the name of the operating system path (str): the file system path to the sample to process args is a tuple because i'm not quite sure how to unpack multiple arguments using `map`. @@ -96,12 +98,12 @@ def get_capa_results(args): meta (dict): the meta analysis results capabilities (dict): the matched capabilities and their result objects """ - rules, sigpaths, format, path = args + rules, sigpaths, format, os_, path = args should_save_workspace = os.environ.get("CAPA_SAVE_WORKSPACE") not in ("0", "no", "NO", "n", None) logger.info("computing capa results for: %s", path) try: extractor = capa.main.get_extractor( - path, format, capa.main.BACKEND_VIV, sigpaths, should_save_workspace, disable_progress=True + path, format, os_, capa.main.BACKEND_VIV, sigpaths, should_save_workspace, disable_progress=True ) except capa.main.UnsupportedFormatError: # i'm 100% sure if multiprocessing will reliably raise exceptions across process boundaries. @@ -127,7 +129,7 @@ def get_capa_results(args): "error": f"unexpected error: {e}", } - meta = capa.main.collect_metadata([], path, [], extractor) + meta = capa.main.collect_metadata([], path, format, os_, [], extractor) capabilities, counts = capa.main.find_capabilities(rules, extractor, disable_progress=True) meta["analysis"].update(counts) meta["analysis"]["layout"] = capa.main.compute_layout(rules, extractor, capabilities) @@ -142,7 +144,7 @@ def main(argv=None): argv = sys.argv[1:] parser = argparse.ArgumentParser(description="detect capabilities in programs.") - capa.main.install_common_args(parser, wanted={"rules", "signatures"}) + capa.main.install_common_args(parser, wanted={"rules", "signatures", "format", "os"}) parser.add_argument("input", type=str, help="Path to directory of files to recursively analyze") parser.add_argument( "-n", "--parallelism", type=int, default=multiprocessing.cpu_count(), help="parallelism factor" @@ -195,7 +197,9 @@ def map(f, args, parallelism=None): results = {} for result in mapper( - get_capa_results, [(rules, sig_paths, "pe", sample) for sample in samples], parallelism=args.parallelism + get_capa_results, + [(rules, sig_paths, "pe", OS_AUTO, sample) for sample in samples], + parallelism=args.parallelism, ): if result["status"] == "error": logger.warning(result["error"]) diff --git a/scripts/capa_as_library.py b/scripts/capa_as_library.py index 884b2e944..f15ca3b4a 100644 --- a/scripts/capa_as_library.py +++ b/scripts/capa_as_library.py @@ -14,6 +14,7 @@ import capa.render.result_document as rd import capa.features.freeze.features as frzf from capa.engine import * +from capa.features.common import OS_AUTO, FORMAT_AUTO # == Render dictionary helpers @@ -164,11 +165,13 @@ def capa_details(rules_path, file_path, output_format="dictionary"): rules = capa.main.get_rules([rules_path]) # extract features and find capabilities - extractor = capa.main.get_extractor(file_path, "auto", capa.main.BACKEND_VIV, [], False, disable_progress=True) + extractor = capa.main.get_extractor( + file_path, FORMAT_AUTO, OS_AUTO, capa.main.BACKEND_VIV, [], False, disable_progress=True + ) capabilities, counts = capa.main.find_capabilities(rules, extractor, disable_progress=True) # collect metadata (used only to make rendering more complete) - meta = capa.main.collect_metadata([], file_path, rules_path, extractor) + meta = capa.main.collect_metadata([], file_path, FORMAT_AUTO, OS_AUTO, rules_path, extractor) meta["analysis"].update(counts) meta["analysis"]["layout"] = capa.main.compute_layout(rules, extractor, capabilities) diff --git a/scripts/lint.py b/scripts/lint.py index cf56f1a86..92c7fbcf9 100644 --- a/scripts/lint.py +++ b/scripts/lint.py @@ -45,7 +45,7 @@ import capa.helpers import capa.features.insn from capa.rules import Rule, RuleSet -from capa.features.common import FORMAT_PE, FORMAT_DOTNET, String, Feature, Substring +from capa.features.common import OS_AUTO, FORMAT_PE, FORMAT_DOTNET, String, Feature, Substring from capa.render.result_document import RuleMetadata logger = logging.getLogger("lint") @@ -310,7 +310,9 @@ def get_sample_capabilities(ctx: Context, path: Path) -> Set[str]: format_ = capa.main.get_auto_format(nice_path) logger.debug("analyzing sample: %s", nice_path) - extractor = capa.main.get_extractor(nice_path, format_, "", DEFAULT_SIGNATURES, False, disable_progress=True) + extractor = capa.main.get_extractor( + nice_path, format_, OS_AUTO, "", DEFAULT_SIGNATURES, False, disable_progress=True + ) capabilities, _ = capa.main.find_capabilities(ctx.rules, extractor, disable_progress=True) # mypy doesn't seem to be happy with the MatchResults type alias & set(...keys())? diff --git a/scripts/profile-time.py b/scripts/profile-time.py index 0c7f0783c..09d125d89 100644 --- a/scripts/profile-time.py +++ b/scripts/profile-time.py @@ -71,7 +71,7 @@ def main(argv=None): label += " (dirty)" parser = argparse.ArgumentParser(description="Profile capa performance") - capa.main.install_common_args(parser, wanted={"format", "sample", "signatures", "rules"}) + capa.main.install_common_args(parser, wanted={"format", "os", "sample", "signatures", "rules"}) parser.add_argument("--number", type=int, default=3, help="batch size of profile collection") parser.add_argument("--repeat", type=int, default=30, help="batch count of profile collection") @@ -99,12 +99,14 @@ def main(argv=None): logger.error("%s", str(e)) return -1 - if (args.format == "freeze") or (args.format == "auto" and capa.features.freeze.is_freeze(taste)): + if (args.format == "freeze") or ( + args.format == capa.features.common.FORMAT_AUTO and capa.features.freeze.is_freeze(taste) + ): with open(args.sample, "rb") as f: extractor = capa.features.freeze.load(f.read()) else: extractor = capa.main.get_extractor( - args.sample, args.format, capa.main.BACKEND_VIV, sig_paths, should_save_workspace=False + args.sample, args.format, args.os, capa.main.BACKEND_VIV, sig_paths, should_save_workspace=False ) with tqdm.tqdm(total=args.number * args.repeat) as pbar: diff --git a/scripts/show-capabilities-by-function.py b/scripts/show-capabilities-by-function.py index dbd47f8f2..3f37269bd 100644 --- a/scripts/show-capabilities-by-function.py +++ b/scripts/show-capabilities-by-function.py @@ -68,6 +68,7 @@ import capa.features.freeze import capa.render.result_document as rd from capa.helpers import get_file_taste +from capa.features.common import FORMAT_AUTO from capa.features.freeze import Address logger = logging.getLogger("capa.show-capabilities-by-function") @@ -130,7 +131,7 @@ def main(argv=None): argv = sys.argv[1:] parser = argparse.ArgumentParser(description="detect capabilities in programs.") - capa.main.install_common_args(parser, wanted={"format", "backend", "sample", "signatures", "rules", "tag"}) + capa.main.install_common_args(parser, wanted={"format", "os", "backend", "sample", "signatures", "rules", "tag"}) args = parser.parse_args(args=argv) capa.main.handle_common_args(args) @@ -156,7 +157,7 @@ def main(argv=None): logger.error("%s", str(e)) return -1 - if (args.format == "freeze") or (args.format == "auto" and capa.features.freeze.is_freeze(taste)): + if (args.format == "freeze") or (args.format == FORMAT_AUTO and capa.features.freeze.is_freeze(taste)): format_ = "freeze" with open(args.sample, "rb") as f: extractor = capa.features.freeze.load(f.read()) @@ -166,7 +167,7 @@ def main(argv=None): try: extractor = capa.main.get_extractor( - args.sample, args.format, args.backend, sig_paths, should_save_workspace + args.sample, args.format, args.os, args.backend, sig_paths, should_save_workspace ) except capa.exceptions.UnsupportedFormatError: capa.helpers.log_unsupported_format_error() @@ -175,7 +176,7 @@ def main(argv=None): capa.helpers.log_unsupported_runtime_error() return -1 - meta = capa.main.collect_metadata(argv, args.sample, args.rules, extractor) + meta = capa.main.collect_metadata(argv, args.sample, format_, args.os, args.rules, extractor) capabilities, counts = capa.main.find_capabilities(rules, extractor) meta["analysis"].update(counts) meta["analysis"]["layout"] = capa.main.compute_layout(rules, extractor, capabilities) diff --git a/scripts/show-features.py b/scripts/show-features.py index 297977d5b..bb83bad9f 100644 --- a/scripts/show-features.py +++ b/scripts/show-features.py @@ -95,7 +95,7 @@ def main(argv=None): argv = sys.argv[1:] parser = argparse.ArgumentParser(description="Show the features that capa extracts from the given sample") - capa.main.install_common_args(parser, wanted={"format", "sample", "signatures", "backend"}) + capa.main.install_common_args(parser, wanted={"format", "os", "sample", "signatures", "backend"}) parser.add_argument("-F", "--function", type=str, help="Show features for specific function") args = parser.parse_args(args=argv) @@ -113,14 +113,16 @@ def main(argv=None): logger.error("%s", str(e)) return -1 - if (args.format == "freeze") or (args.format == "auto" and capa.features.freeze.is_freeze(taste)): + if (args.format == "freeze") or ( + args.format == capa.features.common.FORMAT_AUTO and capa.features.freeze.is_freeze(taste) + ): with open(args.sample, "rb") as f: extractor = capa.features.freeze.load(f.read()) else: should_save_workspace = os.environ.get("CAPA_SAVE_WORKSPACE") not in ("0", "no", "NO", "n", None) try: extractor = capa.main.get_extractor( - args.sample, args.format, args.backend, sig_paths, should_save_workspace + args.sample, args.format, args.os, args.backend, sig_paths, should_save_workspace ) except capa.exceptions.UnsupportedFormatError: capa.helpers.log_unsupported_format_error() diff --git a/tests/data b/tests/data index d19468ce0..3cbd7768c 160000 --- a/tests/data +++ b/tests/data @@ -1 +1 @@ -Subproject commit d19468ce08c1f887626971f6ff92b9ad28c32360 +Subproject commit 3cbd7768c27fbcc77dc46d8f7bddd16834e352f1 diff --git a/tests/fixtures.py b/tests/fixtures.py index bf04e328c..7d7cd6cff 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -26,12 +26,14 @@ from capa.features.common import ( OS, OS_ANY, + OS_AUTO, OS_LINUX, ARCH_I386, FORMAT_PE, ARCH_AMD64, FORMAT_ELF, OS_WINDOWS, + FORMAT_AUTO, FORMAT_DOTNET, Arch, Format, @@ -104,9 +106,9 @@ def get_viv_extractor(path): elif "raw64" in path: vw = capa.main.get_workspace(path, "sc64", sigpaths=sigpaths) else: - vw = capa.main.get_workspace(path, "auto", sigpaths=sigpaths) + vw = capa.main.get_workspace(path, FORMAT_AUTO, sigpaths=sigpaths) vw.saveWorkspace() - extractor = capa.features.extractors.viv.extractor.VivisectFeatureExtractor(vw, path) + extractor = capa.features.extractors.viv.extractor.VivisectFeatureExtractor(vw, path, OS_AUTO) fixup_viv(path, extractor) return extractor