diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 3175c5a5..f316bc37 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -26,22 +26,53 @@ We rely on contributors to test any changes before they are submitted as pull re New scanners should be accompanied by a [pytest](https://docs.pytest.org/) based test in `src/python/strelka/tests`, along with **non-malicous** and reasonably sized sample files in `src/python/strelka/tests/fixtures`. -pytest is run when the docker container is built to assure scanners will work at runtime. +New fixtures should also be accompanied by updates to the configuration tests in `tests_configuration`. Changes to tastes or scanner assignments will require updates to these tests. -Run pytest manually: +The best way to run Strelka's test suite is to build the docker containers. Some of Strelka's scanners have OS-level dependencies that make them unsuitable for individual testing. + +``` +docker-compose -f build/docker-compose.yaml build -```bash -cd src/python/strelka/ -python -m pytest tests/ ============================= test session starts ============================== -platform linux -- Python 3.10.7, pytest-7.2.0, pluggy-1.0.0 -rootdir: /strelka/src/python -plugins: mock-3.10.0 -collected 9 items +platform linux -- Python 3.10.6, pytest-7.2.0, pluggy-1.0.0 +rootdir: /strelka +plugins: mock-3.10.0, unordered-0.5.2 +collected 92 items + +tests/test_required_for_scanner.py . +tests/test_scan_base64.py . +tests/test_scan_base64_pe.py . +tests/test_scan_batch.py . +tests/test_scan_bmp_eof.py . ... -============================== 9 passed in 0.48s =============================== +tests/test_scan_upx.py . +tests/test_scan_url.py .. +tests/test_scan_vhd.py .. +tests/test_scan_x509.py .. +tests/test_scan_xml.py . +tests/test_scan_yara.py . +tests/test_scan_zip.py .. + +======================= 92 passed, 29 warnings in 27.93s ======================= +``` + +If you're testing with the default backend.yaml and taste.yara, enable `CONFIG_TESTS` to ensure the configuration works as expected. + +``` +docker-compose -f build/docker-compose.yaml build --build-arg CONFIG_TESTS=true backend + +============================= test session starts ============================== +platform linux -- Python 3.10.6, pytest-7.2.0, pluggy-1.0.0 +rootdir: /strelka +plugins: mock-3.10.0, unordered-0.5.2 +collected 155 items + +tests_configuration/test_scanner_assignment.py ............................................................................. +tests_configuration/test_taste.py .............................................................................. + +======================= 155 passed, 4 warnings in 8.55s ======================== ``` ## Style Guides diff --git a/build/python/backend/Dockerfile b/build/python/backend/Dockerfile index 98654edf..414e8467 100644 --- a/build/python/backend/Dockerfile +++ b/build/python/backend/Dockerfile @@ -2,6 +2,8 @@ FROM ubuntu:22.04 ARG DEBIAN_FRONTEND=noninteractive LABEL maintainer="Target Brands, Inc. 
TTS-CFC-OpenSource@target.com" +ARG CONFIG_TESTS=false + ARG USERNAME=strelka ARG USER_UID=1001 ARG USER_GID=$USER_UID @@ -185,12 +187,13 @@ RUN cd /strelka/ && \ chmod -R g=u /var/log/strelka/ # Run tests as non-root user -USER $username +USER $USERNAME # Run build checks -RUN echo '[+] Run checks' && \ +RUN echo '[+] Run build checks' && \ cd /strelka/strelka/ && \ python3 -m pytest -s tests/ && \ + if $CONFIG_TESTS; then python3 -m pytest -s tests_configuration/; fi && \ echo '[+] Done' USER root @@ -202,4 +205,4 @@ RUN cd /strelka/ && \ # Remove config directory (will bind mount once built) RUN rm -rf /etc/strelka/ -USER $username +USER $USERNAME diff --git a/docs/README.md b/docs/README.md index 2b53ca39..9672ad3d 100644 --- a/docs/README.md +++ b/docs/README.md @@ -613,50 +613,53 @@ The table below describes each scanner and its options. Each scanner has the hid | ScanZlib | Decompresses gzip files | N/A ## Tests -As Strelka consists of many scanners and dependencies for those scanners, Pytests are particularly valuable for testing the ongoing functionality of Strelka and it's scanners. Tests allow users to write test cases that verify the correct behavior of Strelka scanners to ensure that the scanners remain reliable and accurate. Additionally, using pytests can help streamline the development process, allowing developers to focus on writing new features and improvements for the scanners. The following section details how to setup Pytests. +As Strelka consists of many scanners and dependencies for those scanners, pytests are particularly valuable for testing the ongoing functionality of Strelka and its scanners. Tests allow users to write test cases that verify the correct behavior of Strelka scanners to ensure that the scanners remain reliable and accurate. Additionally, using pytests can help streamline the development process, allowing developers to focus on writing new features and improvements for the scanners. Strelka contains a set of standard test fixture files that represent the types of files Strelka ingests. -If using Strelka on Github, this repository supports Github Actions which runs on Pull Requests - -### Tests Setup -Here are the steps for setting up a virtualenv virtual environment, installing requirements from src/python/requirements.txt, and running pytest: - -1. Install virtualenv, if it is not already installed: - -``` -pip install virtualenv -``` -2. Create a new virtual environment: - -``` -virtualenv -``` - -3. Activate the virtual environment: +The best way to run Strelka's test suite is to build the docker containers. Some of Strelka's scanners have OS-level dependencies that make them unsuitable for individual testing. ``` -source /bin/activate +docker-compose -f build/docker-compose.yaml build backend + +============================= test session starts ============================== +platform linux -- Python 3.10.6, pytest-7.2.0, pluggy-1.0.0 +rootdir: /strelka +plugins: mock-3.10.0, unordered-0.5.2 +collected 92 items + +tests/test_required_for_scanner.py . +tests/test_scan_base64.py . +tests/test_scan_base64_pe.py . +tests/test_scan_batch.py . +tests/test_scan_bmp_eof.py . + +... + +tests/test_scan_upx.py . +tests/test_scan_url.py .. +tests/test_scan_vhd.py .. +tests/test_scan_x509.py .. +tests/test_scan_xml.py . +tests/test_scan_yara.py . +tests/test_scan_zip.py .. + +======================= 92 passed, 29 warnings in 27.93s ======================= ``` -4. 
Install the requirements from src/python/requirements.txt: +If you're testing with the default backend.yaml and taste.yara, enable `CONFIG_TESTS` to assure the configuration works as expected. ``` -pip install -r src/python/requirements.txt -``` +docker-compose -f build/docker-compose.yaml build --build-arg CONFIG_TESTS=true backend -5. Run pytest to execute the test cases: +============================= test session starts ============================== +platform linux -- Python 3.10.6, pytest-7.2.0, pluggy-1.0.0 +rootdir: /strelka +plugins: mock-3.10.0, unordered-0.5.2 +collected 155 items -``` -pytest -``` - -Upon execution, you will be provided the successes and failures for any available scanner test. +tests_configuration/test_scanner_assignment.py ............................................................................. +tests_configuration/test_taste.py .............................................................................. -``` -Some tests (e.g., ScanCapa, ScanDmg, ScanOCR) may fail on local host testing as they rely on -additional executables to run via `subprocess` that are not installed via `pip`. If you wish -to verify these tests, either install the relevant executable (which can be observed in Backend -Dockerfile - build/python/backend/Dockerfile) or simply build Strelka - of which the docker build -logs show test outcomes. +======================= 155 passed, 4 warnings in 8.55s ======================== ``` ## Use Cases diff --git a/src/python/bin/strelka-backend b/src/python/bin/strelka-backend index fde78628..a0aa86eb 100644 --- a/src/python/bin/strelka-backend +++ b/src/python/bin/strelka-backend @@ -5,338 +5,71 @@ strelka-backend Command line utility for running Strelka backend server components. """ import argparse -import glob -import importlib import logging.config -import math import os -import re -import string import sys -import signal -import time - -import inflection -import magic import redis import yaml -import yara - -from strelka import strelka - - -class Backend(object): - def __init__(self, backend_cfg, coordinator): - self.scanner_cache = {} - self.backend_cfg = backend_cfg - self.coordinator = coordinator - self.limits = backend_cfg.get('limits') - self.scanners = backend_cfg.get('scanners') - - self.compiled_magic = magic.Magic( - magic_file=backend_cfg.get('tasting').get('mime_db'), - mime=True, - ) - - yara_rules = backend_cfg.get('tasting').get('yara_rules') - if os.path.isdir(yara_rules): - yara_filepaths = {} - globbed_yara = glob.iglob( - f'{yara_rules}/**/*.yar*', - recursive=True, - ) - for (i, entry) in enumerate(globbed_yara): - yara_filepaths[f'namespace{i}'] = entry - self.compiled_yara = yara.compile(filepaths=yara_filepaths) - else: - self.compiled_yara = yara.compile(filepath=yara_rules) - - def work(self): - logging.info('starting up') - - count = 0 - work_start = time.time() - work_expire = work_start + self.limits.get('time_to_live') - - while 1: - if self.limits.get('max_files') != 0: - if count >= self.limits.get('max_files'): - break - if self.limits.get('time_to_live') != 0: - if time.time() >= work_expire: - break - - task = self.coordinator.zpopmin('tasks', count=1) - if len(task) == 0: - time.sleep(0.25) - continue - - (root_id, expire_at) = task[0] - root_id = root_id.decode() - file = strelka.File(pointer=root_id) - expire_at = math.ceil(expire_at) - timeout = math.ceil(expire_at - time.time()) - if timeout <= 0: - continue - - try: - self.signal = signal.signal( - signal.SIGALRM, - 
timeout_handler(strelka.RequestTimeout) - ) - signal.alarm(timeout) - self.distribute(root_id, file, expire_at) - p = self.coordinator.pipeline(transaction=False) - p.rpush(f'event:{root_id}', 'FIN') - p.expireat(f'event:{root_id}', expire_at) - p.execute() - signal.alarm(0) - except strelka.RequestTimeout: - logging.debug(f'request {root_id} timed out') - except Exception: - signal.alarm(0) - logging.exception('unknown exception (see traceback below)') - - count += 1 - - logging.info(f'shutdown after scanning {count} file(s) and' - f' {time.time() - work_start} second(s)') - - def taste_mime(self, data): - """Tastes file data with libmagic.""" - return [self.compiled_magic.from_buffer(data)] - - def taste_yara(self, data): - """Tastes file data with YARA.""" - encoded_whitespace = string.whitespace.encode() - stripped_data = data.lstrip(encoded_whitespace) - yara_matches = self.compiled_yara.match(data=stripped_data) - return [match.rule for match in yara_matches] - - def distribute(self, root_id, file, expire_at): - """Distributes a file through scanners.""" - try: - files = [] - try: - self.signal = signal.signal( - signal.SIGALRM, - timeout_handler(strelka.DistributionTimeout) - ) - signal.alarm(self.limits.get('distribution')) - if file.depth > self.limits.get('max_depth'): - logging.info(f'request {root_id} exceeded maximum depth') - return - data = b'' - while 1: - pop = self.coordinator.lpop(f'data:{file.pointer}') - if pop is None: - break - data += pop - - file.add_flavors({'mime': self.taste_mime(data)}) - file.add_flavors({'yara': self.taste_yara(data)}) - flavors = ( - file.flavors.get('external', []) - + file.flavors.get('mime', []) - + file.flavors.get('yara', []) - ) - - scanner_list = [] - for name in self.scanners: - mappings = self.scanners.get(name, {}) - assigned = self.assign_scanner( - name, - mappings, - flavors, - file, - ) - if assigned is not None: - scanner_list.append(assigned) - scanner_list.sort( - key=lambda k: k.get('priority', 5), - reverse=True, - ) - - p = self.coordinator.pipeline(transaction=False) - tree_dict = { - 'node': file.uid, - 'parent': file.parent, - 'root': root_id, - } - - if file.depth == 0: - tree_dict['node'] = root_id - if file.depth == 1: - tree_dict['parent'] = root_id - - file_dict = { - 'depth': file.depth, - 'name': file.name, - 'flavors': file.flavors, - 'scanners': [s.get('name') for s in scanner_list], - 'size': len(data), - 'source': file.source, - 'tree': tree_dict, - } - scan = {} - - for scanner in scanner_list: - try: - name = scanner['name'] - und_name = inflection.underscore(name) - scanner_import = f'strelka.scanners.{und_name}' - module = importlib.import_module(scanner_import) - if und_name not in self.scanner_cache: - attr = getattr(module, name)(self.backend_cfg, self.coordinator) - self.scanner_cache[und_name] = attr - options = scanner.get('options', {}) - plugin = self.scanner_cache[und_name] - (f, s) = plugin.scan_wrapper( - data, - file, - options, - expire_at, - ) - files.extend(f) - - scan = { - **scan, - **s, - } - - except ModuleNotFoundError: - logging.exception(f'scanner {name} not found') - - event = { - **{'file': file_dict}, - **{'scan': scan}, - } - - p.rpush(f'event:{root_id}', strelka.format_event(event)) - p.expireat(f'event:{root_id}', expire_at) - p.execute() - signal.alarm(0) - - except strelka.DistributionTimeout: - logging.exception(f'node {file.uid} timed out') - - for f in files: - f.parent = file.uid - f.depth = file.depth + 1 - self.distribute(root_id, f, expire_at) - - except 
strelka.RequestTimeout: - signal.alarm(0) - raise - - def assign_scanner(self, scanner, mappings, flavors, file): - """Assigns scanners based on mappings and file data. - - Performs the task of assigning scanners based on the scan configuration - mappings and file flavors, filename, and source. Assignment supports - positive and negative matching: scanners are assigned if any positive - categories are matched and no negative categories are matched. Flavors are - literal matches, filename and source matches uses regular expressions. - - Args: - scanner: Name of the scanner to be assigned. - mappings: List of dictionaries that contain values used to assign - the scanner. - flavors: List of file flavors to use during scanner assignment. - filename: Filename to use during scanner assignment. - source: File source to use during scanner assignment. - Returns: - Dictionary containing the assigned scanner or None. - """ - for mapping in mappings: - negatives = mapping.get('negative', {}) - positives = mapping.get('positive', {}) - neg_flavors = negatives.get('flavors', []) - neg_filename = negatives.get('filename', None) - neg_source = negatives.get('source', None) - pos_flavors = positives.get('flavors', []) - pos_filename = positives.get('filename', None) - pos_source = positives.get('source', None) - assigned = {'name': scanner, - 'priority': mapping.get('priority', 5), - 'options': mapping.get('options', {})} - - for neg_flavor in neg_flavors: - if neg_flavor in flavors: - return None - if neg_filename is not None: - if re.search(neg_filename, file.name) is not None: - return None - if neg_source is not None: - if re.search(neg_source, file.source) is not None: - return None - for pos_flavor in pos_flavors: - if pos_flavor == '*' or pos_flavor in flavors: - return assigned - if pos_filename is not None: - if re.search(pos_filename, file.name) is not None: - return assigned - if pos_source is not None: - if re.search(pos_source, file.source) is not None: - return assigned - return None +from strelka import strelka def main(): - parser = argparse.ArgumentParser(prog='strelka-worker', - description='runs Strelka workers', - usage='%(prog)s [options]') - parser.add_argument('-c', '--worker-config', - action='store', - dest='backend_cfg_path', - help='path to server configuration file') + parser = argparse.ArgumentParser( + prog="strelka-worker", + description="runs Strelka workers", + usage="%(prog)s [options]", + ) + parser.add_argument( + "-c", + "--worker-config", + action="store", + dest="backend_cfg_path", + help="path to server configuration file", + ) args = parser.parse_args() - backend_cfg_path = '' + backend_cfg_path = "" if args.backend_cfg_path: if not os.path.exists(args.backend_cfg_path): - logging.exception(f'backend configuration {args.backend_cfg_path} does not exist') + logging.exception( + f"backend configuration {args.backend_cfg_path} does not exist" + ) sys.exit() backend_cfg_path = args.backend_cfg_path - elif os.path.exists('/etc/strelka/backend.yaml'): - backend_cfg_path = '/etc/strelka/backend.yaml' + elif os.path.exists("/etc/strelka/backend.yaml"): + backend_cfg_path = "/etc/strelka/backend.yaml" else: - logging.exception('no backend configuration found') + logging.exception("no backend configuration found") sys.exit() with open(backend_cfg_path) as f: backend_cfg = yaml.safe_load(f.read()) - log_cfg_path = backend_cfg.get('logging_cfg') + log_cfg_path = backend_cfg.get("logging_cfg") with open(log_cfg_path) as f: logging.config.dictConfig(yaml.safe_load(f.read())) - 
logging.info(f'using backend configuration {backend_cfg_path}') + logging.info(f"using backend configuration {backend_cfg_path}") try: - coordinator_cfg = backend_cfg.get('coordinator') - coordinator_addr = coordinator_cfg.get('addr').split(':') + coordinator_cfg = backend_cfg.get("coordinator") + coordinator_addr = coordinator_cfg.get("addr").split(":") coordinator = redis.StrictRedis( host=coordinator_addr[0], port=coordinator_addr[1], - db=coordinator_cfg.get('db'), + db=coordinator_cfg.get("db"), ) if coordinator.ping(): - logging.debug('verified coordinator is up') + logging.debug("verified coordinator is up") except Exception: - logging.exception('coordinator unavailable') + logging.exception("coordinator unavailable") sys.exit() - backend = Backend(backend_cfg, coordinator) + backend = strelka.Backend(backend_cfg, coordinator) backend.work() -def timeout_handler(ex): - """Signal timeout handler""" - def fn(signum, frame): - raise ex - return fn -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/src/python/strelka/__init__.py b/src/python/strelka/__init__.py index 2ae28399..e69de29b 100644 --- a/src/python/strelka/__init__.py +++ b/src/python/strelka/__init__.py @@ -1 +0,0 @@ -pass diff --git a/src/python/strelka/__main__.py b/src/python/strelka/__main__.py new file mode 100644 index 00000000..c8ca0222 --- /dev/null +++ b/src/python/strelka/__main__.py @@ -0,0 +1,5 @@ + + +if __name__ == '__main__': + + pass diff --git a/src/python/strelka/scanners/scan_url.py b/src/python/strelka/scanners/scan_url.py index 8c111785..99695a45 100644 --- a/src/python/strelka/scanners/scan_url.py +++ b/src/python/strelka/scanners/scan_url.py @@ -35,7 +35,7 @@ def scan(self, data, file, options, expire_at): else: url_regex = self.regexes['default'] - normalized_data = strelka.normalize_whitespace(data) + normalized_data = b' '.join(data.split()) self.event.setdefault('urls', []) urls = url_regex.findall(normalized_data) for url in urls: diff --git a/src/python/strelka/strelka.py b/src/python/strelka/strelka.py index ed3c69cb..80e56da9 100644 --- a/src/python/strelka/strelka.py +++ b/src/python/strelka/strelka.py @@ -1,16 +1,27 @@ +import glob +import importlib +import ipaddress +import itertools import json import logging +import math +import os import re import signal +import string import time import traceback import uuid +from types import FrameType +from typing import Any, Callable, Generator, Optional, Tuple, Union -from boltons import iterutils import inflection -from tldextract import TLDExtract -import ipaddress -import validators +import magic # type: ignore +import redis +import validators # type: ignore +import yara # type: ignore +from boltons import iterutils # type: ignore +from tldextract import TLDExtract # type: ignore class RequestTimeout(Exception): @@ -46,19 +57,20 @@ class File(object): name: String that contains the name of the file. source: String that describes which scanner the file originated from. 
""" - def __init__(self, pointer='', - parent='', depth=0, - name='', source=''): + + def __init__(self, pointer: str = '', + parent: str = '', depth: int = 0, + name: str = '', source: str = '') -> None: """Inits file object.""" - self.flavors = {} + self.flavors: dict = {} self.uid = str(uuid.uuid4()) - self.depth = depth - self.name = name - self.parent = parent - self.pointer = pointer or self.uid - self.source = source + self.depth: int = depth + self.name: str = name + self.parent: str = parent + self.pointer: str = pointer or self.uid + self.source: str = source - def add_flavors(self, flavors): + def add_flavors(self, flavors: dict): """Adds flavors to the file. In cases where flavors and self.flavors share duplicate keys, flavors @@ -67,6 +79,276 @@ def add_flavors(self, flavors): self.flavors = {**self.flavors, **flavors} +def timeout_handler(ex): + """Signal timeout handler""" + + def fn(signal_number: int, frame: Optional[FrameType]): + raise ex + + return fn + + +class Backend(object): + def __init__(self, backend_cfg: dict, coordinator: redis.StrictRedis) -> None: + self.scanner_cache: dict = {} + self.backend_cfg: dict = backend_cfg + self.coordinator: redis.StrictRedis = coordinator + self.limits: dict = backend_cfg.get('limits', {}) + self.scanners: dict = backend_cfg.get('scanners', {}) + + self.compiled_magic = magic.Magic( + magic_file=backend_cfg.get('tasting', {}).get('mime_db', ''), + mime=True, + ) + + yara_rules = backend_cfg.get('tasting', {}).get('yara_rules', '/etc/strelka/taste/') + if os.path.isdir(yara_rules): + yara_filepaths = {} + globbed_yara = glob.iglob( + f'{yara_rules}/**/*.yar*', + recursive=True, + ) + for (i, entry) in enumerate(globbed_yara): + yara_filepaths[f'namespace{i}'] = entry + self.compiled_yara = yara.compile(filepaths=yara_filepaths) + else: + self.compiled_yara = yara.compile(filepath=yara_rules) + + def match_flavors(self, data: bytes) -> dict: + return {'mime': self.taste_mime(data), 'yara': self.taste_yara(data)} + + def work(self) -> None: + logging.info('starting up') + + count = 0 + work_start = time.time() + work_expire = work_start + self.limits.get('time_to_live', 900) + + while 1: + if self.limits.get('max_files') != 0: + if count >= self.limits.get('max_files', 5000): + break + if self.limits.get('time_to_live') != 0: + if time.time() >= work_expire: + break + + task = self.coordinator.zpopmin('tasks', count=1) + if len(task) == 0: + time.sleep(0.25) + continue + + (root_id, expire_at) = task[0] + root_id = root_id.decode() + file = File(pointer=root_id) + expire_at = math.ceil(expire_at) + timeout = math.ceil(expire_at - time.time()) + if timeout <= 0: + continue + + try: + signal.signal(signal.SIGALRM, timeout_handler(RequestTimeout)) + signal.alarm(timeout) + self.distribute(root_id, file, expire_at) + p = self.coordinator.pipeline(transaction=False) + p.rpush(f'event:{root_id}', 'FIN') + p.expireat(f'event:{root_id}', expire_at) + p.execute() + signal.alarm(0) + except RequestTimeout: + logging.debug(f'request {root_id} timed out') + except Exception: + signal.alarm(0) + logging.exception('unknown exception (see traceback below)') + + count += 1 + + logging.info(f'shutdown after scanning {count} file(s) and' + f' {time.time() - work_start} second(s)') + + def taste_mime(self, data: bytes) -> list: + """Tastes file data with libmagic.""" + return [self.compiled_magic.from_buffer(data)] + + def taste_yara(self, data: bytes) -> list: + """Tastes file data with YARA.""" + encoded_whitespace = string.whitespace.encode() + 
stripped_data = data.lstrip(encoded_whitespace) + yara_matches = self.compiled_yara.match(data=stripped_data) + return [match.rule for match in yara_matches] + + def distribute(self, root_id: str, file: File, expire_at: int) -> None: + """Distributes a file through scanners.""" + try: + files = [] + + try: + signal.signal(signal.SIGALRM, timeout_handler(DistributionTimeout)) + signal.alarm(self.limits.get('distribution', 600)) + if file.depth > self.limits.get('max_depth', 15): + logging.info(f'request {root_id} exceeded maximum depth') + return + + data = b'' + while 1: + pop = self.coordinator.lpop(f'data:{file.pointer}') + if pop is None: + break + data += pop + + file.add_flavors(self.match_flavors(data)) + + scanner_list = self.match_scanners(file) + + p = self.coordinator.pipeline(transaction=False) + tree_dict = { + 'node': file.uid, + 'parent': file.parent, + 'root': root_id, + } + + if file.depth == 0: + tree_dict['node'] = root_id + if file.depth == 1: + tree_dict['parent'] = root_id + + file_dict = { + 'depth': file.depth, + 'name': file.name, + 'flavors': file.flavors, + 'scanners': [s.get('name') for s in scanner_list], + 'size': len(data), + 'source': file.source, + 'tree': tree_dict, + } + scan: dict = {} + + for scanner in scanner_list: + try: + name = scanner['name'] + und_name = inflection.underscore(name) + scanner_import = f'strelka.scanners.{und_name}' + module = importlib.import_module(scanner_import) + if und_name not in self.scanner_cache: + attr = getattr(module, name)(self.backend_cfg, self.coordinator) + self.scanner_cache[und_name] = attr + options = scanner.get('options', {}) + plugin = self.scanner_cache[und_name] + (f, s) = plugin.scan_wrapper( + data, + file, + options, + expire_at, + ) + files.extend(f) + + scan = { + **scan, + **s, + } + + except ModuleNotFoundError: + logging.exception(f'scanner {scanner.get("name", "__missing__")} not found') + + event = { + **{'file': file_dict}, + **{'scan': scan}, + } + + p.rpush(f'event:{root_id}', format_event(event)) + p.expireat(f'event:{root_id}', expire_at) + p.execute() + signal.alarm(0) + + except DistributionTimeout: + logging.exception(f'node {file.uid} timed out') + + for f in files: + f.parent = file.uid + f.depth = file.depth + 1 + self.distribute(root_id, f, expire_at) + + except RequestTimeout: + signal.alarm(0) + raise + + def match_scanner(self, scanner: str, mappings: list, file: File, ignore_wildcards: Optional[bool] = False) -> dict: + """Matches a scanner to mappings and file data. + + Performs the task of assigning a scanner based on the scan configuration + mappings and file flavors, filename, and source. Assignment supports + positive and negative matching: scanners are assigned if any positive + categories are matched and no negative categories are matched. Flavors are + literal matches, filename and source matches uses regular expressions. + + Args: + scanner: Name of the scanner to be assigned. + mappings: List of dictionaries that contain values used to assign + the scanner. + file: File object to use during scanner assignment. + ignore_wildcards: Filter out wildcard scanner matches + Returns: + Dictionary containing the assigned scanner or None. 
+ """ + for mapping in mappings: + negatives = mapping.get('negative', {}) + positives = mapping.get('positive', {}) + neg_flavors = negatives.get('flavors', []) + neg_filename = negatives.get('filename', None) + neg_source = negatives.get('source', None) + pos_flavors = positives.get('flavors', []) + pos_filename = positives.get('filename', None) + pos_source = positives.get('source', None) + assigned = {'name': scanner, + 'priority': mapping.get('priority', 5), + 'options': mapping.get('options', {})} + + for neg_flavor in neg_flavors: + if neg_flavor in itertools.chain(*file.flavors.values()): + return {} + if neg_filename is not None: + if re.search(neg_filename, file.name) is not None: + return {} + if neg_source is not None: + if re.search(neg_source, file.source) is not None: + return {} + for pos_flavor in pos_flavors: + if (pos_flavor == '*' and not ignore_wildcards) or pos_flavor in itertools.chain(*file.flavors.values()): + return assigned + if pos_filename is not None: + if re.search(pos_filename, file.name) is not None: + return assigned + if pos_source is not None: + if re.search(pos_source, file.source) is not None: + return assigned + + return {} + + def match_scanners(self, file: File, ignore_wildcards: Optional[bool] = False) -> list: + """ + Wraps match_scanner + + Args: + file: File object to use during scanner assignment. + ignore_wildcards: Filter out wildcard scanner matches. + Returns: + List of scanner dictionaries. + """ + scanner_list = [] + + for name in self.scanners: + mappings = self.scanners.get(name, {}) + scanner = self.match_scanner(name, mappings, file, ignore_wildcards) + if scanner: + scanner_list.append(scanner) + + scanner_list.sort( + key=lambda k: k.get('priority', 5), + reverse=True, + ) + + return scanner_list + + class IocOptions(object): """ Defines an ioc options object that can be used to specify the ioc_type for developers as opposed to using a @@ -100,33 +382,37 @@ class Scanner(object): (see scan_wrapper). coordinator: Redis client connection to the coordinator. """ - def __init__(self, backend_cfg, coordinator): + + def __init__(self, backend_cfg: dict, coordinator: redis.StrictRedis) -> None: """Inits scanner with scanner name and metadata key.""" self.name = self.__class__.__name__ self.key = inflection.underscore(self.name.replace('Scan', '')) self.scanner_timeout = backend_cfg.get('limits', {}).get('scanner', 10) - self.signal = None self.coordinator = coordinator - self.event = dict() - self.files = [] - self.flags = [] - self.iocs = [] + self.event: dict = dict() + self.files: list = [] + self.flags: list = [] + self.iocs: list = [] self.type = IocOptions - self.extract = TLDExtract(suffix_list_urls=None) + self.extract = TLDExtract(suffix_list_urls=[]) self.init() - def init(self): + def init(self) -> None: """Overrideable init. This method can be used to setup one-time variables required during scanning.""" pass + def timeout_handler(self, signal_number: int, frame: Optional[FrameType]) -> None: + """Signal ScannerTimeout""" + raise ScannerTimeout + def scan(self, data, file, options, - expire_at): + expire_at) -> None: """Overrideable scan method. Args: @@ -141,7 +427,7 @@ def scan_wrapper(self, data, file, options, - expire_at): + expire_at) -> Tuple[list, dict]: """Sets up scan attributes and calls scan method. Scanning code is wrapped in try/except for error handling. 
@@ -168,7 +454,7 @@ def scan_wrapper(self, self.scanner_timeout or 10) try: - self.signal = signal.signal(signal.SIGALRM, timeout_handler) + signal.signal(signal.SIGALRM, self.timeout_handler) signal.alarm(self.scanner_timeout) self.scan(data, file, options, expire_at) signal.alarm(0) @@ -193,7 +479,7 @@ def scan_wrapper(self, {self.key: self.event} ) - def upload_to_coordinator(self, pointer, chunk, expire_at): + def upload_to_coordinator(self, pointer, chunk, expire_at) -> None: """Uploads data to coordinator. This method is used during scanning to upload data to coordinator, @@ -211,7 +497,7 @@ def upload_to_coordinator(self, pointer, chunk, expire_at): p.expireat(f'data:{pointer}', expire_at) p.execute() - def process_ioc(self, ioc, ioc_type, scanner_name, description='', malicious=False): + def process_ioc(self, ioc, ioc_type, scanner_name, description='', malicious=False) -> None: if not ioc: return if ioc_type == 'url': @@ -243,7 +529,7 @@ def process_ioc(self, ioc, ioc_type, scanner_name, description='', malicious=Fal else: self.iocs.append({'ioc': ioc, 'ioc_type': ioc_type, 'scanner': scanner_name, 'description': description}) - def add_iocs(self, ioc, ioc_type, description='', malicious=False): + def add_iocs(self, ioc, ioc_type, description='', malicious=False) -> None: """Adds ioc to the iocs. :param ioc: The IOC or list of IOCs to be added. All iocs must be of the same type. Must be type String or Bytes. :param ioc_type: Must be one of md5, sha1, sha256, domain, url, email, ip, either as string or type object (e.g. self.type.domain). @@ -261,21 +547,23 @@ def add_iocs(self, ioc, ioc_type, description='', malicious=False): if isinstance(i, bytes): i = i.decode() if not isinstance(i, str): - logging.warning(f"Could not process {i} from {self.name}: Type {type(i)} is not type Bytes or String") + logging.warning( + f"Could not process {i} from {self.name}: Type {type(i)} is not type Bytes or String") continue self.process_ioc(i, ioc_type, self.name, description=description, malicious=malicious) else: if isinstance(ioc, bytes): ioc = ioc.decode() if not isinstance(ioc, str): - logging.warning(f"Could not process {ioc} from {self.name}: Type {type(ioc)} is not type Bytes or String") + logging.warning( + f"Could not process {ioc} from {self.name}: Type {type(ioc)} is not type Bytes or String") return self.process_ioc(ioc, ioc_type, self.name, description=description, malicious=malicious) except Exception as e: logging.error(f"Failed to add {ioc} from {self.name}: {e}") -def chunk_string(s, chunk=1024 * 16): +def chunk_string(s, chunk=1024 * 16) -> Generator[bytes, None, None]: """Takes an input string and turns it into smaller byte pieces. This method is required for inserting data into coordinator. @@ -290,28 +578,7 @@ def chunk_string(s, chunk=1024 * 16): yield s[c:c + chunk] -def normalize_whitespace(text): - """Normalizes whitespace in text. - - Scanners that parse text generally need whitespace normalized, otherwise - metadata parsed from the text may be unreliable. This function normalizes - whitespace characters to a single space. - - Args: - text: Text that needs whitespace normalized. - Returns: - Text with whitespace normalized. - """ - if isinstance(text, bytes): - text = re.sub(br'\s+', b' ', text) - text = re.sub(br'(^\s+|\s+$)', b'', text) - elif isinstance(text, str): - text = re.sub(r'\s+', ' ', text) - text = re.sub(r'(^\s+|\s+$)', '', text) - return text - - -def format_event(metadata): +def format_event(metadata) -> str: """Formats file metadata into an event. 
This function must be used on file metadata before the metadata is @@ -328,6 +595,7 @@ def format_event(metadata): Returns: JSON-formatted file event. """ + def visit(path, key, value): if isinstance(value, (bytes, bytearray)): value = str(value, encoding='UTF-8', errors='replace') @@ -339,8 +607,3 @@ def visit(path, key, value): lambda p, k, v: v != '' and v != [] and v != {} and v is not None, ) return json.dumps(remap2) - - -def timeout_handler(signum, frame): - """Signal ScannerTimeout""" - raise ScannerTimeout diff --git a/src/python/strelka/tests/__init__.py b/src/python/strelka/tests/__init__.py index 0b1b3c9f..a42918b6 100644 --- a/src/python/strelka/tests/__init__.py +++ b/src/python/strelka/tests/__init__.py @@ -13,9 +13,9 @@ def run_test_scan( if options is None: options = {} if "scanner_timeout" not in options: - options["scanner_timeout"] = 10 + options["scanner_timeout"] = 20 if backend_cfg is None: - backend_cfg = {"limits": {"scanner": 10}} + backend_cfg = {"limits": {"scanner": 20}} scanner = scan_class(backend_cfg, "test_coordinate") diff --git a/src/python/strelka/tests/fixtures/test.url b/src/python/strelka/tests/fixtures/test.url new file mode 100644 index 00000000..6531a8c3 --- /dev/null +++ b/src/python/strelka/tests/fixtures/test.url @@ -0,0 +1,5 @@ +Lorem ipsum dolor sit amet, consectetur adipiscing elit http://foobar.example.com, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. + +Elementum sagittis vitae et leo (ftp://barfoo.example.com/) duis ut diam. Nulla facilisi etiam dignissim diam quis. Volutpat commodo sed egestas egestas. example.com + +Malesuada fames ac turpis egestas maecenas pharetra https://barfoo.example.com/ convallis posuere morbi. Tortor consequat id mailto:user@example.com porta nibh venenatis cras sed. Etiam tempor orci eu lobortis elementum nibh tellus molestie. Id eu nisl nunc mi ipsum. Suspendisse sed nisi lacus sed viverra tellus. Consectetur adipiscing elit duis tristique sollicitudin nibh sit. Bibendum ut tristique et egestas. Amet nisl suscipit adipiscing bibendum est ultricies integer quis auctor. Eget aliquet nibh praesent tristique magna sit amet. Nec ultrices dui sapien eget mi proin sed. Venenatis tellus in metus vulputate. Dictumst vestibulum rhoncus est pellentesque elit ullamcorper. 
\ No newline at end of file diff --git a/src/python/strelka/tests/fixtures/test_passwords.dat b/src/python/strelka/tests/helpers/test_passwords.dat similarity index 100% rename from src/python/strelka/tests/fixtures/test_passwords.dat rename to src/python/strelka/tests/helpers/test_passwords.dat diff --git a/src/python/strelka/tests/test_scan_encrypted_doc.py b/src/python/strelka/tests/test_scan_encrypted_doc.py index c48afe6c..e77cb981 100644 --- a/src/python/strelka/tests/test_scan_encrypted_doc.py +++ b/src/python/strelka/tests/test_scan_encrypted_doc.py @@ -21,7 +21,7 @@ def test_scan_encrypted_doc(mocker): mocker=mocker, scan_class=ScanUnderTest, fixture_path=Path(__file__).parent / "fixtures/test_password.doc", - options={"log_pws": True, "password_file": str(Path(Path(__file__).parent / "fixtures/test_passwords.dat"))} + options={"log_pws": True, "password_file": str(Path(Path(__file__).parent / "helpers/test_passwords.dat"))} ) TestCase.maxDiff = None @@ -44,7 +44,7 @@ def test_scan_encrypted_docx(mocker): mocker=mocker, scan_class=ScanUnderTest, fixture_path=Path(__file__).parent / "fixtures/test_password.docx", - options={"log_pws": True, "password_file": str(Path(Path(__file__).parent / "fixtures/test_passwords.dat"))} + options={"log_pws": True, "password_file": str(Path(Path(__file__).parent / "helpers/test_passwords.dat"))} ) TestCase.maxDiff = None diff --git a/src/python/strelka/tests/test_scan_ocr.py b/src/python/strelka/tests/test_scan_ocr.py index a41d927a..57b16fe6 100644 --- a/src/python/strelka/tests/test_scan_ocr.py +++ b/src/python/strelka/tests/test_scan_ocr.py @@ -420,5 +420,3 @@ def test_scan_ocr_webp(mocker): TestCase.maxDiff = None TestCase().assertDictEqual(test_scan_event, scanner_event) - - diff --git a/src/python/strelka/tests/test_scan_url.py b/src/python/strelka/tests/test_scan_url.py index 5a39d61b..ff2a2575 100644 --- a/src/python/strelka/tests/test_scan_url.py +++ b/src/python/strelka/tests/test_scan_url.py @@ -1,36 +1,54 @@ -import pytest -import datetime -from strelka.scanners.scan_url import ScanUrl - -scanner = ScanUrl( - {"name": "ScanUrl", "key": "scan_url", "limits": {"scanner": 10}}, - "test_coordinate", -) - -tests = [ - (b"some othervalue foo", []), - (b"http://foobar.test.com", [b"http://foobar.test.com"]), - (b"foo http://foobar.test.com bar", [b"http://foobar.test.com"]), - (b"http://\n", []), - (b"noschema.foo\n", [b"noschema.foo"]), -] - - -@pytest.mark.parametrize("data,expected", tests) -def test_scan_simple_url(data, expected): +from pathlib import Path +from unittest import TestCase, mock + +from strelka.scanners.scan_url import ScanUrl as ScanUnderTest +from strelka.tests import run_test_scan + + +def test_scan_url_text(mocker): """ - This tests the ScanURL scanner. - It attempts to validate the extraction of several URLs against - their URLs extracted from the ScanURL scanner. + Pass: Sample event matches output of scanner. + Failure: Unable to load file or sample event fails to match. + """ + + test_scan_event = { + "elapsed": mock.ANY, + "flags": [], + "urls": [ + b"http://foobar.example.com", + b"ftp://barfoo.example.com", + b"example.com", + b"https://barfoo.example.com", + ], + } + + scanner_event = run_test_scan( + mocker=mocker, + scan_class=ScanUnderTest, + fixture_path=Path(__file__).parent / "fixtures/test.url", + ) + + TestCase.maxDiff = None + TestCase().assertDictEqual(test_scan_event, scanner_event) + - Pass: All URLs successfully extracted or tests passed. 
- Failure: Unable to extract URLs successfully or extracts undefined URLs. +def test_scan_url_html(mocker): """ + Pass: Sample event matches output of scanner. + Failure: Unable to load file or sample event fails to match. + """ + + test_scan_event = { + "elapsed": mock.ANY, + "flags": [], + "urls": [b"https://example.com/example.js"], + } - scanner.scan_wrapper( - data, - "somefile.foo", - {"length": 4, "scanner_timeout": 5}, - datetime.date.today(), + scanner_event = run_test_scan( + mocker=mocker, + scan_class=ScanUnderTest, + fixture_path=Path(__file__).parent / "fixtures/test.html", ) - assert scanner.event.get("urls") == expected + + TestCase.maxDiff = None + TestCase().assertDictEqual(test_scan_event, scanner_event) diff --git a/src/python/strelka/tests_configuration/__init__.py b/src/python/strelka/tests_configuration/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/python/strelka/tests_configuration/test_scanner_assignment.py b/src/python/strelka/tests_configuration/test_scanner_assignment.py new file mode 100644 index 00000000..f56566c9 --- /dev/null +++ b/src/python/strelka/tests_configuration/test_scanner_assignment.py @@ -0,0 +1,235 @@ +from pathlib import Path +from unittest import TestCase, mock + +import os +import pytest +import redis +import yaml +from strelka import strelka + +# Scanners that apply to all files (*) are not included +test_assignments_expected = { + "test.7z": ["ScanLibarchive"], + "test.b64": ["ScanUrl"], + "test.bat": ["ScanBatch"], + "test.bz2": ["ScanBzip2"], + "test.cpio": ["ScanLibarchive"], + "test.deb": ["ScanLibarchive"], + "test.der": ["ScanX509"], + "test.dmg": ["ScanDmg"], + "test.doc": ["ScanEncryptedZip", "ScanExiftool", "ScanOle", "ScanVba"], + "test.docx": ["ScanDocx", "ScanZip"], + "test.elf": [], + "test.eml": ["ScanEmail"], + "test.empty": [], + "test.exe": ["ScanPe"], + "test.gif": ["ScanExiftool", "ScanGif"], + "test.gz": ["ScanGzip"], + "test.html": ["ScanHtml"], + "test.ini": ["ScanUrl"], + "test.iso": ["ScanIso"], + "test.jpg": [ + "ScanExiftool", + "ScanJpeg", + "ScanLsb", + "ScanNf", + "ScanOcr", + "ScanQr", + ], + "test.js": ["ScanJavascript"], + "test.json": ["ScanJson"], + "test.lnk": ["ScanExiftool", "ScanLNK"], + "test.macho": ["ScanMacho"], + "test.msi": ["ScanExiftool", "ScanOle", "ScanVba"], # TODO: Needs CDF subtype + "test.pcap": ["ScanPcap"], + "test.pcapng": [], + "test.pdf": ["ScanExiftool", "ScanPdf"], + "test.pem": ["ScanUrl", "ScanX509"], + "test.plist": ["ScanPlist", "ScanXml"], + "test.png": [ + "ScanExiftool", + "ScanLsb", + "ScanNf", + "ScanOcr", + "ScanPngEof", + "ScanQr", + ], + "test.rar": ["ScanRar"], + "test.tar": ["ScanTar"], + "test.txt": ["ScanUrl"], + "test.txt.asc": ["ScanPgp"], + "test.txt.gpg": [], # FIXME: Need binary PGP yara signature + "test.url": ["ScanUrl"], + "test.vhd": ["ScanVhd"], + "test.vhdx": ["ScanVhd"], + "test.webp": ["ScanExiftool", "ScanLsb", "ScanNf", "ScanOcr", "ScanQr"], + "test.xar": ["ScanLibarchive"], + "test.xml": ["ScanXml"], + "test.xz": ["ScanLzma"], + "test.yara": ["ScanUrl"], + "test.zip": ["ScanZip"], + "test_aes256_password.zip": ["ScanEncryptedZip", "ScanZip"], + "test_broken_iend.png": [ + "ScanExiftool", + "ScanLsb", + "ScanNf", + "ScanOcr", + "ScanPngEof", + "ScanQr", + ], + "test_classic.doc": ["ScanEncryptedZip", "ScanExiftool", "ScanOle", "ScanVba"], + "test_embed_rar.jpg": [ + "ScanExiftool", + "ScanJpeg", + "ScanLsb", + "ScanNf", + "ScanOcr", + "ScanQr", + ], + "test_embed_rar.png": [ + "ScanExiftool", + "ScanLsb", + "ScanNf", + 
"ScanOcr", + "ScanPngEof", + "ScanQr", + ], + "test_lzx.cab": ["ScanLibarchive"], + "test_manifest.json": ["ScanJson", "ScanManifest"], + "test_password.doc": ["ScanEncryptedZip", "ScanExiftool", "ScanOle", "ScanVba"], + "test_password.docx": [ + "ScanEncryptedDoc", + "ScanExiftool", + "ScanOle", + "ScanVba", + ], + "test_password_brute.doc": [ + "ScanEncryptedZip", + "ScanExiftool", + "ScanOle", + "ScanVba", + ], + "test_password_brute.docx": [ + "ScanEncryptedDoc", + "ScanExiftool", + "ScanOle", + "ScanVba", + ], + "test_pe.b64": ["ScanBase64PE", "ScanUrl"], + "test_pe_object.doc": [ + "ScanEncryptedZip", + "ScanExiftool", + "ScanOle", + "ScanVba", + ], + "test_pe_object_classic.doc": [ + "ScanEncryptedZip", + "ScanExiftool", + "ScanOle", + "ScanVba", + ], + "test_pe_overlay.bmp": [ + "ScanBmpEof", + "ScanExiftool", + "ScanLsb", + "ScanNf", + "ScanOcr", + "ScanQr", + ], + "test_pe_overlay.jpg": [ + "ScanExiftool", + "ScanJpeg", + "ScanLsb", + "ScanNf", + "ScanOcr", + "ScanQr", + ], + "test_pe_overlay.png": [ + "ScanExiftool", + "ScanLsb", + "ScanNf", + "ScanOcr", + "ScanPngEof", + "ScanQr", + ], + "test_pii.csv": [], # NOTE: ScanCcn not enabled + "test_private.pgp": ["ScanPgp"], + "test_public.pgp": ["ScanPgp"], + "test_qr.jpg": [ + "ScanExiftool", + "ScanJpeg", + "ScanLsb", + "ScanNf", + "ScanOcr", + "ScanQr", + ], + "test_qr.png": [ + "ScanExiftool", + "ScanLsb", + "ScanNf", + "ScanOcr", + "ScanPngEof", + "ScanQr", + ], + "test_qr.webp": ["ScanExiftool", "ScanLsb", "ScanNf", "ScanOcr", "ScanQr"], + "test_readonly.dmg": ["ScanDmg"], + "test_readwrite.dmg": [], # FIXME: Should be assigned to a scanner + "test_text.jpg": [ + "ScanExiftool", + "ScanJpeg", + "ScanLsb", + "ScanNf", + "ScanOcr", + "ScanQr", + ], + "test_text.png": [ + "ScanExiftool", + "ScanLsb", + "ScanNf", + "ScanOcr", + "ScanPngEof", + "ScanQr", + ], + "test_text.webp": ["ScanExiftool", "ScanLsb", "ScanNf", "ScanOcr", "ScanQr"], + "test_upx.exe": ["ScanPe", "ScanUpx"], + "test_xor.exe": ["ScanPe"], + "test_zip.cab": ["ScanLibarchive"], + "test_zip_password.zip": ["ScanEncryptedZip", "ScanZip"], +} + + +@pytest.mark.parametrize( + "fixture_path,expected", [(k, v) for k, v in test_assignments_expected.items()] +) +def test_fixture_scanner_assignment(fixture_path, expected) -> None: + """ + Pass: All test fixtures match the given yara and mime matches. + Failure: At least one test fixture does not match the given yara and mime matches. 
+ """ + + if os.path.exists("/etc/strelka/backend.yaml"): + backend_cfg_path: str = "/etc/strelka/backend.yaml" + else: + backend_cfg_path: str = Path(Path(__file__).parent / "../../../../configs/python/backend/backend.yaml") + + with open(backend_cfg_path, "r") as f: + backend_cfg = yaml.safe_load(f.read()) + + coordinator = redis.StrictRedis(host="127.0.0.1", port=65535, db=0) + + backend = strelka.Backend(backend_cfg, coordinator) + + assignments = [] + + with open( + Path(Path(__file__).parent / f"../tests/fixtures/{fixture_path}"), "rb" + ) as test_file: + data = test_file.read() + file = strelka.File() + file.add_flavors(backend.match_flavors(data)) + matches = backend.match_scanners(file, ignore_wildcards=True) + for match in matches: + assignments.append(match.get("name", "__missing__")) + + TestCase.maxDiff = None + TestCase().assertListEqual(expected, assignments) diff --git a/src/python/strelka/tests_configuration/test_taste.py b/src/python/strelka/tests_configuration/test_taste.py new file mode 100644 index 00000000..60eae89f --- /dev/null +++ b/src/python/strelka/tests_configuration/test_taste.py @@ -0,0 +1,171 @@ +import os +import redis +import yaml +import pytest + +from pathlib import Path +from unittest import TestCase, mock + +from strelka import strelka + + +taste_expectations: dict = { + "test.7z": {"mime": ["application/x-7z-compressed"], "yara": ["_7zip_file"]}, + "test.b64": {"mime": ["text/plain"], "yara": []}, # FIXME: No file-specific match + "test.bat": {"mime": ["text/x-msdos-batch"], "yara": []}, # FIXME: Not in backend.cfg + "test.bz2": {"mime": ["application/x-bzip2"], "yara": ["bzip2_file"]}, + "test.cpio": {"mime": ["application/x-cpio"], "yara": []}, + "test.deb": { + "mime": ["application/vnd.debian.binary-package"], + "yara": ["debian_package_file"], + }, + "test.der": {"mime": ["application/octet-stream"], "yara": ["x509_der_file"]}, + "test.dmg": {"mime": ["application/octet-stream"], "yara": ["hfsplus_disk_image"]}, + "test.doc": {"mime": ["application/msword"], "yara": ["olecf_file"]}, + "test.docx": { + "mime": [ + "application/vnd.openxmlformats-officedocument.wordprocessingml.document" + ], + "yara": ["ooxml_file"], + }, + "test.elf": {"mime": ["application/x-sharedlib"], "yara": ["elf_file"]}, + "test.eml": {"mime": ["message/rfc822"], "yara": ["email_file"]}, + "test.empty": {"mime": ["application/x-empty"], "yara": []}, + "test.exe": {"mime": ["application/x-dosexec"], "yara": ["mz_file"]}, + "test.gif": {"mime": ["image/gif"], "yara": ["gif_file"]}, + "test.gz": {"mime": ["application/gzip"], "yara": ["gzip_file"]}, + "test.html": {"mime": ["text/html"], "yara": ["html_file"]}, + "test.ini": {"mime": ["text/plain"], "yara": ["ini_file"]}, + "test.iso": {"mime": ["application/x-iso9660-image"], "yara": ["iso_file"]}, + "test.jpg": {"mime": ["image/jpeg"], "yara": ["jpeg_file"]}, + "test.js": {"mime": ["text/plain"], "yara": ["javascript_file"]}, + "test.json": {"mime": ["application/json"], "yara": ["json_file"]}, + "test.lnk": {"mime": ["application/octet-stream"], "yara": ["lnk_file"]}, + "test.macho": {"mime": ["application/x-mach-binary"], "yara": ["macho_file"]}, + "test.msi": { + "mime": ["application/vnd.ms-msi"], + "yara": ["olecf_file"], + }, # TODO: CDF format needs subtypes + "test.pcap": {"mime": ["application/vnd.tcpdump.pcap"], "yara": ["pcap_file"]}, + "test.pcapng": {"mime": ["application/octet-stream"], "yara": []}, # FIXME: pcapng_file broken + "test.pdf": {"mime": ["application/pdf"], "yara": ["pdf_file"]}, + "test.pem": 
{"mime": ["text/plain"], "yara": ["x509_pem_file"]}, + "test.plist": {"mime": ["text/xml"], "yara": ["plist_file", "xml_file"]}, + "test.png": {"mime": ["image/png"], "yara": ["png_file"]}, + "test.rar": {"mime": ["application/x-rar"], "yara": ["rar_file"]}, + "test.tar": {"mime": ["application/x-tar"], "yara": ["tar_file"]}, + "test.txt": {"mime": ["text/plain"], "yara": []}, + "test.txt.asc": {"mime": ["text/PGP"], "yara": ["pgp_file"]}, + "test.txt.gpg": {"mime": ["application/octet-stream"], "yara": []}, # FIXME: Need binary PGP yara signature + "test.url": {"mime": ["text/plain"], "yara": []}, + "test.vhd": {"mime": ["application/octet-stream"], "yara": ["vhd_file"]}, + "test.vhdx": {"mime": ["application/octet-stream"], "yara": ["vhdx_file"]}, + "test.webp": {"mime": ["image/webp"], "yara": []}, + "test.xar": {"mime": ["application/x-xar"], "yara": ["xar_file"]}, + "test.xml": {"mime": ["text/xml"], "yara": ["xml_file"]}, + "test.xz": {"mime": ["application/x-xz"], "yara": ["xz_file"]}, + "test.yara": {"mime": ["text/plain"], "yara": []}, + "test.zip": {"mime": ["application/zip"], "yara": ["zip_file"]}, + "test_aes256_password.zip": { + "mime": ["application/zip"], + "yara": ["encrypted_zip", "zip_file"], + }, + "test_broken_iend.png": {"mime": ["image/png"], "yara": ["png_file"]}, + "test_classic.doc": {"mime": ["application/msword"], "yara": ["olecf_file"]}, + "test_embed_rar.jpg": {"mime": ["image/jpeg"], "yara": ["jpeg_file"]}, + "test_embed_rar.png": {"mime": ["image/png"], "yara": ["png_file"]}, + "test_lzx.cab": { + "mime": ["application/vnd.ms-cab-compressed"], + "yara": ["cab_file"], + }, + "test_manifest.json": { + "mime": ["application/json"], + "yara": ["browser_manifest", "json_file"], + }, + "test_password.doc": {"mime": ["application/msword"], "yara": ["olecf_file"]}, + "test_password.docx": { + "mime": ["application/encrypted"], + "yara": ["encrypted_word_document", "olecf_file"], + }, + "test_password_brute.doc": {"mime": ["application/msword"], "yara": ["olecf_file"]}, + "test_password_brute.docx": { + "mime": ["application/encrypted"], + "yara": ["encrypted_word_document", "olecf_file"], + }, + "test_pe.b64": {"mime": ["text/plain"], "yara": ["base64_pe"]}, + "test_pe_object.doc": {"mime": ["application/msword"], "yara": ["olecf_file"]}, + "test_pe_object_classic.doc": {"mime": ["application/msword"], "yara": ["olecf_file"]}, + "test_pe_overlay.bmp": {"mime": ["image/bmp"], "yara": ["bmp_file"]}, + "test_pe_overlay.jpg": {"mime": ["image/jpeg"], "yara": ["jpeg_file"]}, + "test_pe_overlay.png": {"mime": ["image/png"], "yara": ["png_file"]}, + "test_pii.csv": {"mime": ["text/csv"], "yara": ["credit_cards"]}, + "test_private.pgp": {"mime": ["application/pgp-keys"], "yara": []}, + "test_public.pgp": {"mime": ["application/pgp-keys"], "yara": ["pgp_file"]}, + "test_qr.jpg": {"mime": ["image/jpeg"], "yara": ["jpeg_file"]}, + "test_qr.png": {"mime": ["image/png"], "yara": ["png_file"]}, + "test_qr.webp": {"mime": ["image/webp"], "yara": []}, + "test_readonly.dmg": { + "mime": ["application/octet-stream"], + "yara": ["dmg_disk_image"], + }, + "test_readwrite.dmg": {"mime": ["application/octet-stream"], "yara": []}, + "test_text.jpg": {"mime": ["image/jpeg"], "yara": ["jpeg_file"]}, + "test_text.png": {"mime": ["image/png"], "yara": ["png_file"]}, + "test_text.webp": {"mime": ["image/webp"], "yara": []}, + "test_upx.exe": { + "mime": ["application/x-dosexec"], + "yara": ["mz_file", "upx_file"], + }, + "test_xor.exe": {"mime": ["application/x-dosexec"], "yara": 
["mz_file"]}, + "test_zip.cab": { + "mime": ["application/vnd.ms-cab-compressed"], + "yara": ["cab_file"], + }, + "test_zip_password.zip": { + "mime": ["application/zip"], + "yara": ["encrypted_zip", "zip_file"], + }, +} + + +@pytest.mark.parametrize("fixture_path,expected", [(k, v) for k, v in taste_expectations.items()]) +def test_fixture_taste_output(fixture_path, expected) -> None: + """ + Pass: All test fixtures match the given yara and mime matches. + Failure: At least one test fixture does not match the given yara and mime matches. + """ + + if os.path.exists("/etc/strelka/backend.yaml"): + backend_cfg_path: str = "/etc/strelka/backend.yaml" + else: + backend_cfg_path: str = Path(Path(__file__).parent / "../../../../configs/python/backend/backend.yaml") + + with open(backend_cfg_path, "r") as f: + backend_cfg = yaml.safe_load(f.read()) + + coordinator = redis.StrictRedis(host="127.0.0.1", port=65535, db=0) + + backend = strelka.Backend(backend_cfg, coordinator) + + with open( + Path(Path(__file__).parent / f"../tests/fixtures/{fixture_path}"), "rb" + ) as test_file: + data = test_file.read() + taste = backend.match_flavors(data) + + TestCase.maxDiff = None + TestCase().assertDictEqual(expected, taste, msg=fixture_path) + + +def test_taste_required() -> None: + """ + Pass: All test fixtures in the given directory have assigned test_taste data. + Failure: A test fixture file exists that doesn't have a corresponding test_taste entry. + """ + + test_fixtures = sorted( + list(Path(Path(__file__).parent / "../tests/fixtures/").glob("test*")) + ) + + for test_fixture in test_fixtures: + TestCase().assertIn(os.path.basename(test_fixture), taste_expectations.keys(), msg="Fixture does not have a taste expectation")
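For contributors adding a new scanner (per the CONTRIBUTING.md guidance above), a minimal sketch of the accompanying pytest follows; it mirrors the run_test_scan pattern introduced in this change (see src/python/strelka/tests/test_scan_url.py and tests/__init__.py). The ScanFoo scanner, its module path, and the test.foo fixture are hypothetical placeholders, not part of this diff.

```python
# Hypothetical example: a new scanner test mirroring test_scan_url.py above.
# ScanFoo, strelka.scanners.scan_foo, and fixtures/test.foo are placeholders.
from pathlib import Path
from unittest import TestCase, mock

from strelka.scanners.scan_foo import ScanFoo as ScanUnderTest  # placeholder scanner
from strelka.tests import run_test_scan


def test_scan_foo(mocker):
    """
    Pass: Sample event matches output of scanner.
    Failure: Unable to load file or sample event fails to match.
    """

    # Fields produced by every scanner; scanner-specific keys would be added here.
    test_scan_event = {
        "elapsed": mock.ANY,
        "flags": [],
    }

    scanner_event = run_test_scan(
        mocker=mocker,
        scan_class=ScanUnderTest,
        fixture_path=Path(__file__).parent / "fixtures/test.foo",  # placeholder fixture
    )

    TestCase.maxDiff = None
    TestCase().assertDictEqual(test_scan_event, scanner_event)
```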