diff --git a/.github/workflows/pr-actions.yaml b/.github/workflows/pr-actions.yaml new file mode 100644 index 00000000..06407947 --- /dev/null +++ b/.github/workflows/pr-actions.yaml @@ -0,0 +1,51 @@ +name: pr-actions +on: [pull_request] + +jobs: + backend-strelka-test: + + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v3 + - uses: actions/setup-python@v4 + with: + + python-version: '3.x' + + architecture: 'x64' + - name: Install dependencies + run: | + sudo apt-get -q update + sudo apt-get install --no-install-recommends -qq automake \ + build-essential \ + libfuzzy-dev \ + gcc \ + git \ + libarchive-dev \ + libmagic-dev \ + libssl-dev \ + libzbar0 \ + libgl1 \ + python3-setuptools \ + libgmp-dev \ + libpcap-dev \ + libbz2-dev \ + libgomp1 \ + python3-dev \ + python3-wheel \ + mupdf-tools \ + mupdf \ + libglu1-mesa \ + libtool \ + pkg-config \ + swig \ + tesseract-ocr + python -m pip install --upgrade pip + pip install validators setuptools --upgrade + pip install --no-cache-dir -r src/python/requirements.txt + - name: Test with pytest + run: | + pytest + + diff --git a/.gitignore b/.gitignore index fedab71a..f6d44511 100644 --- a/.gitignore +++ b/.gitignore @@ -9,6 +9,7 @@ __pycache__/ # Distribution / packaging .Python develop-eggs/ +src/python/build dist/ downloads/ eggs/ diff --git a/docs/README.md b/docs/README.md index 9068561f..7f21245e 100644 --- a/docs/README.md +++ b/docs/README.md @@ -53,6 +53,8 @@ Strelka differs from its sibling projects in a few significant ways: * [Protobuf](#protobuf) * [Scanners](#scanners) * [Scanner List](#scanner-list) +* [Tests](#tests) + * [Setup](#tests-setup) * [Use Cases](#use-cases) * [Contributing](#contributing) * [Related Projects](#related-projects) @@ -594,6 +596,46 @@ The table below describes each scanner and its options. Each scanner has the hid | ScanZip | Extracts files from zip archives | "limit" -- maximum number of files to extract (defaults to 1000)
"password_file" -- location of passwords file for zip archives (defaults to etc/strelka/passwords.txt) | | ScanZlib | Decompresses gzip files | N/A +## Tests +As Strelka consists of many scanners and dependencies for those scanners, Pytests are particularly valuable for testing the ongoing functionality of Strelka and it's scanners. Tests allow users to write test cases that verify the correct behavior of Strelka scanners to ensure that the scanners remain reliable and accurate. Additionally, using pytests can help streamline the development process, allowing developers to focus on writing new features and improvements for the scanners. The following section details how to setup Pytests. + +If using Strelka on Github, this repository supports Github Actions which runs on Pull Requests + +### Tests Setup +Here are the steps for setting up a virtualenv virtual environment, installing requirements from src/python/requirements.txt, and running pytest: + +1. Install virtualenv, if it is not already installed: + +``` +pip install virtualenv +``` +2. Create a new virtual environment: + +``` +virtualenv +``` + +3. Activate the virtual environment: + +``` +source /bin/activate +``` + +4. Install the requirements from src/python/requirements.txt: + +``` +pip install -r src/python/requirements.txt +``` + +5. Run pytest to execute the test cases: + +``` +pytest +``` + +Upon execution, you will be provided the successes and failures for any available scanner test. + + ## Use Cases Below are some select use cases that show the value Strelka can add to a threat detection tech stack. Keep in mind that these results are parsed in real time without post-processing and are typically correlated with other detection/response tools (e.g. Bro, Volatility, etc.). The file metadata shown below was derived from files found in [VirusShare](https://virusshare.com/) torrent no. 323 and from a test file in the [MaliciousMacroBot (MMBot) repository](https://github.com/egaus/MaliciousMacroBot). diff --git a/src/python/requirements.txt b/src/python/requirements.txt new file mode 100644 index 00000000..95a4a212 --- /dev/null +++ b/src/python/requirements.txt @@ -0,0 +1,47 @@ +arc4==0.0.4 +beautifulsoup4==4.9.3 +boltons==20.2.1 +construct==2.10.67 +cryptography==3.4.7 +docker==5.0.0 +esprima==4.0.1 +eml-parser>=1.17 +git+https://github.com/jshlbrd/python-entropy.git # v0.11 as of this freeze (package installed as 'entropy') +html5lib==1.1 +inflection==0.5.1 +jsbeautifier==1.13.13 +libarchive-c==2.9 +lief==0.12.3 +lxml==4.9.1 +M2Crypto==0.38.0 +nested-lookup==0.2.22 +numpy==1.22.1 +olefile==0.46 +oletools==0.56.1 +opencv-python==4.6.0.66 +opencv-contrib-python==4.6.0.66 +pefile==2019.4.18 +pgpdump3==1.5.2 +pyelftools==0.27 +pygments==2.9.0 +pylzma==0.5.0 +pytesseract==0.3.7 +python-docx==0.8.10 +python-magic==0.4.22 +py-tlsh==4.7.2 +pyyaml>=5.4.1 +pyzbar==0.1.8 +pytz>=2022.1 +rarfile==4.0 +redis==3.5.3 +requests==2.25.1 +rpmfile==1.0.8 +signify==0.3.0 +speakeasy-emulator==1.5.2 +ssdeep==3.4 +tldextract==3.1.0 +tnefparse==1.4.0 +validators==0.18.2 +xmltodict==0.12.0 +pytest==7.2.0 +pytest-mock==3.10.0 diff --git a/src/python/strelka/strelka.py b/src/python/strelka/strelka.py index 3579ed29..c9d81688 100644 --- a/src/python/strelka/strelka.py +++ b/src/python/strelka/strelka.py @@ -92,6 +92,7 @@ class Scanner(object): This is referenced in the scanner metadata. key: String that contains the scanner's metadata key. This is used to identify the scanner metadata in scan results. + event: Dictionary containing the result of scan backend_cfg: Dictionary that contains the parsed backend configuration. scanner_timeout: Amount of time (in seconds) that a scanner can spend scanning a file. Can be overridden on a per-scanner basis @@ -104,6 +105,7 @@ def __init__(self, backend_cfg, coordinator): self.key = inflection.underscore(self.name.replace('Scan', '')) self.scanner_timeout = backend_cfg.get('limits').get('scanner') self.coordinator = coordinator + self.event = dict() self.iocs = [] self.type = IocOptions self.extract = TLDExtract(suffix_list_urls=None) @@ -159,7 +161,7 @@ def scan_wrapper(self, start = time.time() self.files = [] self.flags = [] - self.event = {} + self.event = dict() self.scanner_timeout = options.get('scanner_timeout', self.scanner_timeout) diff --git a/src/python/strelka/tests/__init__.py b/src/python/strelka/tests/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/python/strelka/tests/fixtures/test.gif b/src/python/strelka/tests/fixtures/test.gif new file mode 100644 index 00000000..4f8b0579 Binary files /dev/null and b/src/python/strelka/tests/fixtures/test.gif differ diff --git a/src/python/strelka/tests/test_scan_footer.py b/src/python/strelka/tests/test_scan_footer.py new file mode 100644 index 00000000..3d77d4d0 --- /dev/null +++ b/src/python/strelka/tests/test_scan_footer.py @@ -0,0 +1,28 @@ +import datetime +from strelka.scanners.scan_footer import ScanFooter + + +def test_scan_footer(): + """ + This tests the ScanFooter scanner. + It attempts to validate the extraction of a string from a file's content. + + Pass: File is loaded, scanned, and footer value "mcee" is successfully extracted. + Failure: Unable to load, scan, or extract value "mcee" + """ + + scanner = ScanFooter( + { + "name": "ScanFooter", + "key": "scan_footer", + "limits": {"scanner": 10}, + }, + "test_coordinate", + ) + scanner.scan_wrapper( + "foo bar mcee", + {"uid": "12345", "name": "somename"}, + {"length": 4, "scanner_timeout": 5}, + datetime.date.today(), + ) + assert scanner.event.get("footer") == "mcee" diff --git a/src/python/strelka/tests/test_scan_gif.py b/src/python/strelka/tests/test_scan_gif.py new file mode 100644 index 00000000..a8450f7b --- /dev/null +++ b/src/python/strelka/tests/test_scan_gif.py @@ -0,0 +1,36 @@ +import datetime +from pathlib import Path +from strelka.scanners.scan_gif import ScanGif + + +def test_scan_gif(mocker): + """ + This tests the ScanGif scanner. + It attempts to validate a given GIFs "trailer index" value. + + Pass: Trailer index matches specified value. + Failure: Unable to load file or trailer index does not match specified value. + """ + + scanner = ScanGif( + { + "name": "ScanGif", + "key": "scan_gif", + "limits": {"scanner": 10} + }, + "test_coordinate", + ) + + mocker.patch.object(ScanGif, "upload_to_coordinator", return_value=None) + scanner.scan_wrapper( + Path(Path(__file__).parent / "fixtures/test.gif").read_bytes(), + { + "uid": "12345", + "name": "somename" + }, + { + "scanner_timeout": 5 + }, + datetime.date.today(), + ) + assert scanner.event.get("trailer_index") == 3806 diff --git a/src/python/strelka/tests/test_scan_url.py b/src/python/strelka/tests/test_scan_url.py new file mode 100644 index 00000000..193fc89c --- /dev/null +++ b/src/python/strelka/tests/test_scan_url.py @@ -0,0 +1,43 @@ +import pytest +import datetime +from strelka.scanners.scan_url import ScanUrl + +scanner = ScanUrl( + { + "name": "ScanUrl", + "key": "scan_url", + "limits": {"scanner": 10} + }, + "test_coordinate", +) + +tests = [ + (b"some othervalue foo", []), + (b"http://foobar.test.com", [b"http://foobar.test.com"]), + (b"foo http://foobar.test.com bar", [b"http://foobar.test.com"]), + (b"http://\n", []), + (b"noschema.foo\n", [b"noschema.foo"]), +] + + +@pytest.mark.parametrize("data,expected", tests) +def test_scan_simple_url(data, expected): + """ + This tests the ScanURL scanner. + It attempts to validate the extraction of several URLs against + their URLs extracted from the ScanURL scanner. + + Pass: All URLs successfully extracted or tests passed. + Failure: Unable to extract URLs successfully or extracts undefined URLs. + """ + + scanner.scan_wrapper( + data, + "somefile.foo", + { + "length": 4, + "scanner_timeout": 5 + }, + datetime.date.today(), + ) + assert scanner.event.get("urls") == expected \ No newline at end of file