From be7d83e3ccf98945ba7759b20c041b09845c8a50 Mon Sep 17 00:00:00 2001 From: Caleb Walch Date: Mon, 5 Dec 2022 14:27:20 -0600 Subject: [PATCH 1/5] Add Scanner Test Coverage - Adds pytests for several scanners --- .github/workflows/pr-actions.yaml | 51 +++++++++++++++++++ .gitignore | 1 + src/python/requirements.txt | 46 +++++++++++++++++ src/python/strelka/strelka.py | 4 +- src/python/strelka/tests/__init__.py | 0 src/python/strelka/tests/fixtures/test.gif | Bin 0 -> 3815 bytes src/python/strelka/tests/test_scan_footer.py | 22 ++++++++ src/python/strelka/tests/test_scan_gif.py | 21 ++++++++ src/python/strelka/tests/test_scan_url.py | 28 ++++++++++ 9 files changed, 172 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/pr-actions.yaml create mode 100644 src/python/requirements.txt create mode 100644 src/python/strelka/tests/__init__.py create mode 100644 src/python/strelka/tests/fixtures/test.gif create mode 100644 src/python/strelka/tests/test_scan_footer.py create mode 100644 src/python/strelka/tests/test_scan_gif.py create mode 100644 src/python/strelka/tests/test_scan_url.py diff --git a/.github/workflows/pr-actions.yaml b/.github/workflows/pr-actions.yaml new file mode 100644 index 00000000..06407947 --- /dev/null +++ b/.github/workflows/pr-actions.yaml @@ -0,0 +1,51 @@ +name: pr-actions +on: [pull_request] + +jobs: + backend-strelka-test: + + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v3 + - uses: actions/setup-python@v4 + with: + + python-version: '3.x' + + architecture: 'x64' + - name: Install dependencies + run: | + sudo apt-get -q update + sudo apt-get install --no-install-recommends -qq automake \ + build-essential \ + libfuzzy-dev \ + gcc \ + git \ + libarchive-dev \ + libmagic-dev \ + libssl-dev \ + libzbar0 \ + libgl1 \ + python3-setuptools \ + libgmp-dev \ + libpcap-dev \ + libbz2-dev \ + libgomp1 \ + python3-dev \ + python3-wheel \ + mupdf-tools \ + mupdf \ + libglu1-mesa \ + libtool \ + pkg-config \ + swig \ + tesseract-ocr + python -m pip install --upgrade pip + pip install validators setuptools --upgrade + pip install --no-cache-dir -r src/python/requirements.txt + - name: Test with pytest + run: | + pytest + + diff --git a/.gitignore b/.gitignore index fedab71a..f6d44511 100644 --- a/.gitignore +++ b/.gitignore @@ -9,6 +9,7 @@ __pycache__/ # Distribution / packaging .Python develop-eggs/ +src/python/build dist/ downloads/ eggs/ diff --git a/src/python/requirements.txt b/src/python/requirements.txt new file mode 100644 index 00000000..e04b472a --- /dev/null +++ b/src/python/requirements.txt @@ -0,0 +1,46 @@ +arc4==0.0.4 +beautifulsoup4==4.9.3 +boltons==20.2.1 +construct==2.10.67 +cryptography==3.4.7 +docker==5.0.0 +esprima==4.0.1 +eml-parser>=1.17 +git+https://github.com/jshlbrd/python-entropy.git # v0.11 as of this freeze (package installed as 'entropy') +html5lib==1.1 +inflection==0.5.1 +jsbeautifier==1.13.13 +libarchive-c==2.9 +lief==0.12.3 +lxml==4.9.1 +M2Crypto==0.38.0 +nested-lookup==0.2.22 +numpy==1.22.1 +olefile==0.46 +oletools==0.56.1 +opencv-python==4.6.0.66 +opencv-contrib-python==4.6.0.66 +pefile==2019.4.18 +pgpdump3==1.5.2 +pyelftools==0.27 +pygments==2.9.0 +pylzma==0.5.0 +pytesseract==0.3.7 +python-docx==0.8.10 +python-magic==0.4.22 +py-tlsh==4.7.2 +pyyaml>=5.4.1 +pyzbar==0.1.8 +pytz>=2022.1 +rarfile==4.0 +redis==3.5.3 +requests==2.25.1 +rpmfile==1.0.8 +signify==0.3.0 +speakeasy-emulator==1.5.2 +ssdeep==3.4 +tldextract==3.1.0 +tnefparse==1.4.0 +xmltodict==0.12.0 +pytest==7.2.0 +pytest-mock==3.10.0 diff --git a/src/python/strelka/strelka.py b/src/python/strelka/strelka.py index 3579ed29..c9d81688 100644 --- a/src/python/strelka/strelka.py +++ b/src/python/strelka/strelka.py @@ -92,6 +92,7 @@ class Scanner(object): This is referenced in the scanner metadata. key: String that contains the scanner's metadata key. This is used to identify the scanner metadata in scan results. + event: Dictionary containing the result of scan backend_cfg: Dictionary that contains the parsed backend configuration. scanner_timeout: Amount of time (in seconds) that a scanner can spend scanning a file. Can be overridden on a per-scanner basis @@ -104,6 +105,7 @@ def __init__(self, backend_cfg, coordinator): self.key = inflection.underscore(self.name.replace('Scan', '')) self.scanner_timeout = backend_cfg.get('limits').get('scanner') self.coordinator = coordinator + self.event = dict() self.iocs = [] self.type = IocOptions self.extract = TLDExtract(suffix_list_urls=None) @@ -159,7 +161,7 @@ def scan_wrapper(self, start = time.time() self.files = [] self.flags = [] - self.event = {} + self.event = dict() self.scanner_timeout = options.get('scanner_timeout', self.scanner_timeout) diff --git a/src/python/strelka/tests/__init__.py b/src/python/strelka/tests/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/python/strelka/tests/fixtures/test.gif b/src/python/strelka/tests/fixtures/test.gif new file mode 100644 index 0000000000000000000000000000000000000000..4f8b05797b256ece17ef014ec9451ea56a608e94 GIT binary patch literal 3815 zcmcIm2UJsO7Jfirq>N)3Sqo~!u9%Pj0fI2D)>PHn2 zax^>ADU;*-cS{2Ru(GnkzwTNQKY=zi(b6zN!y&Rl0$-QT85)667&?Z3KdQ!{V61F$ zbg(7v3ZA4YJzrHP4aQ?trOzrD${JF%a5i{dCo0aw>4d43ldY99R$A>C_$UL(AX11p zItI)j5=b;8Lsc4sC0pW<{O8qWs5JPC2;EjyT4Qwsyy{bnOvQl}AaX~nWMMEc0s(=^ zDPs9IHge!voIHdXBs90;HF-rRnU;H;!X&XA7f`mdH92_7H zau7203KXWSth{Oi4nM+|I6`wI(J_o8B--w87Em~v6%|jR!DPo`8yCS9*aaPDrzXhl@Xe9vT|qy z45qB4sjRH2p$tRID=8v0(BEwQOjiS;t%O3L(6VwUZ5RxV(A3mcL?aL=Ek(4fB0}@K zt{#a-$B?XW-~8hFe*e%#{iQBai;Bb0$y8G^nedGPMmA(RnPx+#fVE5z-~(szBrMs1 zc5t;kU$aHwsCauER+~yDg1_L4#Q(&CNK8>sK%5Up#*{_jGnZUsqdGT~&FzqP(oMq`0WC zAU`iRC!2FCEAwVXdRl5qa#CVKd|YfybW~(ScvxsiFgqylMu5Mcug`UFFHe>S)7{P0 z#o5V`;b3n^r%|s_$Rr}c_A1`yiZu>vWr?x4Y<|h?;)V0)&Yn4KYGQoK=;R5*;|BVA zx;olul$NH3y4o?-qbf*cK7|zI<=`+`DCCID;X?-x?BBO{&u-~m;GH|ROKsb_W%EBI zH~qeG!}@iT7! zUZsuqYmHl4w={Kdx^~gyix9qb>bkD85{KOTAlo%)S;Y39uEGpVupwt?5rpS-E& zy_@|Ill64WDEIv@-LdL$kT~3AQ8cE^v{*_KP(Q_NRNatPBbJfnA&3Sfg)PFZjZ9IY zJGBtANM(K?WiLThY9eHKD+q>9aLO0_~hjZDqM5<&??R1cUOtM^%i4r;pmo^T+F$LgG7) zR}@S%_-;IG-d<5S0s7Ob$hPWb*YK-SwPJCAUzJ1~P99;dmLk(dCyq1n=3VAj>XRwD zGF#adENY4jSj~|4ddTlkPM=tr-V7;IMCQ%KnGMaxR}d0BrxA)>ncmshIWEbmQ(ggDR`4{SemoIMr^AO!pROOGcANBdN2av$eaqr>0%8WiYJM5SSF?!pFvk zCiEu(uHf|W?1MSs=krP#UFFu#KTq*&X5=Ju+SJ_Nfwqq5WRGhwU(yMcB7y5`93`)v zI(mP<0kerMr1|;o%k}BaA(Hw|63PHhBy#5^OXln;o^Y%H;pu2gab{PNx~JH7{gSK) zX}V#DF1422>diDt(@)SZ<@9HtE57=uwba({e}HduI<OzO^}>*rp`?`KI~d>~oz z`@Crmcw05^Ray11w7Q|P_rL1&%wJ&bxP12YtP{w^(4Fkz3GwiE_6~IMVR%3T{DUHy zHylF!!egSMf@7VO!jo>ArvMzm9C$|N&0E>Iu>6AL7!Hsln4Vb9cB*VBu8~ivzXQ70 zP_5A1lG@hZsFGFJqVtgT0IxOjXbf#Gz~u?eSP+DH)3Z;fo(s$fJ^gKwx3u;h_rtHB z1U`vM=@um4@m#at%DihlRcI9Ufi&%Wt!_;8+dq@uClHx^OuiIyt{|jMYyfCx4)IsAzzDt z5OV^AeK|BF&?nL{DilagiDM^#qEk{~kvD0~EjDwrjF+6&)$PqV$%q z1121^mZFTdC|Rr4c)jAA9dYs-{~xGT?Pb(mKI|X?321smedkB2>NhNp7r)Xu|7P5^ zOXuSUrVAf6lEPJQKe9R8*Po3C1>%AuY>z~V2hlD^nDJ3N7$X)(O-Q_ul6oLrQG0t1BMiD0A0 zr|ykXO=gN5=Gt0m^L`87Z@jJ$t1rB_`6#^HoIbgq+oukBJ0mq3DjSNFVHRxmT~m0n zR@fPGsQH|??1Z+%hHXlFHgDKnZ4Vg4_B9%ETcaTRLl^DTSdH^rz=yI<>lv&Rp5hYe*Iuj zaD)x$G7uXVhK)=L!NkPHCRimWS*E2cc_;c?rWXm8DCA`pV2aX91k0TA{R*pMYgEcP zo;uB`t(5u-H~lEKQJ?=nO7!5#Yb^|uv5v>aPe!Ipd30O5$!pIZQYVJJ7Kkq#m&RR} zUd?d{OC7U1(97leppAQT&U#O5@_8w7leQFWiK3scmFaFq?-I}v+IibW!p|Gq6li|p znCz_|sHgOE>Mg8(#Q$c)@=?qFg4((GWmeN<(QW1lwTCa2;@iVVCKtJ`?)&udOEtj*uo21cFdF#4*M^KkJs?XN&q4m&kzh1qji zIjo}G!sPs%Xw$0^&c#M`^(J>~0~_z>7POu$7Vqdh-rdvNt<~E$I5ey~IM$_x8<WUADVj}F+U5#Ei}w41IvOd@R!`;61KJ)umS?@LFDgH@%#(~^(q8Ez7w#1 z)7XKie4N<*08s}DVKaB~PO#qMX{ih<*Z_N21uv>z%wxul9%#?eTi`L{H`>2m6s3E1 u#iA9YGGgQ#dH};8KbzSP^DP?uw;Pn@3>Dk-{_rzH6a-YP$z)3mRqUT@A@Owp literal 0 HcmV?d00001 diff --git a/src/python/strelka/tests/test_scan_footer.py b/src/python/strelka/tests/test_scan_footer.py new file mode 100644 index 00000000..6a3705f4 --- /dev/null +++ b/src/python/strelka/tests/test_scan_footer.py @@ -0,0 +1,22 @@ +import datetime +from strelka.scanners.scan_footer import ScanFooter + + +def test_scan_footer(): + """Attach file footer""" + + scanner = ScanFooter( + { + "name": "ScanFooter", + "key": "scan_footer", + "limits": {"scanner": 10}, + }, + "test_coordinate", + ) + scanner.scan_wrapper( + "foo bar mcee", + {"uid": "12345", "name": "somename"}, + {"length": 4, "scanner_timeout": 5}, + datetime.date.today(), + ) + assert scanner.event.get("footer") == "mcee" diff --git a/src/python/strelka/tests/test_scan_gif.py b/src/python/strelka/tests/test_scan_gif.py new file mode 100644 index 00000000..87120bab --- /dev/null +++ b/src/python/strelka/tests/test_scan_gif.py @@ -0,0 +1,21 @@ +import datetime +from pathlib import Path +from strelka.scanners.scan_gif import ScanGif + + +def test_scan_gif(mocker): + """Attach trailer index""" + + scanner = ScanGif( + {"name": "ScanGif", "key": "scan_gif", "limits": {"scanner": 10}}, + "test_coordinate", + ) + + mocker.patch.object(ScanGif, "upload_to_coordinator", return_value=None) + scanner.scan_wrapper( + Path(Path(__file__).parent / "fixtures/test.gif").read_bytes(), + {"uid": "12345", "name": "somename"}, + {"scanner_timeout": 5}, + datetime.date.today(), + ) + assert scanner.event.get("trailer_index") == 3806 diff --git a/src/python/strelka/tests/test_scan_url.py b/src/python/strelka/tests/test_scan_url.py new file mode 100644 index 00000000..24ff17cc --- /dev/null +++ b/src/python/strelka/tests/test_scan_url.py @@ -0,0 +1,28 @@ +import pytest +import datetime +from strelka.scanners.scan_url import ScanUrl + +scanner = ScanUrl( + {"name": "ScanUrl", "key": "scan_url", "limits": {"scanner": 10}}, + "test_coordinate", + ) + +tests = [ + (b"some othervalue foo", []), + (b"http://foobar.test.com", [b"http://foobar.test.com"]), + (b"foo http://foobar.test.com bar", [b"http://foobar.test.com"]), + (b"http://\n", []), + (b"noschema.foo\n", [b"noschema.foo"]), + ] + +@pytest.mark.parametrize("data,expected", tests) +def test_scan_simple_url(data, expected): + """Extract URLs from payloads""" + + scanner.scan_wrapper( + data, + "somefile.foo", + {"length": 4, "scanner_timeout": 5}, + datetime.date.today(), + ) + assert scanner.event.get("urls") == expected From 4a2e765d18e5d382dddebe64935ab089169d2c6c Mon Sep 17 00:00:00 2001 From: Paul Hutelmyer Date: Wed, 7 Dec 2022 06:56:52 -0500 Subject: [PATCH 2/5] Adding validators to requirements --- src/python/requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/src/python/requirements.txt b/src/python/requirements.txt index e04b472a..95a4a212 100644 --- a/src/python/requirements.txt +++ b/src/python/requirements.txt @@ -41,6 +41,7 @@ speakeasy-emulator==1.5.2 ssdeep==3.4 tldextract==3.1.0 tnefparse==1.4.0 +validators==0.18.2 xmltodict==0.12.0 pytest==7.2.0 pytest-mock==3.10.0 From d498c652caace7add88638b3981f81739cf4a16f Mon Sep 17 00:00:00 2001 From: Paul Hutelmyer Date: Wed, 7 Dec 2022 07:01:10 -0500 Subject: [PATCH 3/5] Reformatting --- src/python/strelka/tests/test_scan_gif.py | 17 +++++++++++++---- src/python/strelka/tests/test_scan_url.py | 23 ++++++++++++++++------- 2 files changed, 29 insertions(+), 11 deletions(-) diff --git a/src/python/strelka/tests/test_scan_gif.py b/src/python/strelka/tests/test_scan_gif.py index 87120bab..aa09a216 100644 --- a/src/python/strelka/tests/test_scan_gif.py +++ b/src/python/strelka/tests/test_scan_gif.py @@ -4,18 +4,27 @@ def test_scan_gif(mocker): - """Attach trailer index""" + # Attach trailer index scanner = ScanGif( - {"name": "ScanGif", "key": "scan_gif", "limits": {"scanner": 10}}, + { + "name": "ScanGif", + "key": "scan_gif", + "limits": {"scanner": 10} + }, "test_coordinate", ) mocker.patch.object(ScanGif, "upload_to_coordinator", return_value=None) scanner.scan_wrapper( Path(Path(__file__).parent / "fixtures/test.gif").read_bytes(), - {"uid": "12345", "name": "somename"}, - {"scanner_timeout": 5}, + { + "uid": "12345", + "name": "somename" + }, + { + "scanner_timeout": 5 + }, datetime.date.today(), ) assert scanner.event.get("trailer_index") == 3806 diff --git a/src/python/strelka/tests/test_scan_url.py b/src/python/strelka/tests/test_scan_url.py index 24ff17cc..4756c91d 100644 --- a/src/python/strelka/tests/test_scan_url.py +++ b/src/python/strelka/tests/test_scan_url.py @@ -2,10 +2,15 @@ import datetime from strelka.scanners.scan_url import ScanUrl + scanner = ScanUrl( - {"name": "ScanUrl", "key": "scan_url", "limits": {"scanner": 10}}, - "test_coordinate", - ) + { + "name": "ScanUrl", + "key": "scan_url", + "limits": {"scanner": 10} + }, + "test_coordinate", +) tests = [ (b"some othervalue foo", []), @@ -13,16 +18,20 @@ (b"foo http://foobar.test.com bar", [b"http://foobar.test.com"]), (b"http://\n", []), (b"noschema.foo\n", [b"noschema.foo"]), - ] +] + @pytest.mark.parametrize("data,expected", tests) def test_scan_simple_url(data, expected): - """Extract URLs from payloads""" + # Extract URLs from payloads scanner.scan_wrapper( data, "somefile.foo", - {"length": 4, "scanner_timeout": 5}, + { + "length": 4, + "scanner_timeout": 5 + }, datetime.date.today(), ) - assert scanner.event.get("urls") == expected + assert scanner.event.get("urls") == expected \ No newline at end of file From 009a63b8e299451d906bf290a83e3d3a6651b225 Mon Sep 17 00:00:00 2001 From: Paul Hutelmyer Date: Wed, 7 Dec 2022 07:14:08 -0500 Subject: [PATCH 4/5] Adding some documentation --- src/python/strelka/tests/test_scan_footer.py | 8 +++++++- src/python/strelka/tests/test_scan_gif.py | 8 +++++++- src/python/strelka/tests/test_scan_url.py | 10 ++++++++-- 3 files changed, 22 insertions(+), 4 deletions(-) diff --git a/src/python/strelka/tests/test_scan_footer.py b/src/python/strelka/tests/test_scan_footer.py index 6a3705f4..3d77d4d0 100644 --- a/src/python/strelka/tests/test_scan_footer.py +++ b/src/python/strelka/tests/test_scan_footer.py @@ -3,7 +3,13 @@ def test_scan_footer(): - """Attach file footer""" + """ + This tests the ScanFooter scanner. + It attempts to validate the extraction of a string from a file's content. + + Pass: File is loaded, scanned, and footer value "mcee" is successfully extracted. + Failure: Unable to load, scan, or extract value "mcee" + """ scanner = ScanFooter( { diff --git a/src/python/strelka/tests/test_scan_gif.py b/src/python/strelka/tests/test_scan_gif.py index aa09a216..a8450f7b 100644 --- a/src/python/strelka/tests/test_scan_gif.py +++ b/src/python/strelka/tests/test_scan_gif.py @@ -4,7 +4,13 @@ def test_scan_gif(mocker): - # Attach trailer index + """ + This tests the ScanGif scanner. + It attempts to validate a given GIFs "trailer index" value. + + Pass: Trailer index matches specified value. + Failure: Unable to load file or trailer index does not match specified value. + """ scanner = ScanGif( { diff --git a/src/python/strelka/tests/test_scan_url.py b/src/python/strelka/tests/test_scan_url.py index 4756c91d..193fc89c 100644 --- a/src/python/strelka/tests/test_scan_url.py +++ b/src/python/strelka/tests/test_scan_url.py @@ -2,7 +2,6 @@ import datetime from strelka.scanners.scan_url import ScanUrl - scanner = ScanUrl( { "name": "ScanUrl", @@ -23,7 +22,14 @@ @pytest.mark.parametrize("data,expected", tests) def test_scan_simple_url(data, expected): - # Extract URLs from payloads + """ + This tests the ScanURL scanner. + It attempts to validate the extraction of several URLs against + their URLs extracted from the ScanURL scanner. + + Pass: All URLs successfully extracted or tests passed. + Failure: Unable to extract URLs successfully or extracts undefined URLs. + """ scanner.scan_wrapper( data, From 70af1c447251396bda7126e5d84d0214f8855a2d Mon Sep 17 00:00:00 2001 From: Paul Hutelmyer Date: Wed, 7 Dec 2022 07:30:06 -0500 Subject: [PATCH 5/5] Adding docs --- docs/README.md | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/docs/README.md b/docs/README.md index 9068561f..7f21245e 100644 --- a/docs/README.md +++ b/docs/README.md @@ -53,6 +53,8 @@ Strelka differs from its sibling projects in a few significant ways: * [Protobuf](#protobuf) * [Scanners](#scanners) * [Scanner List](#scanner-list) +* [Tests](#tests) + * [Setup](#tests-setup) * [Use Cases](#use-cases) * [Contributing](#contributing) * [Related Projects](#related-projects) @@ -594,6 +596,46 @@ The table below describes each scanner and its options. Each scanner has the hid | ScanZip | Extracts files from zip archives | "limit" -- maximum number of files to extract (defaults to 1000)
"password_file" -- location of passwords file for zip archives (defaults to etc/strelka/passwords.txt) | | ScanZlib | Decompresses gzip files | N/A +## Tests +As Strelka consists of many scanners and dependencies for those scanners, Pytests are particularly valuable for testing the ongoing functionality of Strelka and it's scanners. Tests allow users to write test cases that verify the correct behavior of Strelka scanners to ensure that the scanners remain reliable and accurate. Additionally, using pytests can help streamline the development process, allowing developers to focus on writing new features and improvements for the scanners. The following section details how to setup Pytests. + +If using Strelka on Github, this repository supports Github Actions which runs on Pull Requests + +### Tests Setup +Here are the steps for setting up a virtualenv virtual environment, installing requirements from src/python/requirements.txt, and running pytest: + +1. Install virtualenv, if it is not already installed: + +``` +pip install virtualenv +``` +2. Create a new virtual environment: + +``` +virtualenv +``` + +3. Activate the virtual environment: + +``` +source /bin/activate +``` + +4. Install the requirements from src/python/requirements.txt: + +``` +pip install -r src/python/requirements.txt +``` + +5. Run pytest to execute the test cases: + +``` +pytest +``` + +Upon execution, you will be provided the successes and failures for any available scanner test. + + ## Use Cases Below are some select use cases that show the value Strelka can add to a threat detection tech stack. Keep in mind that these results are parsed in real time without post-processing and are typically correlated with other detection/response tools (e.g. Bro, Volatility, etc.). The file metadata shown below was derived from files found in [VirusShare](https://virusshare.com/) torrent no. 323 and from a test file in the [MaliciousMacroBot (MMBot) repository](https://github.com/egaus/MaliciousMacroBot).