diff --git a/configs/python/backend/backend.yaml b/configs/python/backend/backend.yaml
index 0850a085..f020b062 100644
--- a/configs/python/backend/backend.yaml
+++ b/configs/python/backend/backend.yaml
@@ -311,6 +311,7 @@ scanners:
       priority: 5
       options:
         beautify: True
+        max_strings: 50
   'ScanJpeg':
     - positive:
         flavors:
diff --git a/src/python/strelka/scanners/scan_javascript.py b/src/python/strelka/scanners/scan_javascript.py
index a676de96..938b1e31 100644
--- a/src/python/strelka/scanners/scan_javascript.py
+++ b/src/python/strelka/scanners/scan_javascript.py
@@ -12,50 +12,75 @@ class ScanJavascript(strelka.Scanner):
             deobfuscated.
             Defaults to True.
+        max_strings: Maximum number of unique values reported for each
+            category (tokens, keywords, strings, identifiers and regular
+            expressions), kept in first-seen order.
+            Defaults to 50.
     """
+
     def scan(self, data, file, options, expire_at):
-        beautify = options.get('beautify', True)
+        beautify = options.get("beautify", True)
+        max_strings = options.get("max_strings", 50)
 
-        self.event.setdefault('tokens', [])
-        self.event.setdefault('keywords', [])
-        self.event.setdefault('strings', [])
-        self.event.setdefault('identifiers', [])
-        self.event.setdefault('regular_expressions', [])
-        self.event['beautified'] = False
+        # Dict keys act as insertion-ordered sets: O(1) de-duplication that
+        # keeps first-seen order, so truncating to max_strings is deterministic.
+        unique = {
+            "tokens": {},
+            "keywords": {},
+            "strings": {},
+            "identifiers": {},
+            "regular_expressions": {},
+        }
+        for field in unique:
+            # Make sure every category is a list even if the scan aborts early.
+            self.event.setdefault(field, [])
+        self.event["beautified"] = False
 
         js = None
 
         try:
             if beautify:
                 js = jsbeautifier.beautify(data.decode())
-                self.event['beautified'] = True
+                self.event["beautified"] = True
+        except strelka.ScannerTimeout:
+            raise
+        except Exception:
+            self.flags.append("beautify_failed")
+
+        try:
+            if js is None:
+                js = data.decode()
+        except strelka.ScannerTimeout:
+            raise
+        except Exception:
+            self.flags.append("decode_failed")
+
+        try:
+            # Skip tokenization when no text could be decoded; "decode_failed"
+            # already records the cause, so no second flag is needed.
+            if js is not None:
+                tokens = esprima.tokenize(
+                    js,
+                    options={
+                        "comment": True,
+                        "tolerant": True,
+                    },
+                )
+                for t in tokens:
+                    unique["tokens"][t.type] = None
+                    if t.type == "String":
+                        unique["strings"][t.value.strip("\"'")] = None
+                    elif t.type == "Keyword":
+                        unique["keywords"][t.value] = None
+                    elif t.type == "Identifier":
+                        unique["identifiers"][t.value] = None
+                    elif t.type == "RegularExpression":
+                        unique["regular_expressions"][t.value] = None
         except strelka.ScannerTimeout:
             raise
         except Exception:
-            self.flags.append('beautify_failed')
-
-        if js is None:
-            js = data.decode()
-
-        tokens = esprima.tokenize(
-            js,
-            options={
-                'comment': True,
-                'tolerant': True,
-            }
-        )
-        for t in tokens:
-            if t.type not in self.event['tokens']:
-                self.event['tokens'].append(t.type)
-            if t.type == 'String':
-                stripped_val = t.value.strip('"\'')
-                if stripped_val not in self.event['strings']:
-                    self.event['strings'].append(stripped_val)
-            if t.type == 'Keyword':
-                if t.value not in self.event['keywords']:
-                    self.event['keywords'].append(t.value)
-            if t.type == 'Identifier':
-                if t.value not in self.event['identifiers']:
-                    self.event['identifiers'].append(t.value)
-            if t.type == 'RegularExpression':
-                if t.value not in self.event['regular_expressions']:
-                    self.event['regular_expressions'].append(t.value)
+            self.flags.append("tokenization_failed")
+        finally:
+            # Publish whatever was collected (also on failure or timeout) as
+            # capped plain lists rather than the working containers.
+            for field, values in unique.items():
+                self.event[field] = list(values)[:max_strings]
diff --git a/src/python/strelka/tests/test_scan_javascript.py b/src/python/strelka/tests/test_scan_javascript.py
index 708230df..3beef5be 100644
--- a/src/python/strelka/tests/test_scan_javascript.py
+++ b/src/python/strelka/tests/test_scan_javascript.py
@@ -1,6 +1,7 @@
 from pathlib import Path
 from unittest import TestCase, mock
 
+from pytest_unordered import unordered
 from strelka.scanners.scan_javascript import ScanJavascript as ScanUnderTest
 from strelka.tests import run_test_scan
 
@@ -14,97 +15,105 @@ def test_scan_javascript(mocker):
     test_scan_event = {
         "elapsed": mock.ANY,
         "flags": [],
-        "tokens": [
-            "BlockComment",
-            "String",
-            "Punctuator",
-            "Keyword",
-            "Identifier",
-            "LineComment",
-            "RegularExpression",
-            "Numeric",
-        ],
-        "keywords": [
-            "var",
-            "function",
-            "return",
-            "for",
-            "if",
-            "throw",
-            "else",
-            "typeof",
-            "new",
-            "this",
-            "in",
-        ],
-        "strings": [
-            "use strict",
-            "path",
-            "fs",
-            "package.json",
-            "",
-            "-",
-            "Could not find partial with name ",
-            ".",
-            "string",
-            "function",
-            "Found unknown type of partial ",
-            " (",
-            ") in Handlebars partial Array => ",
-            "base64",
-            "utf8",
-        ],
-        "identifiers": [
-            "path",
-            "require",
-            "fs",
-            "module",
-            "exports",
-            "register",
-            "Handlebars",
-            "opt",
-            "params",
-            "pkg",
-            "JSON",
-            "parse",
-            "readFileSync",
-            "join",
-            "process",
-            "cwd",
-            "slugify",
-            "str",
-            "toLowerCase",
-            "replace",
-            "helpers",
-            "key",
-            "escape",
-            "Utils",
-            "escapeExpression",
-            "jsonStringify",
-            "obj",
-            "stringify",
-            "concat",
-            "arr",
-            "i",
-            "arguments",
-            "length",
-            "partial",
-            "name",
-            "context",
-            "partials",
-            "compile",
-            "SafeString",
-            "atob",
-            "a",
-            "Buffer",
-            "toString",
-            "btoa",
-            "b",
-            "helper",
-            "hasOwnProperty",
-            "registerHelper",
-        ],
-        "regular_expressions": ["/[^\\w ]+/g", "/ +/g"],
+        "tokens": unordered(
+            [
+                "BlockComment",
+                "String",
+                "Punctuator",
+                "Keyword",
+                "Identifier",
+                "LineComment",
+                "RegularExpression",
+                "Numeric",
+            ]
+        ),
+        "keywords": unordered(
+            [
+                "var",
+                "function",
+                "return",
+                "for",
+                "if",
+                "throw",
+                "else",
+                "typeof",
+                "new",
+                "this",
+                "in",
+            ]
+        ),
+        "strings": unordered(
+            [
+                "use strict",
+                "path",
+                "fs",
+                "package.json",
+                "",
+                "-",
+                "Could not find partial with name ",
+                ".",
+                "string",
+                "function",
+                "Found unknown type of partial ",
+                " (",
+                ") in Handlebars partial Array => ",
+                "base64",
+                "utf8",
+            ]
+        ),
+        "identifiers": unordered(
+            [
+                "path",
+                "require",
+                "fs",
+                "module",
+                "exports",
+                "register",
+                "Handlebars",
+                "opt",
+                "params",
+                "pkg",
+                "JSON",
+                "parse",
+                "readFileSync",
+                "join",
+                "process",
+                "cwd",
+                "slugify",
+                "str",
+                "toLowerCase",
+                "replace",
+                "helpers",
+                "key",
+                "escape",
+                "Utils",
+                "escapeExpression",
+                "jsonStringify",
+                "obj",
+                "stringify",
+                "concat",
+                "arr",
+                "i",
+                "arguments",
+                "length",
+                "partial",
+                "name",
+                "context",
+                "partials",
+                "compile",
+                "SafeString",
+                "atob",
+                "a",
+                "Buffer",
+                "toString",
+                "btoa",
+                "b",
+                "helper",
+                "hasOwnProperty",
+                "registerHelper",
+            ]
+        ),
+        "regular_expressions": unordered(["/[^\\w ]+/g", "/ +/g"]),
         "beautified": True,
     }
 
@@ -116,3 +125,31 @@ def test_scan_javascript(mocker):
 
     TestCase.maxDiff = None
     TestCase().assertDictEqual(test_scan_event, scanner_event)
+
+
+def test_scan_javascript_character_max_strings(mocker):
+    """
+    Pass: Every category is capped at its first `max_strings` unique values.
+    Failure: Unable to load file or a category differs from the capped sample.
+    """
+
+    test_scan_event = {
+        "elapsed": mock.ANY,
+        "flags": [],
+        "tokens": ["BlockComment", "String", "Punctuator", "Keyword", "Identifier"],
+        "keywords": ["var", "function", "return", "for", "if"],
+        "strings": ["use strict", "path", "fs", "package.json", ""],
+        "identifiers": ["path", "require", "fs", "module", "exports"],
+        "regular_expressions": ["/[^\\w ]+/g", "/ +/g"],
+        "beautified": True,
+    }
+
+    scanner_event = run_test_scan(
+        mocker=mocker,
+        scan_class=ScanUnderTest,
+        fixture_path=Path(__file__).parent / "fixtures/test.js",
+        options={"max_strings": 5},
+    )
+
+    TestCase.maxDiff = None
+    TestCase().assertDictEqual(test_scan_event, scanner_event)