Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Limiter for Javascript Output, Tests, Formatting #301

Merged
merged 3 commits into from
Jan 27, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions configs/python/backend/backend.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -311,6 +311,7 @@ scanners:
priority: 5
options:
beautify: True
max_strings: 50
'ScanJpeg':
- positive:
flavors:
Expand Down
92 changes: 56 additions & 36 deletions src/python/strelka/scanners/scan_javascript.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,50 +12,70 @@ class ScanJavascript(strelka.Scanner):
deobfuscated.
Defaults to True.
"""

def scan(self, data, file, options, expire_at):
    """Optionally beautify, then tokenize JavaScript and record unique token data.

    Options read from ``options``:
        beautify: If True, run jsbeautifier over the decoded data before
            tokenizing. Defaults to True.
        max_strings: Maximum number of entries kept in each of the
            deduplicated event lists. Defaults to 50.

    Event fields written:
        tokens, keywords, strings, identifiers, regular_expressions:
            lists of unique values, each capped at ``max_strings`` entries
            and kept in first-seen order.
        beautified: True only when beautification succeeded.

    Flags appended on failure:
        beautify_failed, decode_failed, tokenization_failed.

    Raises:
        strelka.ScannerTimeout: always re-raised, never swallowed.
    """
    beautify = options.get("beautify", True)
    max_strings = options.get("max_strings", 50)

    fields = (
        "tokens",
        "keywords",
        "strings",
        "identifiers",
        "regular_expressions",
    )
    # Dict keys deduplicate like a set but keep insertion order, so the
    # truncation below is deterministic instead of depending on set ordering.
    # Any values already present on the event are carried over.
    seen = {name: dict.fromkeys(self.event.get(name, ())) for name in fields}
    self.event["beautified"] = False

    js = None

    try:
        if beautify:
            js = jsbeautifier.beautify(data.decode())
            self.event["beautified"] = True
    except strelka.ScannerTimeout:
        raise
    except Exception:
        self.flags.append("beautify_failed")

    try:
        # Fall back to the raw source when beautification was skipped or failed.
        if js is None:
            js = data.decode()
    except strelka.ScannerTimeout:
        raise
    except Exception:
        self.flags.append("decode_failed")

    # Token types whose raw value is recorded unchanged, mapped to their field.
    value_fields = {
        "Keyword": "keywords",
        "Identifier": "identifiers",
        "RegularExpression": "regular_expressions",
    }

    try:
        tokens = esprima.tokenize(
            js,
            options={
                "comment": True,
                "tolerant": True,
            },
        )
        for t in tokens:
            seen["tokens"][t.type] = None
            if t.type == "String":
                # Drop the surrounding quote characters from string literals.
                seen["strings"][t.value.strip("\"'")] = None
            elif t.type in value_fields:
                seen[value_fields[t.type]][t.value] = None
    except strelka.ScannerTimeout:
        raise
    except Exception:
        self.flags.append("tokenization_failed")
    finally:
        # Always publish plain lists, even when tokenization fails part-way,
        # so the event never carries non-serializable containers.
        for name in fields:
            self.event[name] = list(seen[name])[:max_strings]
241 changes: 150 additions & 91 deletions src/python/strelka/tests/test_scan_javascript.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from pathlib import Path
from unittest import TestCase, mock

from pytest_unordered import unordered
from strelka.scanners.scan_javascript import ScanJavascript as ScanUnderTest
from strelka.tests import run_test_scan

Expand All @@ -14,97 +15,105 @@ def test_scan_javascript(mocker):
test_scan_event = {
"elapsed": mock.ANY,
"flags": [],
"tokens": [
"BlockComment",
"String",
"Punctuator",
"Keyword",
"Identifier",
"LineComment",
"RegularExpression",
"Numeric",
],
"keywords": [
"var",
"function",
"return",
"for",
"if",
"throw",
"else",
"typeof",
"new",
"this",
"in",
],
"strings": [
"use strict",
"path",
"fs",
"package.json",
"",
"-",
"Could not find partial with name ",
".",
"string",
"function",
"Found unknown type of partial ",
" (",
") in Handlebars partial Array => ",
"base64",
"utf8",
],
"identifiers": [
"path",
"require",
"fs",
"module",
"exports",
"register",
"Handlebars",
"opt",
"params",
"pkg",
"JSON",
"parse",
"readFileSync",
"join",
"process",
"cwd",
"slugify",
"str",
"toLowerCase",
"replace",
"helpers",
"key",
"escape",
"Utils",
"escapeExpression",
"jsonStringify",
"obj",
"stringify",
"concat",
"arr",
"i",
"arguments",
"length",
"partial",
"name",
"context",
"partials",
"compile",
"SafeString",
"atob",
"a",
"Buffer",
"toString",
"btoa",
"b",
"helper",
"hasOwnProperty",
"registerHelper",
],
"regular_expressions": ["/[^\\w ]+/g", "/ +/g"],
"tokens": unordered(
[
"BlockComment",
"String",
"Punctuator",
"Keyword",
"Identifier",
"LineComment",
"RegularExpression",
"Numeric",
]
),
"keywords": unordered(
[
"var",
"function",
"return",
"for",
"if",
"throw",
"else",
"typeof",
"new",
"this",
"in",
]
),
"strings": unordered(
[
"use strict",
"path",
"fs",
"package.json",
"",
"-",
"Could not find partial with name ",
".",
"string",
"function",
"Found unknown type of partial ",
" (",
") in Handlebars partial Array => ",
"base64",
"utf8",
]
),
"identifiers": unordered(
[
"path",
"require",
"fs",
"module",
"exports",
"register",
"Handlebars",
"opt",
"params",
"pkg",
"JSON",
"parse",
"readFileSync",
"join",
"process",
"cwd",
"slugify",
"str",
"toLowerCase",
"replace",
"helpers",
"key",
"escape",
"Utils",
"escapeExpression",
"jsonStringify",
"obj",
"stringify",
"concat",
"arr",
"i",
"arguments",
"length",
"partial",
"name",
"context",
"partials",
"compile",
"SafeString",
"atob",
"a",
"Buffer",
"toString",
"btoa",
"b",
"helper",
"hasOwnProperty",
"registerHelper",
]
),
"regular_expressions": unordered(["/[^\\w ]+/g", "/ +/g"]),
"beautified": True,
}

Expand All @@ -116,3 +125,53 @@ def test_scan_javascript(mocker):

TestCase.maxDiff = None
TestCase().assertDictEqual(test_scan_event, scanner_event)


def test_scan_javascript_character_max_strings(mocker):
    """
    Pass: Every deduplicated field holds at least as many entries as the
        sample event and no more than the configured ``max_strings``.
    Failure: Unable to load file, a field exceeds the limit, or the
        flags / beautified state differ from the sample event.
    """

    max_strings = 5

    # Which values survive truncation is not guaranteed, so the sample lists
    # are only used for their lengths (a lower bound), not their contents.
    test_scan_event = {
        "elapsed": mock.ANY,
        "flags": [],
        "tokens": unordered(
            [
                "BlockComment",
                "String",
                "Punctuator",
                "Keyword",
            ]
        ),
        "keywords": unordered(["throw", "return", "else", "var", "new"]),
        "strings": unordered(["", "path", "string", "-", "base64"]),
        "identifiers": unordered(["exports", "params", "cwd", "Buffer", "escape"]),
        "regular_expressions": unordered(["/[^\\w ]+/g", "/ +/g"]),
        "beautified": True,
    }

    scanner_event = run_test_scan(
        mocker=mocker,
        scan_class=ScanUnderTest,
        fixture_path=Path(__file__).parent / "fixtures/test.js",
        options={"max_strings": max_strings},
    )

    TestCase.maxDiff = None
    case = TestCase()

    case.assertEqual(test_scan_event["flags"], scanner_event["flags"])
    case.assertEqual(test_scan_event["beautified"], scanner_event["beautified"])

    limited_fields = (
        "keywords",
        "regular_expressions",
        "identifiers",
        "strings",
        "tokens",
    )
    for field in limited_fields:
        actual_count = len(scanner_event[field])
        # Lower bound: the scanner still reports the expected amount of data.
        case.assertLessEqual(len(test_scan_event[field]), actual_count, msg=field)
        # Upper bound: the limiter actually caps the field at max_strings.
        case.assertLessEqual(actual_count, max_strings, msg=field)