From c8f0c608e5c89191c4900a10780a615c6367b71a Mon Sep 17 00:00:00 2001
From: Ryan O'Horo <10855297+ryanohoro@users.noreply.github.com>
Date: Fri, 30 Dec 2022 02:35:36 -0600
Subject: [PATCH] ScanJpeg improvements, fixes #270

---
 src/python/strelka/scanners/scan_jpeg.py   | 123 +++++++++++++++++----
 src/python/strelka/tests/test_scan_jpeg.py |   4 +-
 2 files changed, 103 insertions(+), 24 deletions(-)

diff --git a/src/python/strelka/scanners/scan_jpeg.py b/src/python/strelka/scanners/scan_jpeg.py
index d3c76ef0..eda358e6 100644
--- a/src/python/strelka/scanners/scan_jpeg.py
+++ b/src/python/strelka/scanners/scan_jpeg.py
@@ -1,30 +1,109 @@
+import struct
+
 from strelka import strelka
 
 
 class ScanJpeg(strelka.Scanner):
-    """Extracts data embedded in JPEG files.
+    """Extracts data appended to JPEG files.
 
-    This scanner extracts data that is inserted past the JFIF trailer.
+    This scanner extracts data that is inserted past the JFIF EOI marker.
+
+    The marker segments are walked from SOI to EOI, so 0xFFD9 byte pairs
+    that occur inside segments or in the trailing data are not mistaken
+    for the end of the image.
     """
+
     def scan(self, data, file, options, expire_at):
-        if not data.endswith(b'\xff\xd9'):
-            trailer_index = data.rfind(b'\xff\xd9')
-            if trailer_index == -1:
-                self.flags.append('no_trailer')
+        # Standalone markers that carry no length field:
+        # RST0-RST7, SOI and TEM
+        markers_zero_length = {
+            b"\xff\xd0",
+            b"\xff\xd1",
+            b"\xff\xd2",
+            b"\xff\xd3",
+            b"\xff\xd4",
+            b"\xff\xd5",
+            b"\xff\xd6",
+            b"\xff\xd7",
+            b"\xff\xd8",
+            b"\xff\x01",
+        }
+
+        # Image must start with SOI
+        if not data.startswith(b"\xff\xd8"):
+            self.flags.append("corrupt_jpeg_data_no_soi")
+            return
+
+        # Skip SOI
+        offset = 2
+
+        while True:
+            marker = data[offset : offset + 2]
+
+            # The data ended before an EOI marker was reached
+            if len(marker) < 2:
+                self.flags.append("corrupt_jpeg_data_truncated")
+                return
+
+            # Marker must start with 0xff
+            if marker[0] != 0xFF:
+                self.flags.append("corrupt_jpeg_data_misaligned_marker")
+                break
+
+            if marker == b"\xff\xff":
+                # 0xff fill byte ahead of the real marker
+                offset += 1
+            elif marker in markers_zero_length:
+                offset += 2
+            elif marker == b"\xff\xda":
+                # Start of scan (SOS): fast forward through the
+                # entropy-coded data to the next real marker
+                offset += 2
+                while True:
+                    offset = data.find(b"\xff", offset)
+                    if offset == -1 or offset + 1 >= len(data):
+                        self.flags.append("corrupt_jpeg_data_truncated")
+                        return
+                    following = data[offset + 1]
+                    if following == 0x00 or 0xD0 <= following <= 0xD7:
+                        # Stuffed 0xff data byte or restart marker
+                        # (RST0-RST7); the scan continues after either
+                        offset += 2
+                    elif following == 0xFF:
+                        # Fill byte, the marker code comes later
+                        offset += 1
+                    else:
+                        break
+            elif marker == b"\xff\xd9":
+                # EOI marker
+                offset += 2
+                break
             else:
-                trailer_data = data[trailer_index + 2:]
-                if trailer_data:
-                    self.event['trailer_index'] = trailer_index
-
-                    extract_file = strelka.File(
-                        source=self.name,
-                    )
-
-                    for c in strelka.chunk_string(trailer_data):
-                        self.upload_to_coordinator(
-                            extract_file.pointer,
-                            c,
-                            expire_at,
-                        )
-
-                    self.files.append(extract_file)
+                # Any other segment stores its big-endian length, which
+                # counts the two length bytes, right after the marker
+                if offset + 4 > len(data):
+                    self.flags.append("corrupt_jpeg_data_truncated")
+                    return
+                (marker_length,) = struct.unpack_from(">H", data, offset + 2)
+                offset += 2 + marker_length
+
+        # If the end of the image is reached with no more data, return
+        if offset >= len(data):
+            self.flags.append("no_trailer")
+            return
+
+        trailer_data = data[offset:]
+        self.event["trailer_index"] = offset
+
+        extract_file = strelka.File(
+            source=self.name,
+        )
+
+        for c in strelka.chunk_string(trailer_data):
+            self.upload_to_coordinator(
+                extract_file.pointer,
+                c,
+                expire_at,
+            )
+
+        self.files.append(extract_file)
diff --git a/src/python/strelka/tests/test_scan_jpeg.py b/src/python/strelka/tests/test_scan_jpeg.py
index 9e338fb1..735c6f4c 100644
--- a/src/python/strelka/tests/test_scan_jpeg.py
+++ b/src/python/strelka/tests/test_scan_jpeg.py
@@ -11,7 +11,7 @@ def test_scan_jpeg(mocker):
     Failure: Unable to load file or sample event fails to match.
     """
 
-    test_scan_event = {"elapsed": mock.ANY, "flags": []}
+    test_scan_event = {"elapsed": mock.ANY, "flags": ["no_trailer"]}
 
     scanner_event = run_test_scan(
         mocker=mocker,
@@ -29,7 +29,7 @@ def test_scan_jpeg_pe_overlay(mocker):
     Failure: Unable to load file or sample event fails to match.
     """
 
-    test_scan_event = {"elapsed": mock.ANY, "flags": [], "trailer_index": 308564}
+    test_scan_event = {"elapsed": mock.ANY, "flags": [], "trailer_index": 308566}
 
     scanner_event = run_test_scan(
         mocker=mocker,