diff --git a/build/python/backend/Dockerfile b/build/python/backend/Dockerfile index 03d7e27e..bb33ea2a 100644 --- a/build/python/backend/Dockerfile +++ b/build/python/backend/Dockerfile @@ -8,7 +8,7 @@ ARG CONFIG_TESTS=false ARG YARA_VERSION=4.3.1 ARG CAPA_VERSION=6.1.0 -ARG EXIFTOOL_VERSION=12.52 +ARG EXIFTOOL_VERSION=12.60 # Environment variables ENV PYTHONDONTWRITEBYTECODE 1 diff --git a/configs/python/backend/backend.yaml b/configs/python/backend/backend.yaml index f3f098c3..606cbe5b 100644 --- a/configs/python/backend/backend.yaml +++ b/configs/python/backend/backend.yaml @@ -154,62 +154,9 @@ scanners: - 'application/msword' - 'olecf_file' - 'application/vnd.ms-excel' - priority: 5 - options: - keys: - - 'Author' - - 'Characters' - - 'Company' - - 'CreateDate' - - 'LastModifiedBy' - - 'Lines' - - 'ModifyDate' - - 'Pages' - - 'Paragraphs' - - 'RevisionNumber' - - 'Software' - - 'Template' - - 'Title' - - 'TotalEditTime' - - 'Words' - tmp_directory: '/dev/shm/' - - positive: - flavors: - 'application/pdf' - 'pdf_file' - priority: 5 - options: - keys: - - 'Author' - - 'CreateDate' - - 'Creator' - - 'CreatorTool' - - 'Linearized' - - 'ModifyDate' - - 'PageCount' - - 'PDFVersion' - - 'Producer' - - 'Title' - tmp_directory: '/dev/shm/' - - positive: - flavors: - 'lnk_file' - priority: 5 - options: - keys: - - 'CommandLineArguments' - - 'Description' - - 'FileAttributes' - - 'Flags' - - 'HotKey' - - 'IconFileName' - - 'IconIndex' - - 'RunWindow' - - 'TargetFileSize' - - 'WorkingDirectory' - tmp_directory: '/dev/shm/' - - positive: - flavors: - 'image/gif' - 'gif_file' - 'image/jpeg' @@ -227,11 +174,6 @@ scanners: - 'image/heic' - 'image/heif' priority: 5 - options: - keys: - - 'ImageHeight' - - 'ImageWidth' - tmp_directory: '/dev/shm/' # 'ScanFloss': # - positive: # flavors: diff --git a/src/python/strelka/scanners/scan_exiftool.py b/src/python/strelka/scanners/scan_exiftool.py index 786eb33b..2b4dc832 100644 --- a/src/python/strelka/scanners/scan_exiftool.py +++ 
def scan(self, data, file, options, expire_at):
    """Run Exiftool over ``data`` and copy the reported tags into the event.

    The payload is written to a temporary file, ``exiftool -j`` is run on
    it, and every tag of the first JSON object in the output becomes a
    top-level key of ``self.event``. Tag names are lowercased and any space
    or slash is replaced with an underscore (e.g. ``FileName`` is stored as
    ``filename``). List values are flattened into one comma-separated
    string; all other values are stored as decoded from the JSON.

    Failures while running Exiftool or decoding its output do not
    propagate; they are recorded in ``self.flags`` with an
    ``exiftool_error:`` prefix. Errors creating or writing the temporary
    file are not caught here.

    Args:
        data: Bytes of the file being scanned.
        file: Not used by this scanner.
        options: Scanner options. ``tmp_directory`` (default ``/tmp/``)
            selects where the temporary file is created.
        expire_at: Not used by this scanner.
    """
    tmp_directory = options.get("tmp_directory", "/tmp/")

    with tempfile.NamedTemporaryFile(dir=tmp_directory) as tmp_data:
        tmp_data.write(data)
        # Flush so the exiftool child process reads the complete payload.
        tmp_data.flush()

        try:
            # The exit status is deliberately not checked: whatever JSON
            # exiftool printed is still consumed, as before.
            completed = subprocess.run(
                ["exiftool", "-j", tmp_data.name],
                stdout=subprocess.PIPE,
                stderr=subprocess.DEVNULL,
                check=False,
            )

            if completed.stdout:
                # exiftool -j prints a JSON array with one object per input
                # file; only one file is passed, so take the first object.
                metadata = json.loads(completed.stdout)[0]
                for key, value in metadata.items():
                    # NOTE(review): a tag named "Flags" normalizes to
                    # "flags", which may collide with the scanner's own
                    # flags field in the emitted event - confirm.
                    formatted_key = key.replace(" ", "_").replace("/", "_").lower()

                    # Flatten lists so every value is a scalar.
                    if isinstance(value, list):
                        value = ", ".join(map(str, value))

                    self.event[formatted_key] = value

        # OSError covers a missing or non-executable exiftool binary, which
        # CalledProcessError alone never reported for this call.
        except (OSError, subprocess.SubprocessError) as e:
            self.flags.append(f"exiftool_error: Subprocess Error - {str(e)}")
        except json.JSONDecodeError as e:
            self.flags.append(f"exiftool_error: JSON Decode Error - {str(e)}")
        except Exception as e:
            self.flags.append(f"exiftool_error: General Error - {str(e)}")
"ModifyDate", "value": mock.ANY}, - {"key": "Security", "value": None}, - {"key": "CodePage", "value": "Windows Latin 1 (Western European)"}, - {"key": "Company", "value": "Target Corporation"}, - {"key": "CharCountWithSpaces", "value": 2877}, - {"key": "AppVersion", "value": 16.0}, - {"key": "ScaleCrop", "value": "No"}, - {"key": "LinksUpToDate", "value": "No"}, - {"key": "SharedDoc", "value": "No"}, - {"key": "HyperlinksChanged", "value": "No"}, - {"key": "TitleOfParts", "value": ""}, - {"key": "HeadingPairs", "value": ["Title", 1]}, - {"key": "CompObjUserTypeLen", "value": 32}, - {"key": "CompObjUserType", "value": "Microsoft Word 97-2003 Document"}, - {"key": "LastPrinted", "value": "0000:00:00 00:00:00"}, - {"key": "RevisionNumber", "value": 2}, - {"key": "TotalEditTime", "value": "1 minute"}, - {"key": "Words", "value": 430}, - {"key": "Characters", "value": 2452}, - {"key": "Pages", "value": 1}, - {"key": "Paragraphs", "value": 5}, - {"key": "Lines", "value": 20}, - ], + "sourcefile": mock.ANY, + "exiftoolversion": 12.6, + "filename": mock.ANY, + "directory": "/tmp", + "filesize": "51 kB", + "filemodifydate": mock.ANY, + "fileaccessdate": mock.ANY, + "fileinodechangedate": mock.ANY, + "filepermissions": "-rw-------", + "filetype": "DOC", + "filetypeextension": "doc", + "mimetype": "application/msword", + "identification": "Word 8.0", + "languagecode": "English (US)", + "docflags": "Has picture, 1Table, ExtChar", + "system": "Windows", + "word97": "No", + "title": "", + "subject": "", + "author": "Ryan.OHoro", + "keywords": "", + "comments": "", + "template": "Normal.dotm", + "lastmodifiedby": "Ryan.OHoro", + "software": "Microsoft Office Word", + "createdate": "2022:12:16 19:48:00", + "modifydate": "2022:12:16 19:48:00", + "security": "None", + "codepage": "Windows Latin 1 (Western European)", + "company": "Target Corporation", + "charcountwithspaces": 2877, + "appversion": 16.0, + "scalecrop": "No", + "linksuptodate": "No", + "shareddoc": "No", + 
"hyperlinkschanged": "No", + "titleofparts": "", + "headingpairs": "Title, 1", + "compobjusertypelen": 32, + "compobjusertype": "Microsoft Word 97-2003 Document", + "lastprinted": "0000:00:00 00:00:00", + "revisionnumber": 2, + "totaledittime": "1 minute", + "words": 430, + "characters": 2452, + "pages": 1, + "paragraphs": 5, + "lines": 20, } scanner_event = run_test_scan( @@ -85,110 +83,84 @@ def test_scan_exiftool_jpg(mocker): test_scan_event = { "elapsed": mock.ANY, "flags": [], - "keys": [ - {"key": "SourceFile", "value": mock.ANY}, - {"key": "ExifToolVersion", "value": 12.52}, - {"key": "FileName", "value": mock.ANY}, - {"key": "Directory", "value": mock.ANY}, - {"key": "FileSize", "value": "309 kB"}, - {"key": "FileModifyDate", "value": mock.ANY}, - {"key": "FileAccessDate", "value": mock.ANY}, - {"key": "FileInodeChangeDate", "value": mock.ANY}, - {"key": "FilePermissions", "value": "-rw-------"}, - {"key": "FileType", "value": "JPEG"}, - {"key": "FileTypeExtension", "value": "jpg"}, - {"key": "MIMEType", "value": "image/jpeg"}, - {"key": "ExifByteOrder", "value": "Little-endian (Intel, II)"}, - {"key": "Orientation", "value": "Horizontal (normal)"}, - {"key": "XResolution", "value": 72}, - {"key": "YResolution", "value": 72}, - {"key": "ResolutionUnit", "value": "inches"}, - {"key": "Software", "value": "ACDSee Pro 7"}, - {"key": "ModifyDate", "value": mock.ANY}, - {"key": "YCbCrPositioning", "value": "Centered"}, - {"key": "SubSecTime", "value": 903}, - {"key": "ExifImageWidth", "value": 1236}, - {"key": "ExifImageHeight", "value": 891}, - {"key": "XMPToolkit", "value": "Image::ExifTool 12.44"}, - {"key": "GPSLatitude", "value": "22 deg 54' 40.92\" S"}, - {"key": "GPSLongitude", "value": "43 deg 12' 21.30\" W"}, - {"key": "ProfileCMMType", "value": "Linotronic"}, - {"key": "ProfileVersion", "value": "2.1.0"}, - {"key": "ProfileClass", "value": "Display Device Profile"}, - {"key": "ColorSpaceData", "value": "RGB"}, - {"key": "ProfileConnectionSpace", 
"value": "XYZ"}, - {"key": "ProfileDateTime", "value": mock.ANY}, - {"key": "ProfileFileSignature", "value": "acsp"}, - {"key": "PrimaryPlatform", "value": "Microsoft Corporation"}, - {"key": "CMMFlags", "value": "Not Embedded, Independent"}, - {"key": "DeviceManufacturer", "value": "Hewlett-Packard"}, - {"key": "DeviceModel", "value": "sRGB"}, - {"key": "DeviceAttributes", "value": "Reflective, Glossy, Positive, Color"}, - {"key": "RenderingIntent", "value": "Perceptual"}, - {"key": "ConnectionSpaceIlluminant", "value": "0.9642 1 0.82491"}, - {"key": "ProfileCreator", "value": "Hewlett-Packard"}, - {"key": "ProfileID", "value": 0}, - { - "key": "ProfileCopyright", - "value": "Copyright (c) 1998 Hewlett-Packard Company", - }, - {"key": "ProfileDescription", "value": "sRGB IEC61966-2.1"}, - {"key": "MediaWhitePoint", "value": "0.95045 1 1.08905"}, - {"key": "MediaBlackPoint", "value": "0 0 0"}, - {"key": "RedMatrixColumn", "value": "0.43607 0.22249 0.01392"}, - {"key": "GreenMatrixColumn", "value": "0.38515 0.71687 0.09708"}, - {"key": "BlueMatrixColumn", "value": "0.14307 0.06061 0.7141"}, - {"key": "DeviceMfgDesc", "value": "IEC http://www.iec.ch"}, - { - "key": "DeviceModelDesc", - "value": "IEC 61966-2.1 Default RGB colour space - sRGB", - }, - { - "key": "ViewingCondDesc", - "value": "Reference Viewing Condition in IEC61966-2.1", - }, - {"key": "ViewingCondIlluminant", "value": "19.6445 20.3718 16.8089"}, - {"key": "ViewingCondSurround", "value": "3.92889 4.07439 3.36179"}, - {"key": "ViewingCondIlluminantType", "value": "D50"}, - {"key": "Luminance", "value": "76.03647 80 87.12462"}, - {"key": "MeasurementObserver", "value": "CIE 1931"}, - {"key": "MeasurementBacking", "value": "0 0 0"}, - {"key": "MeasurementGeometry", "value": "Unknown"}, - {"key": "MeasurementFlare", "value": "0.999%"}, - {"key": "MeasurementIlluminant", "value": "D65"}, - {"key": "Technology", "value": "Cathode Ray Tube Display"}, - { - "key": "RedTRC", - "value": "(Binary data 2060 bytes, 
use -b option to extract)", - }, - { - "key": "GreenTRC", - "value": "(Binary data 2060 bytes, use -b option to extract)", - }, - { - "key": "BlueTRC", - "value": "(Binary data 2060 bytes, use -b option to extract)", - }, - { - "key": "Comment", - "value": "Colégio Militar do Rio de Janeiro (J David, 1906)", - }, - {"key": "ImageWidth", "value": 1236}, - {"key": "ImageHeight", "value": 891}, - {"key": "EncodingProcess", "value": "Baseline DCT, Huffman coding"}, - {"key": "BitsPerSample", "value": 8}, - {"key": "ColorComponents", "value": 3}, - {"key": "YCbCrSubSampling", "value": "YCbCr4:2:2 (2 1)"}, - {"key": "ImageSize", "value": "1236x891"}, - {"key": "Megapixels", "value": 1.1}, - {"key": "SubSecModifyDate", "value": mock.ANY}, - {"key": "GPSLatitudeRef", "value": "South"}, - {"key": "GPSLongitudeRef", "value": "West"}, - { - "key": "GPSPosition", - "value": "22 deg 54' 40.92\" S, 43 deg 12' 21.30\" W", - }, - ], + "sourcefile": mock.ANY, + "exiftoolversion": 12.6, + "filename": mock.ANY, + "directory": "/tmp", + "filesize": "309 kB", + "filemodifydate": mock.ANY, + "fileaccessdate": mock.ANY, + "fileinodechangedate": mock.ANY, + "filepermissions": "-rw-------", + "filetype": "JPEG", + "filetypeextension": "jpg", + "mimetype": "image/jpeg", + "exifbyteorder": "Little-endian (Intel, II)", + "orientation": "Horizontal (normal)", + "xresolution": 72, + "yresolution": 72, + "resolutionunit": "inches", + "software": "ACDSee Pro 7", + "modifydate": "2021:02:06 19:55:44", + "ycbcrpositioning": "Centered", + "subsectime": 903, + "exifimagewidth": 1236, + "exifimageheight": 891, + "xmptoolkit": "Image::ExifTool 12.44", + "gpslatitude": "22 deg 54' 40.92\" S", + "gpslongitude": "43 deg 12' 21.30\" W", + "profilecmmtype": "Linotronic", + "profileversion": "2.1.0", + "profileclass": "Display Device Profile", + "colorspacedata": "RGB ", + "profileconnectionspace": "XYZ ", + "profiledatetime": "1998:02:09 06:49:00", + "profilefilesignature": "acsp", + "primaryplatform": 
"Microsoft Corporation", + "cmmflags": "Not Embedded, Independent", + "devicemanufacturer": "Hewlett-Packard", + "devicemodel": "sRGB", + "deviceattributes": "Reflective, Glossy, Positive, Color", + "renderingintent": "Perceptual", + "connectionspaceilluminant": "0.9642 1 0.82491", + "profilecreator": "Hewlett-Packard", + "profileid": 0, + "profilecopyright": "Copyright (c) 1998 Hewlett-Packard Company", + "profiledescription": "sRGB IEC61966-2.1", + "mediawhitepoint": "0.95045 1 1.08905", + "mediablackpoint": "0 0 0", + "redmatrixcolumn": "0.43607 0.22249 0.01392", + "greenmatrixcolumn": "0.38515 0.71687 0.09708", + "bluematrixcolumn": "0.14307 0.06061 0.7141", + "devicemfgdesc": "IEC http://www.iec.ch", + "devicemodeldesc": "IEC 61966-2.1 Default RGB colour space - sRGB", + "viewingconddesc": "Reference Viewing Condition in IEC61966-2.1", + "viewingcondilluminant": "19.6445 20.3718 16.8089", + "viewingcondsurround": "3.92889 4.07439 3.36179", + "viewingcondilluminanttype": "D50", + "luminance": "76.03647 80 87.12462", + "measurementobserver": "CIE 1931", + "measurementbacking": "0 0 0", + "measurementgeometry": "Unknown", + "measurementflare": "0.999%", + "measurementilluminant": "D65", + "technology": "Cathode Ray Tube Display", + "redtrc": "(Binary data 2060 bytes, use -b option to extract)", + "greentrc": "(Binary data 2060 bytes, use -b option to extract)", + "bluetrc": "(Binary data 2060 bytes, use -b option to extract)", + "comment": "Colégio Militar do Rio de Janeiro (J David, 1906)", + "imagewidth": 1236, + "imageheight": 891, + "encodingprocess": "Baseline DCT, Huffman coding", + "bitspersample": 8, + "colorcomponents": 3, + "ycbcrsubsampling": "YCbCr4:2:2 (2 1)", + "imagesize": "1236x891", + "megapixels": 1.1, + "subsecmodifydate": "2021:02:06 19:55:44.903", + "gpslatituderef": "South", + "gpslongituderef": "West", + "gpsposition": "22 deg 54' 40.92\" S, 43 deg 12' 21.30\" W", } scanner_event = run_test_scan(