diff --git a/CHANGELOG.md b/CHANGELOG.md
index c532c1db..4d075f9a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,6 +1,12 @@
# Changelog
Changes to the project will be tracked in this file via the date of change.
+## 2023-11-04
+- Added `ScanIqy` to target and extract network addresses from IQY (Internet Query) files
+- Added tests for `ScanIqy`
+- Fix for a `poetry` build issue
+- Fix for `ScanPcap` tests
+
## 2023-10-25
- Changes to `ScanExiftool` scanner and tests
- Update `google.golang.org/grpc` dependency
diff --git a/build/python/backend/Dockerfile b/build/python/backend/Dockerfile
index bb33ea2a..99f450c1 100644
--- a/build/python/backend/Dockerfile
+++ b/build/python/backend/Dockerfile
@@ -153,15 +153,21 @@ RUN mkdir jtr && cd jtr && git init && git remote add origin https://github.com/
chmod -R 777 /jtr && \
chown -R $USER_UID:$USER_UID /jtr
-# Poetry setup
-RUN curl -sSL https://install.python-poetry.org | python3 - && \
- export PATH="/root/.local/bin:$PATH" && \
- poetry config virtualenvs.create false
+# Install Poetry globally with pip, then drop pip's download cache to keep the layer small
+RUN python3 -m pip install -U pip setuptools && \
+    python3 -m pip install poetry && \
+    rm -rf /root/.cache/pip
-# Project setup
-COPY ./pyproject.toml ./poetry.lock /strelka/
+# Set the working directory and copy only the dependency manifests
WORKDIR /strelka/
-RUN /root/.local/bin/poetry install --no-dev
+COPY pyproject.toml poetry.lock ./
+
+# Install the runtime dependencies into the system Python environment (no virtualenv).
+# Only pyproject.toml and poetry.lock are copied above, so this layer stays cached until they change.
+# `--without dev` replaces the deprecated `--no-dev` flag (requires Poetry >= 1.2).
+RUN poetry config virtualenvs.create false && \
+    poetry install --without dev && \
+    rm -rf /root/.cache/pypoetry
# Copy Strelka files
COPY ./src/python/ /strelka/
diff --git a/configs/python/backend/backend.yaml b/configs/python/backend/backend.yaml
index 606cbe5b..128d785e 100644
--- a/configs/python/backend/backend.yaml
+++ b/configs/python/backend/backend.yaml
@@ -249,6 +249,11 @@ scanners:
priority: 5
options:
limit: 50
+ 'ScanIqy':
+ - positive:
+ flavors:
+ - 'iqy_file'
+ priority: 5
'ScanJarManifest':
- positive:
flavors:
diff --git a/configs/python/backend/taste/taste.yara b/configs/python/backend/taste/taste.yara
index 966d30c1..b4ac884b 100644
--- a/configs/python/backend/taste/taste.yara
+++ b/configs/python/backend/taste/taste.yara
@@ -335,6 +335,23 @@ rule excel4_file
(uint32be(0) == 0x504b0304 and $rels and $sheet and $xlsstr)
}
+rule iqy_file {
+    meta:
+        description = "Detects potential IQY (Excel Web Query) files with various protocols"
+        author = "Paul Hutelmyer"
+        date = "2023-11-02"
+    strings:
+        $iqy_header = /^WEB\r?\n/ nocase // first line is the keyword WEB; \r? also accepts CRLF line endings
+        $http = /http:\/\// nocase
+        $https = /https:\/\// nocase
+        $ftp = /ftp:\/\// nocase
+        $ftps = /ftps:\/\// nocase
+        $file = /file:\/\// nocase
+        $smb = /smb:\/\// nocase
+    condition:
+        $iqy_header at 0 and ($http or $https or $ftp or $ftps or $file or $smb) // header at offset 0 plus at least one address scheme
+}
+
rule onenote_file
{
meta:
diff --git a/docs/README.md b/docs/README.md
index 05a1244d..e97e568a 100644
--- a/docs/README.md
+++ b/docs/README.md
@@ -738,77 +738,78 @@ Each scanner parses files of a specific flavor and performs data collection and/
### Scanner List
The table below describes each scanner and its options. Each scanner has the hidden option "scanner_timeout" which can override the distribution scanner_timeout.
-| Scanner Name | Scanner Description | Scanner Options | Contributor |
-|-------------------|----------------------------------------------------------------------------------------||-------------------------------------------------------------------------------------------------|
-| ScanAntiword | Extracts text from MS Word documents | `tempfile_directory` -- location where tempfile writes temporary files (defaults to `/tmp/`) |
-| ScanBatch | Collects metadata from batch script files | N/A |
-| ScanBase64 | Decodes base64-encoded files | N/A | [Nathan Icart](https://github.com/nateicart)
-| ScanBzip2 | Decompresses bzip2 files | N/A |
-| ScanCapa | Analyzes executable files with FireEye [capa](https://github.com/fireeye/capa) | `tempfile_directory` -- location where `tempfile` will write temporary files (defaults to `/tmp/`)
`location_rules` -- location of the capa rules file or directory (defaults to `/etc/capa/rules/`)
`location_signatures` -- location of the capa signature file or directory (defaults to `/etc/capa/signatures/`) |
-| ScanCcn | Flags files containing credit card formatted numbers | N/A | [Ryan O'Horo](https://github.com/ryanohoro) |
-| ScanCuckoo | Sends files to a Cuckoo sandbox | `url` -- URL of the Cuckoo sandbox (defaults to None)
`priority` -- Cuckoo priority assigned to the task (defaults to `3`)
`timeout` -- amount of time (in seconds) to wait for the task to upload (defaults to `10`)
`unique` -- boolean that tells Cuckoo to only analyze samples that have not been analyzed before (defaults to `True`)
`username` -- username used for authenticating to Cuckoo (defaults to None, optionally read from environment variable "CUCKOO_USERNAME")
`password` -- password used for authenticating to Cuckoo (defaults to None, optionally read from environment variable "CUCKOO_PASSWORD") |
-| ScanDonut | Decrypts, extracts config and embedded payloads from Donut loader payloads (https://github.com/TheWover/donut) using donut-decrypt (https://github.com/volexity/donut-decryptor/) | | [Ryan O'Horo](https://github.com/ryanohoro) |
-| ScanDmg | Collects metadata from Mac DMG and other disk images, and extracts archived files | `limit` -- maximum number of files to extract (defaults to `1000`) |
-| ScanDocx | Collects metadata and extracts text from docx files | `extract_text` -- boolean that determines if document text should be extracted as a child file (defaults to `False`) |
-| ScanElf | Collects metadata from ELF files | N/A |
-| ScanEmail | Collects metadata and extract files from email messages | N/A |
-| ScanEncryptedDoc | Attempts to extract decrypted Office documents through brute force password cracking | `password_file` -- location of passwords file for encrypted documents (defaults to `/etc/strelka/passwords.dat`) |
-| ScanEntropy | Calculates entropy of files | N/A |
-| ScanExiftool | Collects metadata parsed by Exiftool | `tempfile_directory` -- location where tempfile writes temporary files (defaults to `/tmp/`)
`keys` -- list of keys to log (defaults to all) |
-| ScanFalconSandbox | Sends files to an instance of Falcon Sandbox | `server` -- URL of the Falcon Sandbox API inteface
`priority` -- Falcon Sandbox priority assigned to the task (defaults to `3`)
`timeout` -- amount of time (in seconds) to wait for the task to upload (defaults to `60`)
`envID` -- list of numeric envrionment IDs that tells Falcon Sandbox which sandbox to submit a sample to (defaults to `[100]`)
`api_key` -- API key used for authenticating to Falcon Sandbox (defaults to None, optionally read from environment variable "FS_API_KEY")
`api_secret` -- API secret key used for authenticating to Falcon Sandbox (defaults to None, optionally read from environment variable "FS_API_SECKEY") |
-| ScanFloss | Analyzes executable files with FireEye [floss](https://github.com/fireeye/flare-floss) | `tempfile_directory` -- location where `tempfile` will write temporary files (defaults to `/tmp/`)
`limit` -- Maximum amount of strings to collect. (defaults to `100`) |
-| ScanFooter | Collects file footer | `length` -- number of footer characters to log as metadata (defaults to `50`)
`encodings` -- list of output encodings, any of `classic`, `raw`, `hex`, `backslash` |
-| ScanGif | Extracts data embedded in GIF files | N/A |
-| ScanGzip | Decompresses gzip files | N/A
-| ScanHash | Calculates file hash values | N/A |
-| ScanHeader | Collects file header | `length` -- number of header characters to log as metadata (defaults to `50`)
`encodings` -- list of output encodings, any of `classic`, `raw`, `hex`, `backslash` |
-| ScanHtml | Collects metadata and extracts embedded files from HTML files | `parser` -- sets the HTML parser used during scanning (defaults to `html.parser`)
`max_links` -- Maximum amount of links to output in hyperlinks field (defaults to `50`) |
-| ScanIni | Parses keys from INI files | N/A |
-| ScanIso | Collects and extracts files from ISO files | `limit` -- maximum number of files to extract (defaults to `0`) |
-| ScanJarManifest | Collects metadata from JAR manifest files | N/A |
-| ScanJavascript | Collects metadata from Javascript files | `beautify` -- beautifies JavaScript before parsing (defaults to `True`) |
-| ScanJpeg | Extracts data embedded in JPEG files | N/A |
-| ScanJson | Collects keys from JSON files | N/A |
-| ScanLibarchive | Extracts files from libarchive-compatible archives. | `limit` -- maximum number of files to extract (defaults to `1000`) |
-| ScanLnk | Collects metadata from lnk files. | N/A | Ryan Borre, [DerekT2](https://github.com/Derekt2), [Nathan Icart](https://github.com/nateicart)
-| ScanLzma | Decompresses lzma files | N/A |
-| ScanMacho | Collects metadata from Mach-O files | `tempfile_directory` -- location where tempfile writes temporary files (defaults to `/tmp/`) |
-| ScanManifest | Collects metadata from Chrome Manifest files | N/A | [DerekT2](https://github.com/Derekt2)
-| ScanMsi | Collects MSI data parsed by Exiftool | `tempfile_directory` -- location where tempfile writes temporary files (defaults to `/tmp/`)
`keys` -- list of keys to log (defaults to `all`) |
-| ScanOcr | Collects metadata and extracts optical text from image files | `extract_text` -- boolean that determines if document text should be extracted as a child file (defaults to `False`)
`split_words` -- split output text into a list of individual words (Default: True)
`tempfile_directory` -- location where `tempfile` will write temporary files (defaults to `/tmp/`) |
-| ScanOle | Extracts files from OLECF files | N/A |
-| ScanPcap | Extracts files from PCAP/PCAPNG using Zeek | `limit` -- maximum number of files to extract (defaults to `1000`) | [Ryan O'Horo](https://github.com/ryanohoro) |
-| ScanPdf | Collects metadata and extracts streams from PDF files | N/A |
-| ScanPe | Collects metadata from PE files | N/A |
-| ScanPgp | Collects metadata from PGP files | N/A |
-| ScanPhp | Collects metadata from PHP files | N/A |
-| ScanPkcs7 | Extracts files from PKCS7 certificate files | N/A |
-| ScanPlist | Collects attributes from binary and XML property list files | `keys` -- list of keys to log (defaults to `all`) |
-| ScanQr | Collects QR code metadata from image files | `support_inverted` -- Enable/disable image inversion to support inverted QR codes (white on black). Adds some image processing overhead. | [Aaron Herman](https://github.com/aaronherman)
-| ScanRar | Extracts files from RAR archives | `limit` -- maximum number of files to extract (defaults to `1000`)
`password_file` -- location of passwords file for RAR archives (defaults to `/etc/strelka/passwords.dat`) |
-| ScanRpm | Collects metadata and extracts files from RPM files | `tempfile_directory` -- location where `tempfile` will write temporary files (defaults to `/tmp/`) |
-| ScanRtf | Extracts embedded files from RTF files | `limit` -- maximum number of files to extract (defaults to `1000`) |
-| ScanSave | Exposes raw file data in the output response in an encoded and compressed format | `compression` -- compression algorithm to use on the raw file data (defaults to `gzip` - `bzip2`, `lzma`, and `none` are available)
`encoding` -- JSON compatible encoding algorithm to use on the raw file data (defaults to `base64` - `base85` also available) | [Kevin Eiche](https://github.com/keiche)
-| ScanSevenZip | Collects metadata and extracts files from 7z files, including encrypted varieties | `limit` -- maximum number of files to extract (defaults to `1000`)
`crack_pws` -- enable password cracking
`log_pws` -- add cracked passwords to event
`password_file` -- location of wordlist file (defaults to `/etc/strelka/passwords.dat`)
`brute_force` -- enable brute force password cracking
`min_length` -- minimum brute force password length
`max_length` -- maximum brute force password length | [Ryan O'Horo](https://github.com/ryanohoro) |
-| ScanStrings | Collects strings from file data | `limit` -- maximum number of strings to collect, starting from the beginning of the file (defaults to `0`, collects all strings) |
-| ScanSwf | Decompresses swf (Flash) files | N/A |
-| ScanTar | Extract files from tar archives | `limit` -- maximum number of files to extract (defaults to `1000`) |
-| ScanTlsh | Scans and compares a file's TLSH hash with a list of TLSH hashes | "location" -- location of the TLSH rules file or directory (defaults to "/etc/tlsh/")
"score" -- Score comparison threshold for matches (lower = closer match) |
-| ScanTnef | Collects metadata and extract files from TNEF files | N/A |
-| ScanTranscode | Converts uncommon image formats to PNG to ease support in other scanners | `output_format` one of `gif` `webp` `jpeg` `bmp` `png` `tiff` (default `jpeg`) | [Ryan O'Horo](https://github.com/ryanohoro) |
-| ScanUdf | Collects and extracts files from UDF files | `limit` -- maximum number of files to extract (defaults to `100`) | [Ryan O'Horo](https://github.com/ryanohoro) |
-| ScanUpx | Decompresses UPX packed files | `tempfile_directory` -- location where `tempfile` will write temporary files (defaults to `/tmp/`) |
-| ScanUrl | Collects URLs from files | `regex` -- dictionary entry that establishes the regular expression pattern used for URL parsing (defaults to a widely scoped regex) |
-| ScanVb | Collects metadata from Visual Basic script files | N/A |
-| ScanVba | Extracts and analyzes VBA from document files | `analyze_macros` -- boolean that determines if macros should be analyzed (defaults to `True`) |
-| ScanVhd | Collects and extracts files from VHD/VHDX files | `limit` -- maximum number of files to extract (defaults to `100`) | [Ryan O'Horo](https://github.com/ryanohoro) |
-| ScanVsto | Collects and extracts metadata from VSTO files | N/A |
-| ScanX509 | Collects metadata from x509 and CRL files | `type` -- string that determines the type of x509 certificate being scanned (no default, assigned as either "der" or "pem" depending on flavor) |
-| ScanXL4MA | Analyzes and parses Excel 4 Macros from XLSX files | `type` -- string that determines the type of x509 certificate being scanned (no default, assigned as either "der" or "pem" depending on flavor) | Ryan Borre
-| ScanXml | Log metadata and extract files from XML files | `extract_tags` -- list of XML tags that will have their text extracted as child files (defaults to empty list)
`metadata_tags` -- list of XML tags that will have their text logged as metadata (defaults to empty list) |
-| ScanYara | Scans files with YARA rules | `location` -- location of the YARA rules file or directory (defaults to `/etc/strelka/yara/`)
`compiled` -- Enable use of compiled YARA rules, as well as the path.
`store_offset` -- Stores file offset for YARA match
`offset_meta_key` -- YARA meta key that must exist in the YARA rule for the offset to be stored.
`offset_padding` -- Amount of data to be stored before and after offset for additional context. |
-| ScanZip | Extracts files from zip archives | `limit` -- maximum number of files to extract (defaults to `1000`)
`password_file` -- location of passwords file for zip archives (defaults to `/etc/strelka/passwords.dat`) |
-| ScanZlib | Decompresses gzip files | N/A
+| Scanner Name | Scanner Description | Scanner Options | Contributor |
+|-------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-------------------------------------------------------------------------------------------------|
+| ScanAntiword | Extracts text from MS Word documents | `tempfile_directory` -- location where tempfile writes temporary files (defaults to `/tmp/`) |
+| ScanBatch | Collects metadata from batch script files | N/A |
+| ScanBase64 | Decodes base64-encoded files | N/A | [Nathan Icart](https://github.com/nateicart)
+| ScanBzip2 | Decompresses bzip2 files | N/A |
+| ScanCapa | Analyzes executable files with FireEye [capa](https://github.com/fireeye/capa) | `tempfile_directory` -- location where `tempfile` will write temporary files (defaults to `/tmp/`)
`location_rules` -- location of the capa rules file or directory (defaults to `/etc/capa/rules/`)
`location_signatures` -- location of the capa signature file or directory (defaults to `/etc/capa/signatures/`) |
+| ScanCcn | Flags files containing credit card formatted numbers | N/A | [Ryan O'Horo](https://github.com/ryanohoro) |
+| ScanCuckoo | Sends files to a Cuckoo sandbox | `url` -- URL of the Cuckoo sandbox (defaults to None)
`priority` -- Cuckoo priority assigned to the task (defaults to `3`)
`timeout` -- amount of time (in seconds) to wait for the task to upload (defaults to `10`)
`unique` -- boolean that tells Cuckoo to only analyze samples that have not been analyzed before (defaults to `True`)
`username` -- username used for authenticating to Cuckoo (defaults to None, optionally read from environment variable "CUCKOO_USERNAME")
`password` -- password used for authenticating to Cuckoo (defaults to None, optionally read from environment variable "CUCKOO_PASSWORD") |
+| ScanDonut | Decrypts, extracts config and embedded payloads from Donut loader payloads (https://github.com/TheWover/donut) using donut-decrypt (https://github.com/volexity/donut-decryptor/) | | [Ryan O'Horo](https://github.com/ryanohoro) |
+| ScanDmg | Collects metadata from Mac DMG and other disk images, and extracts archived files | `limit` -- maximum number of files to extract (defaults to `1000`) |
+| ScanDocx | Collects metadata and extracts text from docx files | `extract_text` -- boolean that determines if document text should be extracted as a child file (defaults to `False`) |
+| ScanElf | Collects metadata from ELF files | N/A |
+| ScanEmail | Collects metadata and extract files from email messages | N/A |
+| ScanEncryptedDoc | Attempts to extract decrypted Office documents through brute force password cracking | `password_file` -- location of passwords file for encrypted documents (defaults to `/etc/strelka/passwords.dat`) |
+| ScanEntropy | Calculates entropy of files | N/A |
+| ScanExiftool | Collects metadata parsed by Exiftool | `tempfile_directory` -- location where tempfile writes temporary files (defaults to `/tmp/`)
`keys` -- list of keys to log (defaults to all) |
+| ScanFalconSandbox | Sends files to an instance of Falcon Sandbox | `server` -- URL of the Falcon Sandbox API interface
`priority` -- Falcon Sandbox priority assigned to the task (defaults to `3`)
`timeout` -- amount of time (in seconds) to wait for the task to upload (defaults to `60`)
`envID` -- list of numeric environment IDs that tells Falcon Sandbox which sandbox to submit a sample to (defaults to `[100]`)
`api_key` -- API key used for authenticating to Falcon Sandbox (defaults to None, optionally read from environment variable "FS_API_KEY")
`api_secret` -- API secret key used for authenticating to Falcon Sandbox (defaults to None, optionally read from environment variable "FS_API_SECKEY") |
+| ScanFloss | Analyzes executable files with FireEye [floss](https://github.com/fireeye/flare-floss) | `tempfile_directory` -- location where `tempfile` will write temporary files (defaults to `/tmp/`)
`limit` -- Maximum amount of strings to collect. (defaults to `100`) |
+| ScanFooter | Collects file footer | `length` -- number of footer characters to log as metadata (defaults to `50`)
`encodings` -- list of output encodings, any of `classic`, `raw`, `hex`, `backslash` |
+| ScanGif | Extracts data embedded in GIF files | N/A |
+| ScanGzip | Decompresses gzip files | N/A
+| ScanHash | Calculates file hash values | N/A |
+| ScanHeader | Collects file header | `length` -- number of header characters to log as metadata (defaults to `50`)
`encodings` -- list of output encodings, any of `classic`, `raw`, `hex`, `backslash` |
+| ScanHtml | Collects metadata and extracts embedded files from HTML files | `parser` -- sets the HTML parser used during scanning (defaults to `html.parser`)
`max_links` -- Maximum amount of links to output in hyperlinks field (defaults to `50`) |
+| ScanIni | Parses keys from INI files | N/A |
+| ScanIqy | Parses Microsoft Excel Internet Query (IQY) files | N/A |
+| ScanIso | Collects and extracts files from ISO files | `limit` -- maximum number of files to extract (defaults to `0`) |
+| ScanJarManifest | Collects metadata from JAR manifest files | N/A |
+| ScanJavascript | Collects metadata from Javascript files | `beautify` -- beautifies JavaScript before parsing (defaults to `True`) |
+| ScanJpeg | Extracts data embedded in JPEG files | N/A |
+| ScanJson | Collects keys from JSON files | N/A |
+| ScanLibarchive | Extracts files from libarchive-compatible archives. | `limit` -- maximum number of files to extract (defaults to `1000`) |
+| ScanLnk | Collects metadata from lnk files. | N/A | Ryan Borre, [DerekT2](https://github.com/Derekt2), [Nathan Icart](https://github.com/nateicart)
+| ScanLzma | Decompresses lzma files | N/A |
+| ScanMacho | Collects metadata from Mach-O files | `tempfile_directory` -- location where tempfile writes temporary files (defaults to `/tmp/`) |
+| ScanManifest | Collects metadata from Chrome Manifest files | N/A | [DerekT2](https://github.com/Derekt2)
+| ScanMsi | Collects MSI data parsed by Exiftool | `tempfile_directory` -- location where tempfile writes temporary files (defaults to `/tmp/`)
`keys` -- list of keys to log (defaults to `all`) |
+| ScanOcr | Collects metadata and extracts optical text from image files | `extract_text` -- boolean that determines if document text should be extracted as a child file (defaults to `False`)
`split_words` -- split output text into a list of individual words (Default: True)
`tempfile_directory` -- location where `tempfile` will write temporary files (defaults to `/tmp/`) |
+| ScanOle | Extracts files from OLECF files | N/A |
+| ScanPcap | Extracts files from PCAP/PCAPNG using Zeek | `limit` -- maximum number of files to extract (defaults to `1000`) | [Ryan O'Horo](https://github.com/ryanohoro) |
+| ScanPdf | Collects metadata and extracts streams from PDF files | N/A |
+| ScanPe | Collects metadata from PE files | N/A |
+| ScanPgp | Collects metadata from PGP files | N/A |
+| ScanPhp | Collects metadata from PHP files | N/A |
+| ScanPkcs7 | Extracts files from PKCS7 certificate files | N/A |
+| ScanPlist | Collects attributes from binary and XML property list files | `keys` -- list of keys to log (defaults to `all`) |
+| ScanQr | Collects QR code metadata from image files | `support_inverted` -- Enable/disable image inversion to support inverted QR codes (white on black). Adds some image processing overhead. | [Aaron Herman](https://github.com/aaronherman)
+| ScanRar | Extracts files from RAR archives | `limit` -- maximum number of files to extract (defaults to `1000`)
`password_file` -- location of passwords file for RAR archives (defaults to `/etc/strelka/passwords.dat`) |
+| ScanRpm | Collects metadata and extracts files from RPM files | `tempfile_directory` -- location where `tempfile` will write temporary files (defaults to `/tmp/`) |
+| ScanRtf | Extracts embedded files from RTF files | `limit` -- maximum number of files to extract (defaults to `1000`) |
+| ScanSave | Exposes raw file data in the output response in an encoded and compressed format | `compression` -- compression algorithm to use on the raw file data (defaults to `gzip` - `bzip2`, `lzma`, and `none` are available)
`encoding` -- JSON compatible encoding algorithm to use on the raw file data (defaults to `base64` - `base85` also available) | [Kevin Eiche](https://github.com/keiche)
+| ScanSevenZip | Collects metadata and extracts files from 7z files, including encrypted varieties | `limit` -- maximum number of files to extract (defaults to `1000`)
`crack_pws` -- enable password cracking
`log_pws` -- add cracked passwords to event
`password_file` -- location of wordlist file (defaults to `/etc/strelka/passwords.dat`)
`brute_force` -- enable brute force password cracking
`min_length` -- minimum brute force password length
`max_length` -- maximum brute force password length | [Ryan O'Horo](https://github.com/ryanohoro) |
+| ScanStrings | Collects strings from file data | `limit` -- maximum number of strings to collect, starting from the beginning of the file (defaults to `0`, collects all strings) |
+| ScanSwf | Decompresses swf (Flash) files | N/A |
+| ScanTar | Extract files from tar archives | `limit` -- maximum number of files to extract (defaults to `1000`) |
+| ScanTlsh | Scans and compares a file's TLSH hash with a list of TLSH hashes | "location" -- location of the TLSH rules file or directory (defaults to "/etc/tlsh/")
"score" -- Score comparison threshold for matches (lower = closer match) |
+| ScanTnef | Collects metadata and extract files from TNEF files | N/A |
+| ScanTranscode | Converts uncommon image formats to PNG to ease support in other scanners | `output_format` one of `gif` `webp` `jpeg` `bmp` `png` `tiff` (default `jpeg`) | [Ryan O'Horo](https://github.com/ryanohoro) |
+| ScanUdf | Collects and extracts files from UDF files | `limit` -- maximum number of files to extract (defaults to `100`) | [Ryan O'Horo](https://github.com/ryanohoro) |
+| ScanUpx | Decompresses UPX packed files | `tempfile_directory` -- location where `tempfile` will write temporary files (defaults to `/tmp/`) |
+| ScanUrl | Collects URLs from files | `regex` -- dictionary entry that establishes the regular expression pattern used for URL parsing (defaults to a widely scoped regex) |
+| ScanVb | Collects metadata from Visual Basic script files | N/A |
+| ScanVba | Extracts and analyzes VBA from document files | `analyze_macros` -- boolean that determines if macros should be analyzed (defaults to `True`) |
+| ScanVhd | Collects and extracts files from VHD/VHDX files | `limit` -- maximum number of files to extract (defaults to `100`) | [Ryan O'Horo](https://github.com/ryanohoro) |
+| ScanVsto | Collects and extracts metadata from VSTO files | N/A |
+| ScanX509 | Collects metadata from x509 and CRL files | `type` -- string that determines the type of x509 certificate being scanned (no default, assigned as either "der" or "pem" depending on flavor) |
+| ScanXL4MA | Analyzes and parses Excel 4 Macros from XLSX files | `type` -- string that determines the type of x509 certificate being scanned (no default, assigned as either "der" or "pem" depending on flavor) | Ryan Borre
+| ScanXml | Log metadata and extract files from XML files | `extract_tags` -- list of XML tags that will have their text extracted as child files (defaults to empty list)
`metadata_tags` -- list of XML tags that will have their text logged as metadata (defaults to empty list) |
+| ScanYara | Scans files with YARA rules | `location` -- location of the YARA rules file or directory (defaults to `/etc/strelka/yara/`)
`compiled` -- Enable use of compiled YARA rules, as well as the path.
`store_offset` -- Stores file offset for YARA match
`offset_meta_key` -- YARA meta key that must exist in the YARA rule for the offset to be stored.
`offset_padding` -- Amount of data to be stored before and after offset for additional context. |
+| ScanZip | Extracts files from zip archives | `limit` -- maximum number of files to extract (defaults to `1000`)
`password_file` -- location of passwords file for zip archives (defaults to `/etc/strelka/passwords.dat`) |
+| ScanZlib | Decompresses zlib files | N/A
## Tests
As Strelka consists of many scanners and dependencies for those scanners. Pytests are particularly valuable for testing the ongoing functionality of Strelka and it's scanners. Tests allow users to write test cases that verify the correct behavior of Strelka scanners to ensure that the scanners remain reliable and accurate. Additionally, using pytests can help streamline the development process, allowing developers to focus on writing new features and improvements for the scanners. Strelka contains a set of standard test fixture files that represent the types of files Strelka ingests. Test fixtures can also be loaded remotely with the helper functions `get_remote_fixture` and `get_remote_fixture_archive` for scanner tests that need malicious samples.
diff --git a/pyproject.toml b/pyproject.toml
index df407b2a..00143d01 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -2,7 +2,11 @@
name = "strelka-worker"
version = "0.23.10.19"
description = "Strelka's backend Python worker"
-authors = ["Paul.Hutelmyer@Target.com", "Ryan.Ohoro@Target.com", "Sara.Kalupa@Target.com"]
+authors = [
+    "Paul Hutelmyer <Paul.Hutelmyer@Target.com>",
+    "Ryan Ohoro <Ryan.Ohoro@Target.com>",
+    "Sara Kalupa <Sara.Kalupa@Target.com>"
+]
[tool.poetry.dependencies]
python = ">=3.10,<=3.12"
diff --git a/src/python/strelka/scanners/scan_iqy.py b/src/python/strelka/scanners/scan_iqy.py
new file mode 100644
index 00000000..5223e42e
--- /dev/null
+++ b/src/python/strelka/scanners/scan_iqy.py
@@ -0,0 +1,64 @@
+# Description #
+# This scanner is looking for iqy files used with excel.
+#
+# author: Tasha Taylor
+# date: 10/30/2023
+
+import re
+
+from strelka import strelka
+
+
+class ScanIqy(strelka.Scanner):
+    """
+    Extract network addresses from IQY (Excel Web Query) files.
+
+    IQY files are typically created from a VBA Web Query output.
+    The following is a typical format:
+        WEB
+        1
+        [URL]
+        [optional parameters]
+    Additional properties can be found at: https://learn.microsoft.com/en-us/office/vba/api/excel.querytable
+    """
+
+    # URL with a known scheme, or a UNC path (\\host\share); compiled once, not per scan.
+    _ADDRESS_PATTERN = re.compile(
+        r"\b(?:http|https|ftp|ftps|file|smb)://\S+|"
+        r"\\{2}\w+\\(?:[\w$]+\\)*[\w$]+",
+        re.IGNORECASE,
+    )
+
+    def scan(self, data, file, options, expire_at):
+        """
+        Collect every address in the file and report each one as a URL IOC.
+
+        Sets self.event["address_found"]; failures are appended to self.flags.
+
+        Args:
+            data: File contents as bytes.
+            file, options, expire_at: Not used by this scanner.
+        """
+        try:
+            # Attempt UTF-8 decoding first. The latin-1 fallback maps every
+            # byte value, so it cannot raise UnicodeDecodeError itself.
+            try:
+                text = data.decode("utf-8")
+            except UnicodeDecodeError:
+                text = data.decode("latin-1")
+
+            # Skip the first line (the "WEB" keyword in a typical IQY file) and
+            # gather every match per line, not only the first one.
+            addresses = set()
+            for entry in text.splitlines()[1:]:
+                for match in self._ADDRESS_PATTERN.finditer(entry):
+                    addresses.add(match.group())
+
+            self.event["address_found"] = bool(addresses)
+
+            # Sorted so the IOC list is built in a deterministic order.
+            self.add_iocs(sorted(addresses), self.type.url)
+
+        except Exception as e:
+            # Best effort: record the failure as a flag rather than raising.
+            self.flags.append(f"Unexpected exception: {e}")
diff --git a/src/python/strelka/tests/__init__.py b/src/python/strelka/tests/__init__.py
index 07870ab0..5873a94a 100644
--- a/src/python/strelka/tests/__init__.py
+++ b/src/python/strelka/tests/__init__.py
@@ -47,6 +47,10 @@ def run_test_scan(
expire_at=datetime.date.today(),
)
+ # If a scanner outputs IOCs, add them to the event under the "iocs" key so tests can assert on them
+ if scanner.iocs:
+ scanner.event.update({"iocs": scanner.iocs})
+
return scanner.event
diff --git a/src/python/strelka/tests/fixtures/test.iqy b/src/python/strelka/tests/fixtures/test.iqy
new file mode 100644
index 00000000..63a7ef64
--- /dev/null
+++ b/src/python/strelka/tests/fixtures/test.iqy
@@ -0,0 +1,3 @@
+WEB
+1
+https://github.com/target/strelka/blob/master/docs/index.html // Test case: Valid HTTPS URL
\ No newline at end of file
diff --git a/src/python/strelka/tests/test_scan_iqy.py b/src/python/strelka/tests/test_scan_iqy.py
new file mode 100644
index 00000000..fad64fbd
--- /dev/null
+++ b/src/python/strelka/tests/test_scan_iqy.py
@@ -0,0 +1,40 @@
+from pathlib import Path
+from unittest import TestCase, mock
+
+from strelka.scanners.scan_iqy import ScanIqy as ScanUnderTest
+from strelka.tests import run_test_scan
+
+
+def test_scan_iqy(mocker):
+ """
+ Pass: Sample event matches output of scanner.
+ Failure: Unable to load file or sample event fails to match.
+ """
+
+ test_scan_event = {
+ "elapsed": mock.ANY,
+ "flags": [],
+ "address_found": True,
+ "iocs": [
+ {
+ "description": "",
+ "ioc": "github.com",
+ "ioc_type": "domain",
+ "scanner": "ScanIqy",
+ },
+ {
+ "description": "",
+ "ioc": "https://github.com/target/strelka/blob/master/docs/index.html",
+ "ioc_type": "url",
+ "scanner": "ScanIqy",
+ },
+ ],
+ }
+ scanner_event = run_test_scan(
+ mocker=mocker,
+ scan_class=ScanUnderTest,
+ fixture_path=Path(__file__).parent / "fixtures/test.iqy",
+ )
+
+ TestCase.maxDiff = None
+ TestCase().assertDictEqual(test_scan_event, scanner_event)
diff --git a/src/python/strelka/tests/test_scan_pcap.py b/src/python/strelka/tests/test_scan_pcap.py
index cff16309..794a230c 100644
--- a/src/python/strelka/tests/test_scan_pcap.py
+++ b/src/python/strelka/tests/test_scan_pcap.py
@@ -8,75 +8,79 @@
def test_scan_pcap(mocker):
- """
- Pass: Sample event matches output of scanner.
- Failure: Unable to load file or sample event fails to match.
- """
-
test_scan_event = {
"elapsed": mock.ANY,
"flags": [],
"total": {"files": 3, "extracted": 3},
"files": [
{
- "ts": 1673576655.41892,
- "fuid": "FOxTJwn9u5H1hBXn1",
- "tx_hosts": ["192.168.174.131"],
- "rx_hosts": ["192.168.174.1"],
- "conn_uids": mock.ANY,
- "source": "HTTP",
- "depth": 0,
"analyzers": unordered(["PE", "EXTRACT"]),
- "mime_type": "application/x-dosexec",
+ "depth": 0,
"duration": 0.00018906593322753906,
+ "extracted": "extract-1673576655.41892-HTTP-FOxTJwn9u5H1hBXn1",
+ "extracted_cutoff": False,
+ "fuid": "FOxTJwn9u5H1hBXn1",
+ "id.orig_h": "192.168.174.1",
+ "id.orig_p": 13147,
+ "id.resp_h": "192.168.174.131",
+ "id.resp_p": 8080,
"is_orig": False,
- "seen_bytes": 4096,
- "total_bytes": 4096,
+ "local_orig": True,
+ "mime_type": "application/x-dosexec",
"missing_bytes": 0,
"overflow_bytes": 0,
+ "seen_bytes": 4096,
+ "source": "HTTP",
"timedout": False,
- "extracted": "extract-1673576655.41892-HTTP-FOxTJwn9u5H1hBXn1",
- "extracted_cutoff": False,
+ "total_bytes": 4096,
+ "ts": 1673576655.41892,
+ "uid": mock.ANY,
},
{
- "ts": 1673576666.163778,
- "fuid": "FxYAi61ktBsEM4hpNd",
- "tx_hosts": ["192.168.174.131"],
- "rx_hosts": ["192.168.174.1"],
- "conn_uids": mock.ANY,
- "source": "HTTP",
- "depth": 0,
"analyzers": unordered(["EXTRACT"]),
- "mime_type": "image/jpeg",
+ "depth": 0,
"duration": 0.007551908493041992,
+ "extracted": "extract-1673576666.163778-HTTP-FxYAi61ktBsEM4hpNd",
+ "extracted_cutoff": False,
+ "fuid": "FxYAi61ktBsEM4hpNd",
+ "id.orig_h": "192.168.174.1",
+ "id.orig_p": 13162,
+ "id.resp_h": "192.168.174.131",
+ "id.resp_p": 8080,
"is_orig": False,
- "seen_bytes": 308566,
- "total_bytes": 308566,
+ "local_orig": True,
+ "mime_type": "image/jpeg",
"missing_bytes": 0,
"overflow_bytes": 0,
+ "seen_bytes": 308566,
+ "source": "HTTP",
"timedout": False,
- "extracted": "extract-1673576666.163778-HTTP-FxYAi61ktBsEM4hpNd",
- "extracted_cutoff": False,
+ "total_bytes": 308566,
+ "ts": 1673576666.163778,
+ "uid": mock.ANY,
},
{
- "ts": 1673576677.801391,
- "fuid": "FoNGFk1uRR9pVo9XKi",
- "tx_hosts": ["192.168.174.131"],
- "rx_hosts": ["192.168.174.1"],
- "conn_uids": mock.ANY,
- "source": "HTTP",
- "depth": 0,
"analyzers": unordered(["EXTRACT"]),
- "mime_type": "application/xml",
+ "depth": 0,
"duration": 0.0,
+ "extracted": "extract-1673576677.801391-HTTP-FoNGFk1uRR9pVo9XKi",
+ "extracted_cutoff": False,
+ "fuid": "FoNGFk1uRR9pVo9XKi",
+ "id.orig_h": "192.168.174.1",
+ "id.orig_p": 13176,
+ "id.resp_h": "192.168.174.131",
+ "id.resp_p": 8080,
"is_orig": False,
- "seen_bytes": 620,
- "total_bytes": 620,
+ "local_orig": True,
+ "mime_type": "application/xml",
"missing_bytes": 0,
"overflow_bytes": 0,
+ "seen_bytes": 620,
+ "source": "HTTP",
"timedout": False,
- "extracted": "extract-1673576677.801391-HTTP-FoNGFk1uRR9pVo9XKi",
- "extracted_cutoff": False,
+ "total_bytes": 620,
+ "ts": 1673576677.801391,
+ "uid": mock.ANY,
},
],
}
@@ -104,64 +108,73 @@ def test_scan_pcap_ng(mocker):
"total": {"files": 3, "extracted": 3},
"files": [
{
- "ts": 1673576655.41892,
- "fuid": "FOxTJwn9u5H1hBXn1",
- "tx_hosts": ["192.168.174.131"],
- "rx_hosts": ["192.168.174.1"],
- "conn_uids": mock.ANY,
- "source": "HTTP",
- "depth": 0,
"analyzers": unordered(["PE", "EXTRACT"]),
- "mime_type": "application/x-dosexec",
- "duration": 0.00018906593322753906,
+ "depth": 0,
+ "duration": mock.ANY,
+ "extracted": mock.ANY,
+ "extracted_cutoff": False,
+ "fuid": "FOxTJwn9u5H1hBXn1",
+ "id.orig_h": "192.168.174.1",
+ "id.orig_p": 13147,
+ "id.resp_h": "192.168.174.131",
+ "id.resp_p": 8080,
"is_orig": False,
- "seen_bytes": 4096,
- "total_bytes": 4096,
+ "local_orig": True,
+ "mime_type": "application/x-dosexec",
"missing_bytes": 0,
"overflow_bytes": 0,
+ "seen_bytes": 4096,
+ "source": "HTTP",
"timedout": False,
- "extracted": "extract-1673576655.41892-HTTP-FOxTJwn9u5H1hBXn1",
- "extracted_cutoff": False,
+ "total_bytes": 4096,
+ "ts": 1673576655.41892,
+ "uid": mock.ANY,
},
{
- "ts": 1673576666.163778,
- "fuid": "FxYAi61ktBsEM4hpNd",
- "tx_hosts": ["192.168.174.131"],
- "rx_hosts": ["192.168.174.1"],
- "conn_uids": mock.ANY,
- "source": "HTTP",
- "depth": 0,
"analyzers": unordered(["EXTRACT"]),
- "mime_type": "image/jpeg",
- "duration": 0.007551908493041992,
+ "depth": 0,
+ "duration": mock.ANY,
+ "extracted": mock.ANY,
+ "extracted_cutoff": False,
+ "fuid": "FxYAi61ktBsEM4hpNd",
+ "id.orig_h": "192.168.174.1",
+ "id.orig_p": 13162,
+ "id.resp_h": "192.168.174.131",
+ "id.resp_p": 8080,
"is_orig": False,
- "seen_bytes": 308566,
- "total_bytes": 308566,
+ "local_orig": True,
+ "mime_type": "image/jpeg",
"missing_bytes": 0,
"overflow_bytes": 0,
+ "seen_bytes": 308566,
+ "source": "HTTP",
"timedout": False,
- "extracted": "extract-1673576666.163778-HTTP-FxYAi61ktBsEM4hpNd",
- "extracted_cutoff": False,
+ "total_bytes": 308566,
+ "ts": 1673576666.163778,
+ "uid": mock.ANY,
},
{
- "ts": 1673576677.801391,
- "fuid": "FoNGFk1uRR9pVo9XKi",
- "tx_hosts": ["192.168.174.131"],
- "rx_hosts": ["192.168.174.1"],
- "conn_uids": mock.ANY,
- "source": "HTTP",
- "depth": 0,
"analyzers": unordered(["EXTRACT"]),
- "mime_type": "application/xml",
- "duration": 0.0,
+ "depth": 0,
+ "duration": mock.ANY,
+ "extracted": mock.ANY,
+ "extracted_cutoff": False,
+ "fuid": "FoNGFk1uRR9pVo9XKi",
+ "id.orig_h": "192.168.174.1",
+ "id.orig_p": 13176,
+ "id.resp_h": "192.168.174.131",
+ "id.resp_p": 8080,
"is_orig": False,
- "seen_bytes": 620,
- "total_bytes": 620,
+ "local_orig": True,
+ "mime_type": "application/xml",
"missing_bytes": 0,
"overflow_bytes": 0,
+ "seen_bytes": 620,
+ "source": "HTTP",
"timedout": False,
- "extracted": "extract-1673576677.801391-HTTP-FoNGFk1uRR9pVo9XKi",
- "extracted_cutoff": False,
+ "total_bytes": 620,
+ "ts": 1673576677.801391,
+ "uid": mock.ANY,
},
],
}
diff --git a/src/python/strelka/tests/test_scan_xl4ma.py b/src/python/strelka/tests/test_scan_xl4ma.py
index 39736836..18fec287 100644
--- a/src/python/strelka/tests/test_scan_xl4ma.py
+++ b/src/python/strelka/tests/test_scan_xl4ma.py
@@ -25,7 +25,20 @@ def test_scan_xl4ma(mocker):
"https://www.example.com/path/to/resource",
]
),
- "iocs": ["https://www.example.com/path/to/resource"],
+ "iocs": [
+ {
+ "ioc": "example.com",
+ "ioc_type": "domain",
+ "scanner": "ScanXl4ma",
+ "description": "extracted from excel4 macro",
+ },
+ {
+ "ioc": "https://www.example.com/path/to/resource",
+ "ioc_type": "url",
+ "scanner": "ScanXl4ma",
+ "description": "extracted from excel4 macro",
+ },
+ ],
}
scanner_event = run_test_scan(