Skip to content

Commit

Permalink
Merge pull request #269 from ryanohoro/ScanDmg
Browse files Browse the repository at this point in the history
Add DMG Scanner
  • Loading branch information
phutelmyer authored Dec 30, 2022
2 parents acdcb98 + 6856309 commit fbeeb66
Show file tree
Hide file tree
Showing 13 changed files with 526 additions and 15 deletions.
17 changes: 10 additions & 7 deletions build/python/backend/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,14 @@ ARG YARA_PYTHON_VERSION=4.2.3
ARG CAPA_VERSION=4.0.1
ARG EXIFTOOL_VERSION=12.52

# Set up package pinning for future releases (kinetic 22.10, 7zip 22.01+dfsg-2)
COPY ./build/python/backend/pin.pref /etc/apt/preferences.d/pin.pref
COPY ./build/python/backend/kinetic.list /etc/apt/sources.list.d/kinetic.list

# Update packages
RUN apt-get -qq update && \
RUN apt-get -q update && \
# Install build packages
apt-get install --no-install-recommends -qq \
apt-get install -q -y --no-install-recommends \
automake \
build-essential \
cmake \
Expand All @@ -27,7 +31,7 @@ RUN apt-get -qq update && \
python-is-python3 \
pkg-config \
# Install runtime packages
7zip=21.07+dfsg-4 \
7zip \
antiword \
libarchive-dev \
libfuzzy-dev \
Expand Down Expand Up @@ -90,8 +94,8 @@ RUN apt-get -qq update && \


# Install JTR
RUN apt-get -qq update \
&& apt-get install -qq --no-install-recommends -y \
RUN apt-get -q update \
&& apt-get install -q -y --no-install-recommends \
ca-certificates \
libssl-dev \
zlib1g-dev \
Expand Down Expand Up @@ -155,8 +159,7 @@ RUN cd /strelka/ && \
USER 1001

# Run build checks
RUN set -x && \
echo '[+] Run checks' && \
RUN echo '[+] Run checks' && \
cd /strelka/strelka/ && \
python3 -m pytest -s tests/ && \
echo '[+] Done'
Expand Down
1 change: 1 addition & 0 deletions build/python/backend/kinetic.list
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
deb http://archive.ubuntu.com/ubuntu kinetic main restricted universe multiverse
7 changes: 7 additions & 0 deletions build/python/backend/pin.pref
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
Package: 7zip
Pin: release n=jammy
Pin-Priority: 100

Package: *
Pin: release n=kinetic
Pin-Priority: 200
7 changes: 7 additions & 0 deletions configs/python/backend/backend.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,13 @@ scanners:
# options:
# tmp_directory: '/dev/shm/'
# location: '/etc/capa/'
'ScanDmg':
- positive:
flavors:
- 'dmg_disk_image'
# - 'dmg_encrypted_disk_image'
- 'hfsplus_disk_image'
priority: 5
'ScanDocx':
- positive:
flavors:
Expand Down
31 changes: 31 additions & 0 deletions configs/python/backend/taste/taste.yara
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,24 @@ rule cpio_file {
$a at 0
}

// Tastes a file as "dmg_disk_image" when the 4-byte ASCII signature "koly"
// sits exactly 0x200 (512) bytes before the end of the file.
// NOTE(review): presumably this is the trailer block of an Apple disk image --
// confirm against the DMG format description.
rule dmg_disk_image {
meta:
type = "archive"
strings:
$koly = { 6B 6F 6C 79 } // koly
condition:
// Anchored relative to the end of the file, not the start.
$koly at filesize - 0x200
}

// Tastes a file as "dmg_encrypted_disk_image" when it starts with one of two
// NUL-terminated 8-character ASCII signatures, labelled v1 and v2 below.
rule dmg_encrypted_disk_image {
meta:
type = "archive"
strings:
$v1 = { 65 6E 63 72 63 64 73 61 00 } // encrcdsa - v1
$v2 = { 63 64 73 61 65 6E 63 72 00 } // cdsaencr - v2
condition:
// Either signature must be the very first bytes of the file.
$v1 at 0 or $v2 at 0
}

rule encrypted_zip
{
Expand Down Expand Up @@ -92,6 +110,19 @@ rule encrypted_word_document
any of them
}

// Tastes a file as "hfsplus_disk_image" when one of two 4-byte patterns, both
// starting with ASCII "H+", appears at a fixed offset: $a at 0x400 or $b at
// 0x408. The two references in the meta section describe the on-disk layout.
rule hfsplus_disk_image {
meta:
type = "archive"
reference = "https://developer.apple.com/library/archive/technotes/tn/tn1150.html"
reference = "https://fossies.org/linux/file/magic/Magdir/macintosh"
strings:
$a = { 48 2B 00 04 } // H+ Non-bootable
$b = { 48 2B 4C 78 } // H+Lx Bootable
condition:
// Note the two patterns are checked at different offsets.
$a at 0x400 or
$b at 0x408
}

rule iso_file {
meta:
type = "archive"
Expand Down
1 change: 1 addition & 0 deletions docs/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -553,6 +553,7 @@ The table below describes each scanner and its options. Each scanner has the hid
| ScanBzip2 | Decompresses bzip2 files | N/A |
| ScanCapa | Analyzes executable files with FireEye [capa](https://github.com/fireeye/capa) | "tempfile_directory" -- location where `tempfile` will write temporary files (defaults to "/tmp/")<br>"location" -- location of the capa rules file or directory (defaults to "/etc/capa/") |
| ScanCuckoo | Sends files to a Cuckoo sandbox | "url" -- URL of the Cuckoo sandbox (defaults to None)<br>"priority" -- Cuckoo priority assigned to the task (defaults to 3)<br>"timeout" -- amount of time (in seconds) to wait for the task to upload (defaults to 10)<br>"unique" -- boolean that tells Cuckoo to only analyze samples that have not been analyzed before (defaults to True)<br>"username" -- username used for authenticating to Cuckoo (defaults to None, optionally read from environment variable "CUCKOO_USERNAME")<br>"password" -- password used for authenticating to Cuckoo (defaults to None, optionally read from environment variable "CUCKOO_PASSWORD") |
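| ScanDmg | Collects metadata from Mac DMG and other disk images, and extracts archived files | "limit" -- maximum number of files to extract (defaults to 1000)<br>"tmp_file_directory" -- location where `tempfile` will write temporary files (defaults to "/tmp/") |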
| ScanDocx | Collects metadata and extracts text from docx files | "extract_text" -- boolean that determines if document text should be extracted as a child file (defaults to False) |
| ScanElf | Collects metadata from ELF files | N/A |
| ScanEmail | Collects metadata and extracts files from email messages | N/A |
Expand Down
257 changes: 257 additions & 0 deletions src/python/strelka/scanners/scan_dmg.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,257 @@
import os
import pathlib
import re
import shutil
import subprocess
import tempfile

from strelka import strelka


class ScanDmg(strelka.Scanner):
    """Extract files and collect metadata from DMG and other disk images.

    The input is written to a temporary file and handed to the ``7zz``
    command-line tool twice: ``7zz x`` extracts the contents, which are
    uploaded as child files, and ``7zz l`` lists them, and that listing is
    parsed into ``self.event``.

    Options read by ``scan``:
        limit: Maximum number of extracted files to upload (default 1000).
        tmp_file_directory: Directory in which the temporary input file and
            the temporary extraction directory are created (default "/tmp/").
        scanner_timeout: Timeout in seconds applied to each ``7zz``
            invocation (default 150).

    Flags appended on failure:
        dmg_7zip_not_installed_error, dmg_7zip_extract_process_error,
        dmg_7zip_extract_error, dmg_file_limit_error, dmg_file_upload_error,
        dmg_7zip_output_error, dmg_7zip_parse_error.
    """

    # Top-level entries of the image that are neither uploaded nor listed.
    EXCLUDED_ROOT_DIRS = ["[SYSTEM]"]

    # Extraction produces duplicate files for these extended attributes,
    # named "<file>:<attribute>"; they are not uploaded.
    _XATTR_SUFFIXES = (":com.apple.quarantine", ":com.apple.FinderInfo")

    # Single-letter attribute codes in the "Attr" column of a listing.
    _FILE_MODES = {
        "D": "directory",
        "R": "readonly",
        "H": "hidden",
        "S": "system",
        "A": "archivable",
    }

    # Listing property name -> key stored in a partition dictionary.
    _PARTITION_PROPERTIES = {
        "Label": "label",
        "Path": "path",
        "Type": "type",
        "Created": "created",
        "Creator Application": "creator_application",
        "File System": "file_system",
    }

    # 7-Zip (z) 21.07 (x64) : Copyright (c) 1999-2021 Igor Pavlov : 2021-12-26
    _RE_7ZIP_VERSION = re.compile(r"^7-Zip[^\d]+(\d+\.\d+)")

    # A line consisting only of "--" or "----" opens a properties section.
    _RE_MODE_PROPERTIES = re.compile(r"^(--|----)$")

    # Comment = value
    _RE_PROPERTY = re.compile(r"^(.+) = (.+)$")

    #    Date      Time    Attr         Size   Compressed  Name
    _RE_MODE_FILES = re.compile(
        r"\s+Date\s+Time\s+Attr\s+Size\s+Compressed\s+Name"
    )

    # 2022-12-05 17:23:59 ....A       100656       102400  lorem.txt
    _RE_FILE = re.compile(
        r"(?P<datetime>\d+-\d+-\d+\s\d+:\d+:\d+)\s+(?P<modes>[A-Z.]{5})"
        r"(?:\s+(?P<size>\d+))?(?:\s+(?P<compressed>\d+))?\s+(?P<name>.+)"
    )

    def scan(self, data, file, options, expire_at):
        """Initialise the event and run extraction and listing on ``data``.

        Args:
            data: Raw bytes of the disk image.
            file: File object being scanned (not used by this scanner).
            options: Scanner options; see the class docstring.
            expire_at: Expiration value forwarded to every upload.
        """
        file_limit = options.get("limit", 1000)
        tmp_directory = options.get("tmp_file_directory", "/tmp/")
        scanner_timeout = options.get("scanner_timeout", 150)

        self.event["total"] = {"files": 0, "extracted": 0}
        self.event["files"] = []
        self.event["meta"] = {}

        try:
            self.extract_7zip(
                data, tmp_directory, scanner_timeout, expire_at, file_limit
            )
        except Exception:
            # Boundary handler: a scanner failure is reported as a flag.
            self.flags.append("dmg_7zip_extract_error")

    def extract_7zip(self, data, tmp_dir, scanner_timeout, expire_at, file_limit):
        """Extract ``data`` with 7zz, upload the files, then parse the listing.

        Args:
            data: Raw bytes of the disk image.
            tmp_dir: Directory in which temporary files are created.
            scanner_timeout: Timeout in seconds for each ``7zz`` invocation.
            expire_at: Expiration value forwarded to every upload.
            file_limit: Maximum number of extracted files to upload.
        """
        # Nothing can be done without the 7zz binary on PATH.
        if not shutil.which("7zz"):
            self.flags.append("dmg_7zip_not_installed_error")
            return

        with tempfile.NamedTemporaryFile(dir=tmp_dir, mode="wb") as tmp_data:
            tmp_data.write(data)
            # 7zz opens the file by name, so the bytes must be on disk first.
            tmp_data.flush()

            try:
                # Extract next to the input file so the configured temporary
                # directory is honoured for the (potentially large) output.
                with tempfile.TemporaryDirectory(dir=tmp_dir) as tmp_extract:
                    try:
                        # subprocess.run kills the child when the timeout
                        # expires; the extraction output is not needed.
                        subprocess.run(
                            ["7zz", "x", tmp_data.name, f"-o{tmp_extract}"],
                            stdout=subprocess.DEVNULL,
                            stderr=subprocess.DEVNULL,
                            timeout=scanner_timeout,
                        )
                    except Exception:
                        # Best effort: whatever was extracted before the
                        # failure is still uploaded below.
                        self.flags.append("dmg_7zip_extract_process_error")

                    for path in self._iter_extracted(
                        pathlib.Path(tmp_extract), self.EXCLUDED_ROOT_DIRS
                    ):
                        if str(path).endswith(self._XATTR_SUFFIXES):
                            continue

                        if self.event["total"]["extracted"] >= file_limit:
                            self.flags.append("dmg_file_limit_error")
                            break

                        try:
                            self.upload(path, expire_at)
                            self.event["total"]["extracted"] += 1
                        except Exception:
                            self.flags.append("dmg_file_upload_error")

            except Exception:
                self.flags.append("dmg_7zip_extract_error")

            try:
                listing = subprocess.run(
                    ["7zz", "l", tmp_data.name],
                    stdout=subprocess.PIPE,
                    stderr=subprocess.DEVNULL,
                    timeout=scanner_timeout,
                )
                self.parse_7zip_stdout(listing.stdout.decode("utf-8"), file_limit)
            except Exception:
                self.flags.append("dmg_7zip_output_error")

    @staticmethod
    def _iter_extracted(root, exclude=()):
        """Yield every regular file below ``root``, depth first.

        Entries of ``root`` whose name is in ``exclude`` are skipped; the
        exclusion applies to the top level only. Symbolic links are never
        followed, because the image is untrusted and a link could point
        outside the extraction directory.
        """
        for item in root.iterdir():
            if item.name in exclude or item.is_symlink():
                continue
            if item.is_dir():
                yield from ScanDmg._iter_extracted(item)
            elif item.is_file():
                yield item

    def _flush_partition(self, partition):
        """Store ``partition`` in the event if it has a path; return a new dict."""
        if "path" in partition:
            self.event["meta"].setdefault("partitions", []).append(partition)
        return {}

    def parse_7zip_stdout(self, output_7zip, file_limit):
        """Parse the text printed by ``7zz l`` into ``self.event``.

        The listing is read line by line in three modes: the header (7-Zip
        version), properties sections (one partition dictionary each) and the
        file table (one entry per non-directory file).

        Args:
            output_7zip: Decoded standard output of ``7zz l``.
            file_limit: Accepted for interface compatibility; the listing is
                not truncated.
        """
        mode = None
        partition = {}

        try:
            for line in output_7zip.splitlines():
                if not line:
                    continue

                # A separator line closes the previous partition and opens a
                # new properties section.
                if self._RE_MODE_PROPERTIES.match(line):
                    partition = self._flush_partition(partition)
                    mode = "properties"

                # The table header closes the final partition.
                if self._RE_MODE_FILES.match(line):
                    partition = self._flush_partition(partition)
                    mode = "files"

                if not mode:
                    version = self._RE_7ZIP_VERSION.match(line)
                    if version:
                        self.event["meta"]["7zip_version"] = version.group(1)

                elif mode == "properties":
                    prop = self._RE_PROPERTY.match(line)
                    if prop:
                        key = self._PARTITION_PROPERTIES.get(prop.group(1))
                        if key is not None:
                            partition[key] = prop.group(2)

                elif mode == "files":
                    entry = self._RE_FILE.match(line)
                    if not entry:
                        continue

                    # Skip entries whose first path component is excluded.
                    root_dir = os.path.normpath(entry.group("name")).split(
                        os.path.sep
                    )[0]
                    if root_dir in self.EXCLUDED_ROOT_DIRS:
                        continue

                    modes_list = [
                        self._FILE_MODES[flag]
                        for flag in entry.group("modes")
                        if flag in self._FILE_MODES
                    ]
                    if "directory" in modes_list:
                        continue

                    self.event["total"]["files"] += 1
                    self.event["files"].append(
                        {
                            "filename": entry.group("name"),
                            "size": entry.group("size"),
                            "datetime": entry.group("datetime"),
                        }
                    )

        except Exception:
            self.flags.append("dmg_7zip_parse_error")

    def upload(self, name, expire_at):
        """Send the extracted file at path ``name`` to the coordinator.

        The file is read completely and uploaded in the chunks produced by
        ``strelka.chunk_string``.
        """
        with open(name, "rb") as extracted_file:
            # NOTE(review): the File object is created with a source only and
            # is not stored anywhere after the upload -- confirm against the
            # Scanner base class whether it must also be registered.
            extract_file = strelka.File(
                source=self.name,
            )

            for chunk in strelka.chunk_string(extracted_file.read()):
                self.upload_to_coordinator(
                    extract_file.pointer,
                    chunk,
                    expire_at,
                )
2 changes: 0 additions & 2 deletions src/python/strelka/scanners/scan_vhd.py
Original file line number Diff line number Diff line change
Expand Up @@ -223,8 +223,6 @@ def parse_file_modes(file_modes):
self.event["hidden_dirs"].append(match.group("name"))

if "directory" not in modes_list:
if self.event["total"]["extracted"] >= file_limit:
break
self.event["total"]["files"] += 1
self.event["files"].append(
{
Expand Down
Binary file added src/python/strelka/tests/fixtures/test.dmg
Binary file not shown.
Binary file not shown.
Binary file not shown.
Loading

0 comments on commit fbeeb66

Please sign in to comment.