From 6278d09f4cc057f439353dc11051e7292663bdae Mon Sep 17 00:00:00 2001 From: Tu Duong Quyet Date: Tue, 3 May 2022 10:55:16 +0700 Subject: [PATCH 1/3] - Add files via upload As a capstone project for the University of Minnesota, we added 3 algorithms to Strelka: Beyond End-of-file, Least Significant Bit, and Noise Floor. There was one additional dependency of OpenCV2. A version of OpenCV was already included, but was not the correct version for our implementation. We created several images that were designed to cause the tests to flag the image in the resulting strelka.log file. These were tested repeatedly to ensure Strelka doesn't crash and produces the correct output. - Add .gitattributes file to fix line-ending (LE) issues --- .gitattributes | 325 ++++++++++++++++++++++++++ build/python/backend/Dockerfile | 1 + build/python/backend/requirements.txt | 5 +- configs/python/backend/backend.yaml | 32 +++ scan_bmp_eof.py | 27 +++ scan_lsb.py | 51 ++++ scan_nf.py | 39 ++++ scan_png_eof.py | 26 +++ 8 files changed, 504 insertions(+), 2 deletions(-) create mode 100644 .gitattributes create mode 100644 scan_bmp_eof.py create mode 100644 scan_lsb.py create mode 100644 scan_nf.py create mode 100644 scan_png_eof.py diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 00000000..37541115 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,325 @@ +# Basic .gitattributes for a python repo. + +# Source files +# ============ +*.pxd text diff=python +*.py text diff=python +*.py3 text diff=python +*.pyw text diff=python +*.pyx text diff=python +*.pyz text diff=python +*.pyi text diff=python + +# Binary files +# ============ +*.db binary +*.p binary +*.pkl binary +*.pickle binary +*.pyc binary export-ignore +*.pyo binary export-ignore +*.pyd binary + +# Jupyter notebook +*.ipynb text + +# Note: .db, .p, and .pkl files are associated +# with the python modules ``pickle``, ``dbm.*``, +# ``shelve``, ``marshal``, ``anydbm``, & ``bsddb`` +# (among others). 
+ +# Treat all Go files in this repo as binary, with no git magic updating +# line endings. Windows users contributing to Go will need to use a +# modern version of git and editors capable of LF line endings. + +*.go -text diff=golang + +# Common settings that generally should always be used with your language specific settings + +# Auto detect text files and perform LF normalization +* text=auto + +# +# The above will handle all files NOT found below +# + +# Documents +*.bibtex text diff=bibtex +*.doc diff=astextplain +*.DOC diff=astextplain +*.docx diff=astextplain +*.DOCX diff=astextplain +*.dot diff=astextplain +*.DOT diff=astextplain +*.pdf diff=astextplain +*.PDF diff=astextplain +*.rtf diff=astextplain +*.RTF diff=astextplain +*.md text diff=markdown +*.mdx text diff=markdown +*.tex text diff=tex +*.adoc text +*.textile text +*.mustache text +*.csv text +*.tab text +*.tsv text +*.txt text +*.sql text + +# Graphics +*.png binary +*.jpg binary +*.jpeg binary +*.gif binary +*.tif binary +*.tiff binary +*.ico binary +# SVG treated as text by default. +*.svg text +# If you want to treat it as binary, +# use the following line instead. +# *.svg binary +*.eps binary + +# Scripts +*.bash text eol=lf +*.fish text eol=lf +*.sh text eol=lf +*.zsh text eol=lf +# These are explicitly windows files and should use crlf +*.bat text eol=crlf +*.cmd text eol=crlf +*.ps1 text eol=crlf + +# Serialisation +*.json text +*.toml text +*.xml text +*.yaml text +*.yml text + +# Archives +*.7z binary +*.gz binary +*.tar binary +*.tgz binary +*.zip binary + +# Text files where line endings should be preserved +*.patch -text + +# +# Exclude files from exporting +# + +.gitattributes export-ignore +.gitignore export-ignore +.gitkeep export-ignore + +## GITATTRIBUTES FOR WEB PROJECTS +# +# These settings are for any web project. +# +# Details per file setting: +# text These files should be normalized (i.e. convert CRLF to LF). +# binary These files are binary and should be left untouched. 
+# +# Note that binary is a macro for -text -diff. +###################################################################### + +# Auto detect +## Handle line endings automatically for files detected as +## text and leave all files detected as binary untouched. +## This will handle all files NOT defined below. +* text=auto + +# Source code +*.bash text eol=lf +*.bat text eol=crlf +*.cmd text eol=crlf +*.coffee text +*.css text diff=css +*.htm text diff=html +*.html text diff=html +*.inc text +*.ini text +*.js text +*.json text +*.jsx text +*.less text +*.ls text +*.map text -diff +*.od text +*.onlydata text +*.php text diff=php +*.pl text +*.ps1 text eol=crlf +*.py text diff=python +*.rb text diff=ruby +*.sass text +*.scm text +*.scss text diff=css +*.sh text eol=lf +*.sql text +*.styl text +*.tag text +*.ts text +*.tsx text +*.xml text +*.xhtml text diff=html + +# Docker +Dockerfile text + +# Documentation +*.ipynb text +*.markdown text diff=markdown +*.md text diff=markdown +*.mdwn text diff=markdown +*.mdown text diff=markdown +*.mkd text diff=markdown +*.mkdn text diff=markdown +*.mdtxt text +*.mdtext text +*.txt text +AUTHORS text +CHANGELOG text +CHANGES text +CONTRIBUTING text +COPYING text +copyright text +*COPYRIGHT* text +INSTALL text +license text +LICENSE text +NEWS text +readme text +*README* text +TODO text + +# Templates +*.dot text +*.ejs text +*.erb text +*.haml text +*.handlebars text +*.hbs text +*.hbt text +*.jade text +*.latte text +*.mustache text +*.njk text +*.phtml text +*.svelte text +*.tmpl text +*.tpl text +*.twig text +*.vue text + +# Configs +*.cnf text +*.conf text +*.config text +.editorconfig text +.env text +.gitattributes text +.gitconfig text +.htaccess text +*.lock text -diff +package.json text eol=lf +package-lock.json text -diff +pnpm-lock.yaml text eol=lf -diff +.prettierrc text +yarn.lock text -diff +*.toml text +*.yaml text +*.yml text +browserslist text +Makefile text +makefile text + +# Heroku +Procfile text + +# Graphics 
+*.ai binary +*.bmp binary +*.eps binary +*.gif binary +*.gifv binary +*.ico binary +*.jng binary +*.jp2 binary +*.jpg binary +*.jpeg binary +*.jpx binary +*.jxr binary +*.pdf binary +*.png binary +*.psb binary +*.psd binary +# SVG treated as text by default. +*.svg text +# If you want to treat it as binary, +# use the following line instead. +# *.svg binary +*.svgz binary +*.tif binary +*.tiff binary +*.wbmp binary +*.webp binary + +# Audio +*.kar binary +*.m4a binary +*.mid binary +*.midi binary +*.mp3 binary +*.ogg binary +*.ra binary + +# Video +*.3gpp binary +*.3gp binary +*.as binary +*.asf binary +*.asx binary +*.avi binary +*.fla binary +*.flv binary +*.m4v binary +*.mng binary +*.mov binary +*.mp4 binary +*.mpeg binary +*.mpg binary +*.ogv binary +*.swc binary +*.swf binary +*.webm binary + +# Archives +*.7z binary +*.gz binary +*.jar binary +*.rar binary +*.tar binary +*.zip binary + +# Fonts +*.ttf binary +*.eot binary +*.otf binary +*.woff binary +*.woff2 binary + +# Executables +*.exe binary +*.pyc binary + +# RC files (like .babelrc or .eslintrc) +*.*rc text + +# Ignore files (like .npmignore or .gitignore) +*.*ignore text diff --git a/build/python/backend/Dockerfile b/build/python/backend/Dockerfile index 7dcfa3dc..657fd719 100644 --- a/build/python/backend/Dockerfile +++ b/build/python/backend/Dockerfile @@ -16,6 +16,7 @@ RUN apt-get -qq update && \ curl \ gcc \ git \ + libglu1-mesa \ libtool \ make \ swig \ diff --git a/build/python/backend/requirements.txt b/build/python/backend/requirements.txt index 95fb07f9..4bb73e52 100644 --- a/build/python/backend/requirements.txt +++ b/build/python/backend/requirements.txt @@ -13,7 +13,7 @@ inflection==0.5.1 interruptingcow==0.8 jsbeautifier==1.13.13 libarchive-c==2.9 -lief==0.12.1 +lief==0.11.4 lxml==4.6.5 M2Crypto==0.37.1 nested-lookup==0.2.22 @@ -21,7 +21,8 @@ numpy==1.21.0 olefile==0.46 oletools==0.56.1 opencv-python==4.5.1.48 -PyMuPDF==1.19.6 +opencv-contrib-python==4.5.3.56 
+PyMuPDF==1.18.0 pefile==2019.4.18 pgpdump3==1.5.2 pyelftools==0.27 diff --git a/configs/python/backend/backend.yaml b/configs/python/backend/backend.yaml index 6342affe..adfa0872 100644 --- a/configs/python/backend/backend.yaml +++ b/configs/python/backend/backend.yaml @@ -29,6 +29,12 @@ scanners: - 'text/x-msdos-batch' - 'batch_file' priority: 5 + 'ScanBmpEof': + - positive: + flavors: + - 'image/x-ms-bmp' + - 'bmp_file' + priority: 5 'ScanBzip2': - positive: flavors: @@ -285,6 +291,16 @@ scanners: flavors: - 'lnk_file' priority: 5 + 'ScanLsb': + - positive: + flavors: + - 'image/png' + - 'png_file' + - 'image/jpeg' + - 'jpeg_file' + - 'image/x-ms-bmp' + - 'bmp_file' + priority: 5 'ScanLzma': - positive: flavors: @@ -314,6 +330,16 @@ scanners: priority: 5 options: server: 'strelka_mmrpc_1:33907' + 'ScanNf': + - positive: + flavors: + - 'image/png' + - 'png_file' + - 'image/jpeg' + - 'jpeg_file' + - 'image/x-ms-bmp' + - 'bmp_file' + priority: 5 'ScanOcr': - positive: flavors: @@ -394,6 +420,12 @@ scanners: - 'ProgramArguments' - 'RunAtLoad' - 'StartInterval' + 'ScanPngEof': + - positive: + flavors: + - 'image/png' + - 'png_file' + priority: 5 'ScanQr': - positive: flavors: diff --git a/scan_bmp_eof.py b/scan_bmp_eof.py new file mode 100644 index 00000000..89bc14c5 --- /dev/null +++ b/scan_bmp_eof.py @@ -0,0 +1,27 @@ +from strelka import strelka + +class ScanBmpEof(strelka.Scanner): + """ + Take the data of the BMP image, parse it, and determine if data is stored beyond + the expected marker. 
+ """ + def scan(self, data, file, options, expire_at): + expectedSize = int.from_bytes(data[2:6], "little") + actualSize = len(data) + if expectedSize != actualSize: + self.event['trailer_index'] = expectedSize + trailer_bytes_data = data[expectedSize:] + extract_file = strelka.File( + source=self.name, + ) + + for c in strelka.chunk_string(trailer_bytes_data): + self.upload_to_coordinator( + extract_file.pointer, + c, + expire_at, + ) + self.event['BMP_EOF'] = data[expectedSize:] + self.files.append(extract_file) + else: + self.flags.append('no_trailer') \ No newline at end of file diff --git a/scan_lsb.py b/scan_lsb.py new file mode 100644 index 00000000..aab0302a --- /dev/null +++ b/scan_lsb.py @@ -0,0 +1,51 @@ +import cv2 +import numpy as np + +from strelka import strelka + +class ScanLsb(strelka.Scanner): + """This scanner checks if there is any hidden strings at the end of each RGB value""" + + def scan(self,data,file,options, expire_at): + ans=False + image = np.fromstring(data, np.uint8) + image = cv2.imdecode(image, cv2.IMREAD_COLOR) + bits = self._get_bits(image) + bytes_ = self._get_bytes(bits) + chars = [] + chars.append(self._convert_bytes_to_text(bytes_)) + flag=(''.join(chars).encode('ascii', 'ignore')) + if (len(flag)>1): + ans=True + self.event['lsb'] = ans + #print("This Image might have something stored in") + else: + extract_file = strelka.File ( + source = self.name + ) + self.event['lsb'] = ans + + def _get_bits(self, img): + h, w, t = img.shape + bits = '' + + for x in range(0, h): + for y in range(0, w): + l=img[x,y] + length=len(l) + for k in l: + bits += bin(k)[-1] + return bits + + def _convert_bytes_to_text(self, bytes_): + asc = "" + for byte_ in bytes_: + asc += chr(int(byte_, 2)) + return asc + + def _get_bytes(self, bits): + bytes_ = [] + for i in range(int(len(bits) / 8)): + bytes_.append(bits[i * 8:(i + 1) * 8]) + #print(bytes_) + return bytes_ \ No newline at end of file diff --git a/scan_nf.py b/scan_nf.py new file mode 100644 
index 00000000..94732bad --- /dev/null +++ b/scan_nf.py @@ -0,0 +1,39 @@ +from strelka import strelka +import cv2 as cv +import numpy as np + +class ScanNf(strelka.Scanner): + """ + Converts RGB image into the HSV (Hue, Saturation, Value) Color Space + to determine the noise floor of the image. + + This algorithm can be modified to be more/less strict by changing + the following variables in the source code: + p = minimum saturation percentage threshold per pixel (value between 0 and 1). + s_thr = minimum percentage threshold for all the pixels in the image. + + Current Setting: At least 25% (s_thr) of pixels must have a saturation value of at least 5% (p) + + The higher the value for both variables, the more strict the algorithm is. + """ + def scan(self, data, file, options, expire_at): + # Convert image to HSV color space + np_array = np.fromstring(data, np.uint8) + np_image = cv.imdecode(np_array, cv.IMREAD_COLOR) + image = cv.cvtColor(np_image, cv.COLOR_BGR2HSV) + + # Calculate histogram of saturation channel + s = cv.calcHist([image], [1], None, [256], [0, 256]) + + # Calculate percentage of pixels with saturation >= p + p = 0.05 + s_perc = float(np.sum(s[int(p * 255.0):-1])) / float(np.prod(image.shape[0:2])) + + # Percentage threshold; above: valid image, below: noise + s_thr = 0.25 + self.event['percentage'] = s_perc + self.event['threshold'] = s_thr + if s_perc < s_thr: + self.event['noise_floor'] = True # Potentially dangerous + else: + self.event['noise_floor'] = False # Not dangerous \ No newline at end of file diff --git a/scan_png_eof.py b/scan_png_eof.py new file mode 100644 index 00000000..fcbd26be --- /dev/null +++ b/scan_png_eof.py @@ -0,0 +1,26 @@ +from strelka import strelka + +class ScanPngEof(strelka.Scanner): + """ Extract data embedded in PNG files. 
+ + This scanner extracts data that is inserted past the PNG file end + """ + def scan(self, data, file, options, expire_at): + datalen = len(data) + if (data[datalen - 1] == b'\x82') and (data[datalen - 2] == b'\x60') and (data[len(data) - 3] == b'\x42'): + # file DOES NOT have data after EOF, found end of file + self.flags.append('no_trailer') + else: # the file DOES have data after EOF, did not find end of file + trailer_index = data.rfind(b'\x42\x60\x82') + if trailer_index == -1 : + self.event['end_index'] = -1 # didn't find the official ending of the file + else: + trailer_index = trailer_index + 3 + self.event['trailer_index'] = trailer_index + + extract_file = strelka.File ( + source = self.name + ) + self.event['PNG_EOF'] = data[trailer_index:] + + self.files.append(extract_file) \ No newline at end of file From bc282a66b7b1d522977a567527ee43afb905fe26 Mon Sep 17 00:00:00 2001 From: Paul Hutelmyer Date: Sun, 15 May 2022 16:44:08 -0400 Subject: [PATCH 2/3] Updating locations --- scan_bmp_eof.py => src/python/strelka/scanners/scan_bmp_eof.py | 0 scan_lsb.py => src/python/strelka/scanners/scan_lsb.py | 0 scan_nf.py => src/python/strelka/scanners/scan_nf.py | 0 scan_png_eof.py => src/python/strelka/scanners/scan_png_eof.py | 0 4 files changed, 0 insertions(+), 0 deletions(-) rename scan_bmp_eof.py => src/python/strelka/scanners/scan_bmp_eof.py (100%) rename scan_lsb.py => src/python/strelka/scanners/scan_lsb.py (100%) rename scan_nf.py => src/python/strelka/scanners/scan_nf.py (100%) rename scan_png_eof.py => src/python/strelka/scanners/scan_png_eof.py (100%) diff --git a/scan_bmp_eof.py b/src/python/strelka/scanners/scan_bmp_eof.py similarity index 100% rename from scan_bmp_eof.py rename to src/python/strelka/scanners/scan_bmp_eof.py diff --git a/scan_lsb.py b/src/python/strelka/scanners/scan_lsb.py similarity index 100% rename from scan_lsb.py rename to src/python/strelka/scanners/scan_lsb.py diff --git a/scan_nf.py b/src/python/strelka/scanners/scan_nf.py 
similarity index 100% rename from scan_nf.py rename to src/python/strelka/scanners/scan_nf.py diff --git a/scan_png_eof.py b/src/python/strelka/scanners/scan_png_eof.py similarity index 100% rename from scan_png_eof.py rename to src/python/strelka/scanners/scan_png_eof.py From 5b3d070daa4680f54c447fa469245ad43ed578f6 Mon Sep 17 00:00:00 2001 From: Paul Hutelmyer Date: Sun, 15 May 2022 16:53:35 -0400 Subject: [PATCH 3/3] Update CHANGELOG.md --- CHANGELOG.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index c2a9bff8..b83cc591 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,13 @@ # Changelog Changes to the project will be tracked in this file via the date of change. +## 2022-05-15 +### Added +- Added `ScanBmpEof` steganalysis scanner. (University of Minnesota) +- Added `ScanLsb` steganalysis scanner. (University of Minnesota) +- Added `ScanNf` steganalysis scanner. (University of Minnesota) +- Added `ScanPngEof` steganalysis scanner. (University of Minnesota) + ## 2022-04-26 ### Changed - Fixed / updated `ScanPdf` with new functionality. May require current implementations to change parsing. (Ryan Borre)