From cf62bf7fa895b78ffc2e69eba8ef361befaab7ab Mon Sep 17 00:00:00 2001 From: jlanson Date: Fri, 25 Oct 2024 16:15:13 -0400 Subject: [PATCH] feat: added binary analysis plugin --- Cargo.lock | 33 +++- Cargo.toml | 3 +- config/Hipcheck.kdl | 3 +- plugins/binary/Cargo.toml | 22 +++ plugins/binary/plugin.kdl | 10 + plugins/binary/src/binary_detector.rs | 177 +++++++++++++++++ plugins/binary/src/error/context.rs | 160 +++++++++++++++ plugins/binary/src/error/mod.rs | 275 ++++++++++++++++++++++++++ plugins/binary/src/fs.rs | 24 +++ plugins/binary/src/main.rs | 129 ++++++++++++ 10 files changed, 826 insertions(+), 10 deletions(-) create mode 100644 plugins/binary/Cargo.toml create mode 100644 plugins/binary/plugin.kdl create mode 100644 plugins/binary/src/binary_detector.rs create mode 100644 plugins/binary/src/error/context.rs create mode 100644 plugins/binary/src/error/mod.rs create mode 100644 plugins/binary/src/fs.rs create mode 100644 plugins/binary/src/main.rs diff --git a/Cargo.lock b/Cargo.lock index 545f2e19..e5302e06 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -315,6 +315,23 @@ version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3a8241f3ebb85c056b509d4327ad0358fbbba6ffb340bf388f26350aeda225b1" +[[package]] +name = "binary" +version = "0.1.0" +dependencies = [ + "clap", + "content_inspector", + "hipcheck-sdk", + "log", + "pathbuf", + "schemars", + "serde", + "serde_json", + "tokio", + "toml", + "walkdir", +] + [[package]] name = "bitflags" version = "1.3.2" @@ -2811,18 +2828,18 @@ dependencies = [ [[package]] name = "serde" -version = "1.0.210" +version = "1.0.213" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c8e3592472072e6e22e0a54d5904d9febf8508f65fb8552499a1abc7d1078c3a" +checksum = "3ea7893ff5e2466df8d720bb615088341b295f849602c6956047f8f80f0e9bc1" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.210" +version = "1.0.213" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "243902eda00fad750862fc144cea25caca5e20d615af0a81bee94ca738f1df1f" +checksum = "7e85ad2009c50b58e87caa8cd6dac16bdf511bbfb7af6c33df902396aa480fa5" dependencies = [ "proc-macro2", "quote", @@ -2842,9 +2859,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.128" +version = "1.0.132" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ff5456707a1de34e7e37f2a6fd3d3f808c318259cbd01ab6377795054b483d8" +checksum = "d726bfaff4b320266d395898905d0eba0345aae23b54aee3a737e260fd46db03" dependencies = [ "itoa", "memchr", @@ -3268,9 +3285,9 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tokio" -version = "1.40.0" +version = "1.41.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2b070231665d27ad9ec9b8df639893f46727666c6767db40317fbe920a5d998" +checksum = "145f3413504347a2be84393cc8a7d2fb4d863b375909ea59f2158261aa258bbb" dependencies = [ "backtrace", "bytes", diff --git a/Cargo.toml b/Cargo.toml index 4cc6aed3..acbb555b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -29,7 +29,8 @@ members = [ "plugins/fuzz", "plugins/entropy", "plugins/linguist", - "plugins/review" + "plugins/review", + "plugins/binary" ] # Make sure Hipcheck is run with `cargo run`. 
diff --git a/config/Hipcheck.kdl b/config/Hipcheck.kdl index fdc0fc4f..8cbbb838 100644 --- a/config/Hipcheck.kdl +++ b/config/Hipcheck.kdl @@ -19,8 +19,9 @@ analyze { category "practices" { analysis "mitre/activity" policy="(lte $ 52)" weight=3 - analysis "mitre/binary" policy="(eq 0 (count $))" { + analysis "mitre/binary" { binary-file "./config/Binary.toml" + binary-file-threshold "0" } analysis "mitre/fuzz" policy="(eq #t $)" analysis "mitre/review" policy="(lte $ 0.05)" diff --git a/plugins/binary/Cargo.toml b/plugins/binary/Cargo.toml new file mode 100644 index 00000000..be8dfab7 --- /dev/null +++ b/plugins/binary/Cargo.toml @@ -0,0 +1,22 @@ +[package] +name = "binary" +version = "0.1.0" +license = "Apache-2.0" +edition = "2021" +publish = false + +[dependencies] +clap = { version = "4.5.20", features = ["derive"] } +content_inspector = "0.2.4" +hipcheck-sdk = { version = "0.1.0", path = "../../sdk/rust", features = ["macros"] } +log = "0.4.22" +pathbuf = "1.0.0" +schemars = "0.8.21" +serde = "1.0.213" +serde_json = "1.0.132" +tokio = { version = "1.41.0", features = ["rt"] } +toml = "0.8.19" +walkdir = "2.5.0" + +[dev-dependencies] +hipcheck-sdk = { path = "../../sdk/rust", features = ["mock_engine"] } diff --git a/plugins/binary/plugin.kdl b/plugins/binary/plugin.kdl new file mode 100644 index 00000000..fc7bb7a1 --- /dev/null +++ b/plugins/binary/plugin.kdl @@ -0,0 +1,10 @@ +publisher "mitre" +name "binary" +version "0.1.0" +license "Apache-2.0" +entrypoint { + on arch="aarch64-apple-darwin" "./hc-mitre-binary" + on arch="x86_64-apple-darwin" "./hc-mitre-binary" + on arch="x86_64-unknown-linux-gnu" "./hc-mitre-binary" + on arch="x86_64-pc-windows-msvc" "./hc-mitre-binary" +} diff --git a/plugins/binary/src/binary_detector.rs b/plugins/binary/src/binary_detector.rs new file mode 100644 index 00000000..8a44b968 --- /dev/null +++ b/plugins/binary/src/binary_detector.rs @@ -0,0 +1,177 @@ +// SPDX-License-Identifier: Apache-2.0 + +use crate::{ + error::{Context, 
Result},
+    fs::read_toml,
+};
+use content_inspector::{inspect, ContentType};
+use serde::{de::Visitor, Deserialize, Deserializer};
+use std::{
+    fmt,
+    fmt::Formatter,
+    fs::File,
+    io::{prelude::Read, BufReader},
+    path::{Path, PathBuf},
+    result::Result as StdResult,
+};
+use walkdir::{DirEntry, WalkDir};
+
+/// Flags files as likely binaries by comparing their extension against a
+/// list of known binary file extensions.
+#[derive(Debug, PartialEq, Eq)]
+pub struct BinaryFileDetector {
+    /// Known extensions; compared against a file's extension prefixed with `.`.
+    extensions: Vec<String>,
+}
+
+impl BinaryFileDetector {
+    /// Constructs a new `BinaryFileDetector` from the `Binary.toml` file.
+    ///
+    /// Returns an error if the file cannot be read or parsed.
+    pub fn load<P: AsRef<Path>>(
+        binary_config_file: P,
+    ) -> crate::error::Result<BinaryFileDetector> {
+        fn inner(binary_config_file: &Path) -> crate::error::Result<BinaryFileDetector> {
+            let extensions_file: ExtensionsFile = read_toml(binary_config_file)
+                .context("failed to read binary type definitions from Binary config file")?;
+
+            Ok(BinaryFileDetector {
+                extensions: extensions_file.into_extensions(),
+            })
+        }
+
+        inner(binary_config_file.as_ref())
+    }
+
+    /// Determines if a binary file matches a known file extension.
+    ///
+    /// A match is assumed if an extension is not present.
+    pub fn is_likely_binary_file<P: AsRef<Path>>(&self, file_name: P) -> bool {
+        fn inner(binary_file_detector: &BinaryFileDetector, file_name: &Path) -> bool {
+            // Without an extension the file cannot be ruled out, so it counts as a match.
+            let Some(extension) = file_name.extension() else {
+                return true;
+            };
+            // Known extensions are compared with their leading dot.
+            let extension = format!(".{}", extension.to_string_lossy());
+            binary_file_detector
+                .extensions
+                .iter()
+                .any(|ext| *ext == extension)
+        }
+        inner(self, file_name.as_ref())
+    }
+}
+
+/// Top-level structure of the `Binary.toml` file.
+#[derive(Debug, Deserialize)]
+struct ExtensionsFile {
+    formats: Vec<BinaryExtensions>,
+}
+
+/// One entry of `formats`: a binary type and the extensions listed for it.
+#[derive(Debug, Deserialize)]
+struct BinaryExtensions {
+    // Entries without a `type` key deserialize as `BinaryType::Missing`.
+    #[serde(default = "missing_bin_type")]
+    r#type: BinaryType,
+    extensions: Option<Vec<String>>,
+}
+
+impl ExtensionsFile {
+    /// Collects the known file extensions from Binary.toml
+    ///
+    /// Only object, combination and executable entries contribute; entries
+    /// with a missing type or without extensions are skipped.
+    fn into_extensions(self) -> Vec<String> {
+        self.formats
+            .into_iter()
+            .filter(|file_format| {
+                matches!(
+                    file_format.r#type,
+                    BinaryType::Object | BinaryType::Combination | BinaryType::Executable
+                )
+            })
+            .filter_map(|file_format| file_format.extensions)
+            .flatten()
+            .collect()
+    }
+}
+
+/// The kind of binary that a file format entry describes.
+#[derive(Debug)]
+enum BinaryType {
+    Object,
+    Executable,
+    Combination,
+    /// Stands in for an entry that has no `type` key.
+    Missing,
+}
+
+/// Serde default for `BinaryExtensions::type`.
+fn missing_bin_type() -> BinaryType {
+    BinaryType::Missing
+}
+
+impl<'de> Deserialize<'de> for BinaryType {
+    fn deserialize<D>(deserializer: D) -> StdResult<Self, D::Error>
+    where
+        D: Deserializer<'de>,
+    {
+        deserializer.deserialize_str(BinaryTypeVisitor)
+    }
+}
+
+/// Visitor mapping the string form of a binary type onto `BinaryType`.
+struct BinaryTypeVisitor;
+
+impl<'de> Visitor<'de> for BinaryTypeVisitor {
+    type Value = BinaryType;
+    fn expecting(&self, f: &mut Formatter) -> fmt::Result {
+        write!(f, "'executable', 'object', or 'combination'")
+    }
+
+    fn visit_str<E>(self, value: &str) -> StdResult<Self::Value, E>
+    where
+        E: serde::de::Error,
+    {
+        match value {
+            "combination" => Ok(BinaryType::Combination),
+            "object" => Ok(BinaryType::Object),
+            "executable" => Ok(BinaryType::Executable),
+            _ => Err(serde::de::Error::custom("unknown binary format")),
+        }
+    }
+}
+
+/// Determines whether a 
DirEntry is a hidden file/directory.
+///
+/// This is a Unix-style determination.
+fn is_hidden(entry: &DirEntry) -> bool {
+    entry
+        .file_name()
+        .to_str()
+        .map(|s| s.starts_with('.'))
+        .unwrap_or(false)
+}
+
+/// Fetches all files from `dir`.
+///
+/// Hidden entries below `dir` are skipped, and hidden directories are not
+/// descended into. The root entry is always kept: its own name may start
+/// with a dot (for example when `dir` is `.`), and filtering it out would
+/// silently produce no entries at all.
+fn fetch_entries(dir: &Path) -> Result<Vec<DirEntry>> {
+    let walker = WalkDir::new(dir).into_iter();
+    let mut entries: Vec<DirEntry> = Vec::new();
+    for entry in walker.filter_entry(|e| e.depth() == 0 || !is_hidden(e)) {
+        entries.push(entry?)
+    }
+    Ok(entries)
+}
+
+/// Searches `dir` for any binary files and records their paths, relative to `dir`.
+///
+/// Returns an error if the directory walk fails or a file cannot be opened or read.
+pub fn detect_binary_files(dir: &Path) -> Result<Vec<PathBuf>> {
+    let path_entries = fetch_entries(dir)?;
+    let mut possible_binary: Vec<PathBuf> = Vec::new();
+
+    // Inspect the first 4K of each file for telltale signs of binary data.
+    // Store the path of each file that appears to be binary.
+    const SAMPLE_SIZE: u64 = 4096;
+    for entry in path_entries {
+        // Skip directories, as they are neither text nor binary.
+        if entry.path().is_dir() {
+            continue;
+        }
+
+        let working_file = File::open(entry.path())?;
+        let reader = BufReader::new(working_file);
+        let mut contents: Vec<u8> = Vec::new();
+        // `take` caps the read at SAMPLE_SIZE bytes; shorter files are read whole.
+        let _bytes_read = reader.take(SAMPLE_SIZE).read_to_end(&mut contents)?;
+        if inspect(&contents) == ContentType::BINARY {
+            possible_binary.push(entry.path().strip_prefix(dir)?.into());
+        }
+    }
+
+    Ok(possible_binary)
+}
diff --git a/plugins/binary/src/error/context.rs b/plugins/binary/src/error/context.rs
new file mode 100644
index 00000000..b9b0d0d3
--- /dev/null
+++ b/plugins/binary/src/error/context.rs
@@ -0,0 +1,160 @@
+// SPDX-License-Identifier: Apache-2.0
+
+//! A duplicate of the `anyhow::Context` extension trait intended to
+//! make error propagation less verbose.
+
+use crate::error::{Error, Introspect};
+use std::error::Error as StdError;
+
+/// Functions for adding context to an error result
+///
+/// The `Context` trait is based around the `Error` type defined in
+/// this crate. 
Aside from the changed method names (collision
+/// avoidance), it is a duplicate of the `anyhow::Context` trait.
+/// Like its `anyhow` counterpart, this trait is sealed.
+pub trait Context<T>: sealed::Sealed {
+    /// Add context to an error
+    fn context<C>(self, context: C) -> Result<T, Error>
+    where
+        C: Introspect + 'static;
+
+    /// Lazily add context to an error
+    ///
+    /// `context_fn` is only called when `self` is an `Err`.
+    fn with_context<C, F>(self, context_fn: F) -> Result<T, Error>
+    where
+        C: Introspect + 'static,
+        F: FnOnce() -> C;
+}
+
+// `Context` is implemented only for those result types encountered
+// when entering or traversing the query system: `Result<T, Error>`
+// and `Result<T, E>` for dynamic error types `E`.
+
+impl<T> Context<T> for Result<T, Error> {
+    fn context<C>(self, context: C) -> Result<T, Error>
+    where
+        C: Introspect + 'static,
+    {
+        self.map_err(|err| err.context(context))
+    }
+
+    fn with_context<C, F>(self, context_fn: F) -> Result<T, Error>
+    where
+        C: Introspect + 'static,
+        F: FnOnce() -> C,
+    {
+        self.map_err(|err| err.context(context_fn()))
+    }
+}
+
+impl<T, E> Context<T> for Result<T, E>
+where
+    E: StdError + Send + Sync + 'static,
+{
+    fn context<C>(self, context: C) -> Result<T, Error>
+    where
+        C: Introspect + 'static,
+    {
+        // Wrap the foreign error first, then push the context on top of it.
+        self.map_err(|err| Error::from(err).context(context))
+    }
+
+    fn with_context<C, F>(self, context_fn: F) -> Result<T, Error>
+    where
+        C: Introspect + 'static,
+        F: FnOnce() -> C,
+    {
+        self.map_err(|err| Error::from(err).context(context_fn()))
+    }
+}
+
+// Restricts implementations of `Context` only to those contained in
+// this module
+mod sealed {
+    use super::{Error, StdError};
+
+    pub trait Sealed {}
+
+    impl<T> Sealed for Result<T, Error> {}
+
+    impl<T, E> Sealed for Result<T, E> where E: StdError + 'static {}
+}
+
+#[cfg(test)]
+mod tests {
+    //! Tests to ensure `Context` produces output correctly.
+
+    use crate::error::Error;
+    use std::{io, io::ErrorKind};
+
+    // Message source root error with no context
+    #[test]
+    fn debug_behavior_msg_no_context() {
+        let error = Error::msg("error message");
+        let debug = format!("{:?}", error);
+        let expected = "error message".to_string();
+        assert_eq!(expected, debug);
+    }
+
+    // Message source root error with a single context message
+    #[test]
+    fn debug_behavior_msg_single_context() {
+        let error = Error::msg("error message").context("context");
+        let debug = format!("{:?}", error);
+        // Cause indices are written with `{:5}`, i.e. right-aligned in five columns.
+        let expected = "context\n\nCaused by: \n    0: error message".to_string();
+        assert_eq!(expected, debug);
+    }
+
+    // Message source root error with multiple context messages
+    #[test]
+    fn debug_behavior_msg_multiple_context() {
+        let error = Error::msg("error message")
+            .context("context 1")
+            .context("context 2");
+        let debug = format!("{:?}", error);
+        let expected =
+            "context 2\n\nCaused by: \n    0: context 1\n    1: error message".to_string();
+        assert_eq!(expected, debug);
+    }
+
+    // Dynamic error source with no context
+    #[test]
+    fn debug_behavior_std_no_context() {
+        let error = Error::from(io::Error::new(
+            ErrorKind::ConnectionRefused,
+            "connection refused",
+        ));
+
+        let debug = format!("{:?}", error);
+        let expected = "connection refused".to_string();
+        assert_eq!(expected, debug);
+    }
+
+    // Dynamic error source with a single context message
+    #[test]
+    fn debug_behavior_std_single_context() {
+        let error = Error::from(io::Error::new(
+            ErrorKind::ConnectionRefused,
+            "connection refused",
+        ))
+        .context("context");
+
+        let debug = format!("{:?}", error);
+        let expected = "context\n\nCaused by: \n    0: connection refused".to_string();
+        assert_eq!(expected, debug);
+    }
+
+    // Dynamic error source with multiple context messages
+    #[test]
+    fn debug_behavior_std_multiple_context() {
+        let error = Error::from(io::Error::new(
+            ErrorKind::ConnectionRefused,
+            "connection refused",
+        ))
+        .context("context 1")
+        .context("context 2");
+
+        let debug = format!("{:?}", error);
+        let expected =
+            "context 2\n\nCaused by: \n    0: context 1\n    1: connection refused".to_string();
+        assert_eq!(expected, debug);
+    }
+}
diff --git a/plugins/binary/src/error/mod.rs b/plugins/binary/src/error/mod.rs
new file mode 100644
index 00000000..c6b3a8b5
--- /dev/null
+++ b/plugins/binary/src/error/mod.rs
@@ -0,0 +1,275 @@
+// SPDX-License-Identifier: Apache-2.0
+
+// NOTE(review): blanket allow with no stated reason; presumably it silences
+// helpers this plugin does not call — confirm, and narrow it if possible.
+#![allow(unused)]
+
+//! An error type suitable for use in Hipcheck's query system.
+//!
+//! Salsa requires memoized query-value types to implement `Clone` and
+//! `Eq`. The `anyhow::Error` type implements neither, making it
+//! difficult to work with directly in this setting.
+//!
+//! Instead, the `Error` type defined in this crate ensures queries
+//! which error out aren't retried, as it always compares as equal to
+//! any other error.
+
+mod context;
+
+pub use crate::error::context::Context;
+use std::{
+    borrow::Cow,
+    error::Error as StdError,
+    fmt,
+    fmt::{Debug, Display},
+    sync::Arc,
+};
+
+pub type Result<T> = std::result::Result<T, Error>;
+
+/// A type convertible into a `Cow<'static, str>`.
+///
+/// This impl ensures we can avoid allocations for all of the static string
+/// error messages which exist in the Hipcheck source code.
+pub trait Introspect: Into<Cow<'static, str>> {}
+impl<T: Into<Cow<'static, str>>> Introspect for T {}
+
+/// An error type compatible with Salsa.
+///
+/// Internally a singly linked list: the head is the most recently added
+/// context and the tail is the root cause.
+pub struct Error {
+    /// The start of the error linked list.
+    head: Arc<ErrorNode>,
+}
+
+impl Error {
+    /// Create a new `Error` with a message source.
+    pub fn msg(message: impl Introspect) -> Self {
+        let error = Message(message.into());
+        Error::new(error)
+    }
+
+    /// Create a new `Error` from a source error.
+    pub fn new<M>(error: M) -> Self
+    where
+        M: StdError + Send + Sync + 'static,
+    {
+        Error {
+            head: Arc::new(ErrorNode {
+                current: Arc::new(error),
+                next: None,
+            }),
+        }
+    }
+
+    /// Add additional context to an `Error`
+    ///
+    /// The context becomes the new head; the previous chain hangs off it.
+    pub(crate) fn context<M>(self, context: M) -> Self
+    where
+        M: Introspect + 'static,
+    {
+        let message: Cow<'static, str> = context.into();
+
+        Error {
+            head: Arc::new(ErrorNode {
+                current: Arc::new(Message(message)),
+                next: Some(self.head),
+            }),
+        }
+    }
+
+    /// Get an iterator over the errors in a chain.
+    pub fn chain(&self) -> Chain {
+        Chain::new(self)
+    }
+}
+
+/// Allows use of `?` operator on query system entry.
+impl<T> From<T> for Error
+where
+    T: StdError + Send + Sync + 'static,
+{
+    fn from(std_error: T) -> Error {
+        Error::new(std_error)
+    }
+}
+
+impl Clone for Error {
+    fn clone(&self) -> Error {
+        // Only the reference count is bumped; the chain itself is shared.
+        Error {
+            head: Arc::clone(&self.head),
+        }
+    }
+}
+
+// By defining all `Error` instances to be equal, the query system
+// will not update a value with further errors after reaching an
+// initial one.
+impl PartialEq for Error {
+    fn eq(&self, _: &Self) -> bool {
+        true
+    }
+}
+
+impl Eq for Error {}
+
+impl Debug for Error {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        // Delegate to the debug impl for the head of the list.
+        Debug::fmt(self.head.as_ref(), f)
+    }
+}
+
+impl Display for Error {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        // Delegate to the display impl for the head of the list.
+        Display::fmt(self.head.as_ref(), f)
+    }
+}
+
+/// A single node in the linked list of errors.
+pub struct ErrorNode {
+    /// The current error.
+    current: ErrorObj,
+    /// A next error, if present.
+    next: Option<ErrorLink>,
+}
+
+impl Debug for ErrorNode {
+    /// Writes this node's message, followed by a numbered `Caused by:` list
+    /// of every node after it in the chain.
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "{}", self.current)?;
+
+        if self.next.is_some() {
+            write!(f, "\n\nCaused by: ")?;
+
+            let mut index = 0;
+            let mut link = self.next.as_ref();
+
+            // The loop runs until the chain is exhausted, so nothing is left
+            // to print once it finishes.
+            while let Some(step) = link {
+                write!(f, "\n{:5}: {}", index, step.current)?;
+                link = step.next.as_ref();
+                index += 1;
+            }
+        }
+
+        Ok(())
+    }
+}
+
+impl Display for ErrorNode {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "{}", self.current)
+    }
+}
+
+impl StdError for ErrorNode {
+    fn source(&self) -> Option<&(dyn StdError + 'static)> {
+        self.next
+            .as_deref()
+            .map(|node| node as &(dyn StdError + 'static))
+    }
+}
+
+/// A reference-counted fat pointer to a standard error type.
+type ErrorObj = Arc<dyn StdError + Send + Sync + 'static>;
+
+/// A link in the linked list.
+type ErrorLink = Arc<ErrorNode>;
+
+/// A string-only error message, which can either be a static string
+/// slice, or an owned string.
+#[derive(Debug)]
+struct Message(Cow<'static, str>);
+
+impl Display for Message {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "{}", self.0)
+    }
+}
+
+impl StdError for Message {
+    fn source(&self) -> Option<&(dyn StdError + 'static)> {
+        None
+    }
+}
+
+/// Iterator over the nodes of an `Error`, starting at its head.
+pub struct Chain<'e> {
+    current: Option<&'e ErrorNode>,
+}
+
+impl<'e> Chain<'e> {
+    fn new(error: &Error) -> Chain<'_> {
+        Chain {
+            current: Some(error.head.as_ref()),
+        }
+    }
+}
+
+impl<'e> Iterator for Chain<'e> {
+    type Item = &'e ErrorNode;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        // Yield the current node and step to its successor, if any.
+        let node = self.current?;
+        self.current = node.next.as_deref();
+        Some(node)
+    }
+}
+
+/// A limited analogue of the `anyhow!` macro for `Error`. 
Only
+/// intended for input suitable for the `Error::msg` function.
+#[macro_export]
+macro_rules! hc_error {
+    // A lone string literal is passed through without formatting.
+    ($msg:literal $(,)?) => {
+        $crate::error::Error::msg($msg)
+    };
+    // Anything with arguments goes through `format!`.
+    ($fmt:expr, $($arg:tt)*) => {
+        $crate::error::Error::msg(format!($fmt, $($arg)*))
+    };
+}
+
+#[cfg(test)]
+mod tests {
+    //! Tests to ensure `Error` produces output correctly.
+
+    // Literal input to `hc_error`
+    #[test]
+    fn macro_literal() {
+        let error = hc_error!("msg source");
+        let debug = format!("{:?}", error);
+        let expected = "msg source".to_string();
+        assert_eq!(expected, debug);
+    }
+
+    // Format string input to `hc_error`
+    #[test]
+    fn macro_format_string() {
+        let msg = "msg";
+        let source = "source";
+        let error = hc_error!("format {} {}", msg, source);
+        let debug = format!("{:?}", error);
+        let expected = "format msg source".to_string();
+        assert_eq!(expected, debug);
+    }
+
+    // Verify that the `chain` method on `hc_error` works.
+    #[test]
+    fn hc_error_chain() {
+        let error = hc_error!("first error");
+        let error = error.context("second error");
+        let error = error.context("third error");
+
+        let mut iter = error.chain();
+
+        // Most recent context first, root cause last.
+        assert_eq!("third error", iter.next().unwrap().to_string());
+        assert_eq!("second error", iter.next().unwrap().to_string());
+        assert_eq!("first error", iter.next().unwrap().to_string());
+        assert!(iter.next().is_none());
+    }
+}
diff --git a/plugins/binary/src/fs.rs b/plugins/binary/src/fs.rs
new file mode 100644
index 00000000..abf05af2
--- /dev/null
+++ b/plugins/binary/src/fs.rs
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: Apache-2.0
+
+use crate::error::*;
+
+use serde::de::DeserializeOwned;
+use std::{fs, path::Path};
+
+/// Read a file to a string.
+pub fn read_string<P: AsRef<Path>>(path: P) -> Result<String> {
+    fn inner(path: &Path) -> Result<String> {
+        fs::read_to_string(path)
+            .with_context(|| format!("failed to read as UTF-8 string '{}'", path.display()))
+    }
+
+    inner(path.as_ref())
+}
+
+/// Read file to a struct that can be deserialized from TOML format. 
+pub fn read_toml<P: AsRef<Path>, T: DeserializeOwned>(path: P) -> Result<T> {
+    let path = path.as_ref();
+    let contents = read_string(path)?;
+    toml::de::from_str(&contents)
+        .with_context(|| format!("failed to read as TOML '{}'", path.display()))
+}
diff --git a/plugins/binary/src/main.rs b/plugins/binary/src/main.rs
new file mode 100644
index 00000000..667f12fc
--- /dev/null
+++ b/plugins/binary/src/main.rs
@@ -0,0 +1,150 @@
+// SPDX-License-Identifier: Apache-2.0
+
+mod binary_detector;
+mod error;
+mod fs;
+
+use crate::binary_detector::{detect_binary_files, BinaryFileDetector};
+
+use clap::Parser;
+use hipcheck_sdk::{prelude::*, types::Target};
+use pathbuf::pathbuf;
+use serde::Deserialize;
+
+use std::{path::PathBuf, result::Result as StdResult, sync::OnceLock};
+
+/// Detector built from the configured binary file; initialized once by `set_config`.
+pub static DETECTOR: OnceLock<BinaryFileDetector> = OnceLock::new();
+
+/// Plugin configuration as deserialized, before validation.
+#[derive(Deserialize)]
+struct RawConfig {
+    binary_file: Option<PathBuf>,
+    // NOTE(review): the integer type was lost from the patch text; `u64` is assumed — confirm.
+    binary_file_threshold: Option<u64>,
+}
+
+/// Validated plugin configuration.
+struct Config {
+    /// Path of the file describing known binary file extensions.
+    binary_file: PathBuf,
+    /// Threshold used to build the default policy expression, if one was configured.
+    opt_threshold: Option<u64>,
+}
+
+impl TryFrom<RawConfig> for Config {
+    type Error = hipcheck_sdk::error::ConfigError;
+    fn try_from(value: RawConfig) -> StdResult<Self, Self::Error> {
+        // `binary_file` is required; the threshold stays optional.
+        let Some(binary_file) = value.binary_file else {
+            return Err(ConfigError::MissingRequiredConfig {
+                field_name: "binary_file".to_owned(),
+                field_type: "string".to_owned(),
+                possible_values: vec![],
+            });
+        };
+        let opt_threshold = value.binary_file_threshold;
+        Ok(Config {
+            binary_file,
+            opt_threshold,
+        })
+    }
+}
+
+/// Default query: lists the likely binary files in the target's local clone.
+///
+/// A file is reported when its sampled content looks binary and the configured
+/// detector accepts its extension. One concern is recorded per reported file.
+#[query(default)]
+async fn binary(engine: &mut PluginEngine, value: Target) -> Result<Vec<PathBuf>> {
+    let bfd = DETECTOR.get().ok_or(Error::UnspecifiedQueryState)?;
+    let repo = pathbuf![&value.local.path];
+    let out: Vec<PathBuf> = detect_binary_files(&repo)
+        .map_err(|e| {
+            // The SDK error carries no detail, so log the cause before discarding it.
+            log::error!("failed to detect binary files: {}", e);
+            Error::UnspecifiedQueryState
+        })?
+        .into_iter()
+        .filter(|f| bfd.is_likely_binary_file(f))
+        .collect();
+    for f in &out {
+        engine.record_concern(format!("Found binary file at '{}'", f.to_string_lossy()));
+    }
+    Ok(out)
+}
+
+/// The binary analysis plugin.
+#[derive(Clone, Debug, Default)]
+struct BinaryPlugin {
+    /// Configured threshold; unset until `set_config` has run.
+    policy_conf: OnceLock<Option<u64>>,
+}
+
+impl Plugin for BinaryPlugin {
+    const PUBLISHER: &'static str = "mitre";
+    const NAME: &'static str = "binary";
+
+    fn set_config(&self, config: Value) -> StdResult<(), ConfigError> {
+        // Deserialize and validate the config struct
+        let conf: Config = serde_json::from_value::<RawConfig>(config)
+            .map_err(|e| ConfigError::Unspecified {
+                message: e.to_string(),
+            })?
+            .try_into()?;
+
+        // Load the detector before storing any state, so that an unreadable
+        // binary file does not leave the plugin half-configured.
+        let bfd =
+            BinaryFileDetector::load(conf.binary_file).map_err(|e| ConfigError::Unspecified {
+                message: e.to_string(),
+            })?;
+
+        // Store the policy conf to be accessed only in the `default_policy_expr()` impl
+        self.policy_conf
+            .set(conf.opt_threshold)
+            .map_err(|_| ConfigError::Unspecified {
+                message: "plugin was already configured".to_string(),
+            })?;
+
+        // Make the detector globally accessible to the query
+        DETECTOR.set(bfd).map_err(|_e| ConfigError::Unspecified {
+            message: "config was already set".to_owned(),
+        })
+    }
+
+    fn default_policy_expr(&self) -> Result<String> {
+        match self.policy_conf.get() {
+            None => Err(Error::UnspecifiedQueryState),
+            // If no policy vars, we have no default expr
+            Some(None) => Ok("".to_owned()),
+            // The query yields a list of paths, so the threshold applies to its length.
+            Some(Some(policy_conf)) => Ok(format!("(lte (count $) {})", policy_conf)),
+        }
+    }
+
+    fn explain_default_query(&self) -> Result<Option<String>> {
+        // NOTE(review): the query returns the paths themselves, not a count — confirm wording.
+        Ok(Some(
+            "Returns number of detected binary files in a repo".to_owned(),
+        ))
+    }
+
+    queries! {}
+}
+
+// Command-line arguments passed to the plugin at start-up.
+#[derive(Parser, Debug)]
+struct Args {
+    #[arg(long)]
+    port: u16,
+}
+
+#[tokio::main(flavor = "current_thread")]
+async fn main() -> Result<()> {
+    // `parse` prints a usage error and exits on bad arguments instead of panicking.
+    let args = Args::parse();
+    PluginServer::register(BinaryPlugin::default())
+        .listen(args.port)
+        .await
+}