Skip to content

Commit

Permalink
Bug Fixes and Improvements
Browse files Browse the repository at this point in the history
- Fix DotNet Header Alignment Bug
- Improve BLCompare tool
  • Loading branch information
c3rb3ru5d3d53c committed Dec 12, 2024
1 parent 9b953bf commit 99014d5
Show file tree
Hide file tree
Showing 5 changed files with 158 additions and 45 deletions.
8 changes: 8 additions & 0 deletions src/bin/binlex.rs
Original file line number Diff line number Diff line change
Expand Up @@ -614,10 +614,14 @@ fn process_pe(input: String, config: Config, tags: Option<Vec<String>>, output:
let mapped_file = pe.image()
.unwrap_or_else(|error| { eprintln!("failed to map pe image: {}", error); process::exit(1)});

Stderr::print_debug(config.clone(), "mapped pe image");

let image = mapped_file
.mmap()
.unwrap_or_else(|error| { eprintln!("failed to get pe virtual image: {}", error); process::exit(1); });

Stderr::print_debug(config.clone(), "obtained mapped image pointer");

let executable_address_ranges = match pe.is_dotnet() {
true => pe.dotnet_executable_virtual_address_ranges(),
_ => pe.executable_virtual_address_ranges(),
Expand All @@ -635,6 +639,8 @@ fn process_pe(input: String, config: Config, tags: Option<Vec<String>>, output:
let mut cfg = Graph::new(pe.architecture(), config.clone());

if !pe.is_dotnet() {
Stderr::print_debug(config.clone(), "starting pe disassembler");

let disassembler = match Disassembler::new(pe.architecture(), &image, executable_address_ranges.clone()) {
Ok(disassembler) => disassembler,
Err(error) => {
Expand All @@ -649,6 +655,8 @@ fn process_pe(input: String, config: Config, tags: Option<Vec<String>>, output:
process::exit(1);
});
} else if pe.is_dotnet() {
Stderr::print_debug(config.clone(), "starting pe dotnet disassembler");

let disassembler = match CILDisassembler::new(pe.architecture(), &image, pe.dotnet_metadata_token_virtual_addresses().clone(), executable_address_ranges.clone()) {
Ok(disassembler) => disassembler,
Err(error) => {
Expand Down
83 changes: 73 additions & 10 deletions src/bin/blcompare.rs
Original file line number Diff line number Diff line change
Expand Up @@ -177,11 +177,13 @@ use serde_json::Value;

use binlex::{AUTHOR, VERSION};
use binlex::hashing::TLSH;
use binlex::hashing::MinHash32;
use binlex::io::{JSON, Stdout};

/// Similarity scores computed between one left-hand and one right-hand
/// JSON entry during comparison.
///
/// Each field is `None` when the corresponding digest was missing on
/// either side or the comparison could not be performed.
#[derive(Serialize, Deserialize)]
pub struct SimilarityScoreJson {
    /// TLSH comparison score, if both entries had a TLSH digest.
    pub tlsh: Option<f64>,
    /// MinHash Jaccard similarity, if both entries had a MinHash digest.
    pub minhash: Option<f64>,
}

/// Structure to represent the comparison result between two JSON entries.
Expand Down Expand Up @@ -340,7 +342,6 @@ fn compare_json_entries(json_lhs: &JSON, json_rhs: &JSON) {

for lhs in lhs_entries {


let lhs_type = match extract_string_value(lhs, "type") {
Some(t) => t,
None => continue,
Expand All @@ -350,8 +351,11 @@ fn compare_json_entries(json_lhs: &JSON, json_rhs: &JSON) {
Some(a) => a,
None => continue,
};

let lhs_tlsh = extract_nested_field(lhs, "chromosome", "tlsh");

let lhs_minhash = extract_nested_field(lhs, "chromosome", "minhash");

let lhs_contiguous = match extract_boolean_value(lhs, "contiguous") {
Some(t) => t,
None => continue,
Expand All @@ -367,8 +371,11 @@ fn compare_json_entries(json_lhs: &JSON, json_rhs: &JSON) {
Some(a) => a,
None => continue,
};

let rhs_tlsh = extract_nested_field(rhs, "chromosome", "tlsh");

let rhs_minhash = extract_nested_field(rhs, "chromosome", "minhash");

let rhs_contiguous = match extract_boolean_value(rhs, "contiguous") {
Some(t) => t,
None => continue,
Expand All @@ -379,21 +386,28 @@ fn compare_json_entries(json_lhs: &JSON, json_rhs: &JSON) {
}

let mut tlsh_similarity: Option<f64> = None;
let mut minhash_similarity: Option<f64> = None;

if lhs_contiguous == true && rhs_contiguous == true && lhs_tlsh.is_some() && rhs_tlsh.is_some() {
tlsh_similarity = TLSH::compare(
lhs_tlsh.clone().unwrap(),
rhs_tlsh.clone().unwrap())
.ok()
.map(|score| score as f64);

}

if lhs_contiguous == true && rhs_contiguous == true && lhs_minhash.is_some() && rhs_minhash.is_some() {
minhash_similarity = Some(MinHash32::jaccard_similarity_from_hexdigests(&lhs_minhash.clone().unwrap(), &rhs_minhash.clone().unwrap()));
}

if (lhs_contiguous == false || rhs_contiguous == false) && lhs_type == "function" && rhs_type == "function" {
if let (Some(lhs_blocks), Some(rhs_blocks)) = (
lhs.get("blocks").and_then(|b| b.as_array()),
rhs.get("blocks").and_then(|b| b.as_array()),
) {
tlsh_similarity = calculate_non_contiguous_similarity(lhs_blocks, rhs_blocks);
tlsh_similarity = calculate_non_contiguous_tlsh_similarity(lhs_blocks, rhs_blocks);
minhash_similarity = calculate_non_contiguous_minhash_similarity(lhs_blocks, rhs_blocks);
}
}

Expand All @@ -405,6 +419,7 @@ fn compare_json_entries(json_lhs: &JSON, json_rhs: &JSON) {
rhs: rhs.clone(),
similarity: SimilarityScoreJson {
tlsh: tlsh_similarity,
minhash: minhash_similarity,
},
};

Expand All @@ -416,13 +431,54 @@ fn compare_json_entries(json_lhs: &JSON, json_rhs: &JSON) {
}
}

fn calculate_non_contiguous_similarity(lhs_blocks: &[Value], rhs_blocks: &[Value]) -> Option<f64> {
let mut best_similarities = Vec::new();
fn calculate_non_contiguous_minhash_similarity(lhs_blocks: &[Value], rhs_blocks: &[Value]) -> Option<f64> {
let lhs_minhash_values = extract_minhash_values(lhs_blocks);
let rhs_minhash_values = extract_minhash_values(rhs_blocks);

for lhs_tlsh in extract_tlsh_values(lhs_blocks) {
if lhs_blocks.len() != lhs_minhash_values.len() || rhs_blocks.len() != rhs_minhash_values.len() {
return None;
}

let mut similarities = Vec::new();

for lhs_tlsh in lhs_minhash_values {
let mut best_similarity: Option<f64> = None;

for rhs_tlsh in &rhs_minhash_values {
let similarity = MinHash32::jaccard_similarity_from_hexdigests(&lhs_tlsh.clone(), &rhs_tlsh.clone());
best_similarity = match best_similarity {
Some(current_best) => Some(current_best.max(similarity)),
None => Some(similarity),
};
}

if let Some(similarity) = best_similarity {
similarities.push(similarity as f64);
}
}

if !similarities.is_empty() {
let total_similarity: f64 = similarities.iter().sum();
return Some(total_similarity / similarities.len() as f64);
}

None
}

fn calculate_non_contiguous_tlsh_similarity(lhs_blocks: &[Value], rhs_blocks: &[Value]) -> Option<f64> {
let lhs_tlsh_values = extract_tlsh_values(lhs_blocks);
let rhs_tlsh_values = extract_tlsh_values(rhs_blocks);

if lhs_blocks.len() != lhs_tlsh_values.len() || rhs_blocks.len() != rhs_tlsh_values.len() {
return None;
}

let mut similarities = Vec::new();

for lhs_tlsh in lhs_tlsh_values {
let mut best_similarity: Option<u32> = None;

for rhs_tlsh in extract_tlsh_values(rhs_blocks) {
for rhs_tlsh in &rhs_tlsh_values {
if let Ok(similarity) = TLSH::compare(lhs_tlsh.clone(), rhs_tlsh.clone()) {
best_similarity = match best_similarity {
Some(current_best) => Some(current_best.min(similarity)),
Expand All @@ -432,18 +488,25 @@ fn calculate_non_contiguous_similarity(lhs_blocks: &[Value], rhs_blocks: &[Value
}

if let Some(similarity) = best_similarity {
best_similarities.push(similarity as f64);
similarities.push(similarity as f64);
}
}

if !best_similarities.is_empty() {
let total_similarity: f64 = best_similarities.iter().sum();
return Some(total_similarity / best_similarities.len() as f64);
if !similarities.is_empty() {
let total_similarity: f64 = similarities.iter().sum();
return Some(total_similarity / similarities.len() as f64);
}

None
}

/// Collects the `chromosome.minhash` hex digest from every block that
/// has one, preserving block order; blocks without a digest are skipped.
fn extract_minhash_values(blocks: &[Value]) -> Vec<String> {
    let mut digests = Vec::with_capacity(blocks.len());
    for block in blocks {
        if let Some(digest) = extract_nested_field(block, "chromosome", "minhash") {
            digests.push(digest);
        }
    }
    digests
}

fn extract_tlsh_values(blocks: &[Value]) -> Vec<String> {
blocks
.iter()
Expand Down
41 changes: 13 additions & 28 deletions src/formats/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -301,9 +301,6 @@ impl Cor20Header {
if bytes.len() != mem::size_of::<Self>() {
return None;
}
if bytes.as_ptr().align_offset(mem::align_of::<Self>()) != 0 {
return None;
}
Some(unsafe { &*(bytes.as_ptr() as *const Self) })
}

Expand Down Expand Up @@ -350,9 +347,6 @@ impl StorageSignature {
if bytes.len() != mem::size_of::<Self>() {
return None;
}
if bytes.as_ptr().align_offset(mem::align_of::<Self>()) != 0 {
return None;
}
Some(unsafe { &*(bytes.as_ptr() as *const Self) })
}

Expand Down Expand Up @@ -393,9 +387,6 @@ impl StorageHeader {
if bytes.len() != mem::size_of::<Self>() {
return None;
}
if bytes.as_ptr().align_offset(mem::align_of::<Self>()) != 0 {
return None;
}
Some(unsafe { &*(bytes.as_ptr() as *const Self) })
}

Expand Down Expand Up @@ -481,7 +472,7 @@ impl StreamHeader {

/// Represents a Metadata Table header in a .NET metadata structure.
///
/// The `MetadataTable` provides information about the structure and versioning of
/// The `MetadataTable` provides information about the structure and versioning of
/// the metadata, as well as the sizes and characteristics of various heaps.
#[repr(C)]
pub struct MetadataTable {
Expand All @@ -504,7 +495,7 @@ pub struct MetadataTable {
impl MetadataTable {
/// Parses a `MetadataTable` from a byte slice.
///
/// This function validates the size and alignment of the byte slice before
/// This function validates the size and alignment of the byte slice before
/// returning a reference to the `MetadataTable`.
///
/// # Parameters
Expand All @@ -519,9 +510,6 @@ impl MetadataTable {
if bytes.len() != mem::size_of::<Self>() {
return None;
}
if bytes.as_ptr().align_offset(mem::align_of::<Self>()) != 0 {
return None;
}
Some(unsafe { &*(bytes.as_ptr() as *const Self) })
}

Expand Down Expand Up @@ -891,7 +879,7 @@ impl SimpleTableIndex {
/// # Parameters
///
/// * `bytes` - A byte slice containing the index data.
/// * `heap_size` - A `u8` value indicating the size of the heap (used to determine
/// * `heap_size` - A `u8` value indicating the size of the heap (used to determine
/// whether the index is 2 or 4 bytes).
///
/// # Returns
Expand Down Expand Up @@ -944,7 +932,7 @@ impl StringHeapIndex {
/// # Parameters
///
/// * `bytes` - A byte slice containing the index data.
/// * `heap_size` - A `u8` value indicating the size of the heap (used to determine
/// * `heap_size` - A `u8` value indicating the size of the heap (used to determine
/// whether the index is 2 or 4 bytes).
///
/// # Returns
Expand Down Expand Up @@ -997,7 +985,7 @@ impl GuidHeapIndex {
/// # Parameters
///
/// * `bytes` - A byte slice containing the index data.
/// * `heap_size` - A `u8` value indicating the size of the heap (used to determine
/// * `heap_size` - A `u8` value indicating the size of the heap (used to determine
/// whether the index is 2 or 4 bytes).
///
/// # Returns
Expand Down Expand Up @@ -1050,7 +1038,7 @@ impl ResolutionScopeIndex {
/// # Parameters
///
/// * `bytes` - A byte slice containing the index data.
/// * `heap_size` - A `u8` value indicating the size of the heap (used to determine
/// * `heap_size` - A `u8` value indicating the size of the heap (used to determine
/// whether the index is 2 or 4 bytes).
///
/// # Returns
Expand Down Expand Up @@ -1104,7 +1092,7 @@ impl TypeDefOrRefIndex {
/// # Parameters
///
/// * `bytes` - A byte slice containing the index data.
/// * `heap_size` - A `u8` value indicating the size of the heap (used to determine
/// * `heap_size` - A `u8` value indicating the size of the heap (used to determine
/// whether the index is 2 or 4 bytes).
///
/// # Returns
Expand Down Expand Up @@ -1138,7 +1126,7 @@ impl TypeDefOrRefIndex {

/// Represents an index into the Blob heap in a .NET metadata structure.
///
/// The `BlobHeapIndex` is used to reference data in the Blob heap, which contains
/// The `BlobHeapIndex` is used to reference data in the Blob heap, which contains
/// metadata such as constants, custom attributes, and signatures.
///
/// # Fields
Expand All @@ -1162,7 +1150,7 @@ impl BlobHeapIndex {
/// # Parameters
///
/// * `bytes` - A byte slice containing the index data.
/// * `heap_size` - A `u8` value indicating the size of the heap (used to determine
/// * `heap_size` - A `u8` value indicating the size of the heap (used to determine
/// whether the index is 2 or 4 bytes).
///
/// # Returns
Expand Down Expand Up @@ -1196,7 +1184,7 @@ impl BlobHeapIndex {

/// Represents an entry in the .NET metadata table.
///
/// Each entry corresponds to a specific metadata table type, such as `Module`,
/// Each entry corresponds to a specific metadata table type, such as `Module`,
/// `TypeRef`, `TypeDef`, `Field`, or `MethodDef`.
///
/// # Variants
Expand Down Expand Up @@ -1231,7 +1219,7 @@ pub struct TinyHeader {
impl TinyHeader {
/// Parses a `TinyHeader` from a byte slice.
///
/// This function validates the size and alignment of the byte slice before
/// This function validates the size and alignment of the byte slice before
/// returning a reference to the `TinyHeader`.
///
/// # Parameters
Expand All @@ -1246,9 +1234,6 @@ impl TinyHeader {
if bytes.len() != mem::size_of::<Self>() {
return None;
}
if bytes.as_ptr().align_offset(mem::align_of::<Self>()) != 0 {
return None;
}
Some(unsafe { &*(bytes.as_ptr() as *const Self) })
}

Expand All @@ -1259,7 +1244,7 @@ impl TinyHeader {

/// Represents the method header in a .NET executable.
///
/// The method header can either be a `Tiny` or `Fat` header, depending on the
/// The method header can either be a `Tiny` or `Fat` header, depending on the
/// method's structure and size.
///
/// # Variants
Expand Down Expand Up @@ -1301,7 +1286,7 @@ impl MethodHeader {

/// Represents a fat method header in a .NET executable.
///
/// The fat header provides detailed information about a method, including its
/// The fat header provides detailed information about a method, including its
/// flags, stack size, code size, and local variable signature token.
#[repr(C)]
pub struct FatHeader {
Expand Down
Loading

0 comments on commit 99014d5

Please sign in to comment.