Skip to content

Commit

Permalink
Add cmp utility
Browse files Browse the repository at this point in the history
The utility should support all the arguments supported by GNU cmp and
perform slightly better.

In a "bad" scenario, ~36M files which are completely different, our
version runs in ~72% of the time of the original on my M1 Max:

 > hyperfine --warmup 1 -i --output=pipe \
     'cmp -l huge huge.3'
 Benchmark 1: cmp -l huge huge.3
   Time (mean ± σ):      3.237 s ±  0.014 s    [User: 2.891 s, System: 0.341 s]
   Range (min … max):    3.221 s …  3.271 s    10 runs

   Warning: Ignoring non-zero exit code.

 > hyperfine --warmup 1 -i --output=pipe \
     '../target/release/diffutils cmp -l huge huge.3'
 Benchmark 1: ../target/release/diffutils cmp -l huge huge.3
   Time (mean ± σ):      2.392 s ±  0.009 s    [User: 1.978 s, System: 0.406 s]
   Range (min … max):    2.378 s …  2.406 s    10 runs

   Warning: Ignoring non-zero exit code.

Our cmp runs in ~116% of the time when comparing libxul.so to the
chromium-browser binary with -l and -b. In a best case scenario of
comparing 2 files which are the same except for the last byte, our
tool is slightly faster.
  • Loading branch information
kov committed Oct 1, 2024
1 parent 72c7802 commit 5005741
Show file tree
Hide file tree
Showing 12 changed files with 2,089 additions and 281 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/fuzzing.yml
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,8 @@ jobs:
strategy:
matrix:
test-target:
- { name: fuzz_cmp, should_pass: true }
- { name: fuzz_cmp_args, should_pass: true }
- { name: fuzz_ed, should_pass: true }
- { name: fuzz_normal, should_pass: true }
- { name: fuzz_patch, should_pass: true }
Expand Down
12 changes: 12 additions & 0 deletions fuzz/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,18 @@ diffutils = { path = "../" }
[workspace]
members = ["."]

[[bin]]
name = "fuzz_cmp"
path = "fuzz_targets/fuzz_cmp.rs"
test = false
doc = false

[[bin]]
name = "fuzz_cmp_args"
path = "fuzz_targets/fuzz_cmp_args.rs"
test = false
doc = false

[[bin]]
name = "fuzz_patch"
path = "fuzz_targets/fuzz_patch.rs"
Expand Down
36 changes: 36 additions & 0 deletions fuzz/dictionaries/cmp.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
"-l"
"--verbose"
"-b"
"--print-bytes"
"-lb"
"-bl"
"-n"
"--bytes"
"--bytes="
"--bytes=1024"
"--bytes=99999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999"
"-i"
"--ignore-initial"
"--ignore-initial="
"--ignore-initial=1024"
"--ignore-initial=99999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999:9999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999"
"-s"
"-q"
"--quiet"
"--silent"
"-"
"--"
"1kB"
"1G"
"1GB"
"1T"
"1TB"
"1P"
"1PB"
"1Z"
"1ZB"
"1Y"
"1YB"
"1Y"
"0"
"1:2"
51 changes: 51 additions & 0 deletions fuzz/fuzz_targets/fuzz_cmp.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
#![no_main]
#[macro_use]
extern crate libfuzzer_sys;
use diffutilslib::cmp::{self, Cmp};

use std::ffi::OsString;
use std::fs::File;
use std::io::Write;

/// Builds an owned `OsString` from a string slice.
fn os(s: &str) -> OsString {
    s.into()
}

// Fuzz target: writes the two fuzzer-provided byte vectors to
// `target/fuzz.cmp.a` and `target/fuzz.cmp.b`, runs `cmp -l -b` on them through
// the library entry points, and panics when the reported result disagrees with
// a direct comparison of the two inputs (or when `cmp` returns an error).
fuzz_target!(|x: (Vec<u8>, Vec<u8>)| {
    let args = vec!["cmp", "-l", "-b", "target/fuzz.cmp.a", "target/fuzz.cmp.b"]
        .into_iter()
        .map(os)
        .peekable();

    let (from, to) = x;

    // Each `File` is a temporary that is dropped at the end of its statement,
    // so both handles are closed before `cmp::cmp` runs.
    File::create("target/fuzz.cmp.a")
        .unwrap()
        .write_all(&from)
        .unwrap();

    File::create("target/fuzz.cmp.b")
        .unwrap()
        .write_all(&to)
        .unwrap();

    let params =
        cmp::parse_params(args).unwrap_or_else(|e| panic!("Failed to parse params: {}", e));
    let ret = cmp::cmp(&params);

    // Report errors first. An `Err` never matches `Ok(Cmp::Equal)` or
    // `Ok(Cmp::Different)`, so if this check came after the comparisons below
    // it could never be reached and errors would be reported with a
    // misleading "equal/different" message.
    if ret.is_err() {
        panic!(
            "target/fuzz.cmp.a and target/fuzz.cmp.b caused cmp to error ({:?}).",
            ret
        );
    }

    if from == to {
        if !matches!(ret, Ok(Cmp::Equal)) {
            panic!(
                "target/fuzz.cmp.a and target/fuzz.cmp.b are equal, but cmp returned {:?}.",
                ret
            );
        }
    } else if !matches!(ret, Ok(Cmp::Different)) {
        panic!(
            "target/fuzz.cmp.a and target/fuzz.cmp.b are different, but cmp returned {:?}.",
            ret
        );
    }
});
23 changes: 23 additions & 0 deletions fuzz/fuzz_targets/fuzz_cmp_args.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
#![no_main]
#[macro_use]
extern crate libfuzzer_sys;
use diffutilslib::cmp;

use libfuzzer_sys::Corpus;
use std::ffi::OsString;

/// Converts a string slice into an owned `OsString`.
fn os(s: &str) -> OsString {
    s.to_owned().into()
}

// Fuzz target for argument parsing: feeds an arbitrary argument vector to
// `cmp::parse_params` and discards the result; only a panic counts as a
// failure.
fuzz_target!(|x: Vec<OsString>| -> Corpus {
    // For longer argument lists, only keep inputs whose first real argument
    // is one of the short options, so the option parser is exercised. x[0]
    // is the executable name; x[1] is only indexed once the length check
    // has passed.
    let too_long_without_option =
        x.len() > 6 && ![os("-l"), os("-b"), os("-s"), os("-n"), os("-i")].contains(&x[1]);
    if too_long_without_option {
        return Corpus::Reject;
    }

    let args = x.into_iter().peekable();
    let _ = cmp::parse_params(args);
    Corpus::Keep
});
Loading

0 comments on commit 5005741

Please sign in to comment.