diff --git a/.github/workflows/fuzzing.yml b/.github/workflows/fuzzing.yml index 589b952..9ad1c17 100644 --- a/.github/workflows/fuzzing.yml +++ b/.github/workflows/fuzzing.yml @@ -41,6 +41,8 @@ jobs: strategy: matrix: test-target: + - { name: fuzz_cmp, should_pass: true } + - { name: fuzz_cmp_args, should_pass: true } - { name: fuzz_ed, should_pass: true } - { name: fuzz_normal, should_pass: true } - { name: fuzz_patch, should_pass: true } diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml index 5debf47..8b0b521 100644 --- a/fuzz/Cargo.toml +++ b/fuzz/Cargo.toml @@ -16,6 +16,18 @@ diffutils = { path = "../" } [workspace] members = ["."] +[[bin]] +name = "fuzz_cmp" +path = "fuzz_targets/fuzz_cmp.rs" +test = false +doc = false + +[[bin]] +name = "fuzz_cmp_args" +path = "fuzz_targets/fuzz_cmp_args.rs" +test = false +doc = false + [[bin]] name = "fuzz_patch" path = "fuzz_targets/fuzz_patch.rs" diff --git a/fuzz/dictionaries/cmp.txt b/fuzz/dictionaries/cmp.txt new file mode 100644 index 0000000..0365fef --- /dev/null +++ b/fuzz/dictionaries/cmp.txt @@ -0,0 +1,36 @@ +"-l" +"--verbose" +"-b" +"--print-bytes" +"-lb" +"-bl" +"-n" +"--bytes" +"--bytes=" +"--bytes=1024" +"--bytes=99999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999" +"-i" +"--ignore-initial" +"--ignore-initial=" +"--ignore-initial=1024" +"--ignore-initial=99999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999:9999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999" +"-s" +"-q" +"--quiet" +"--silent" +"-" +"--" +"1kB" +"1G" +"1GB" +"1T" +"1TB" +"1P" +"1PB" +"1Z" +"1ZB" +"1Y" +"1YB" +"1Y" +"0" +"1:2" diff --git a/fuzz/fuzz_targets/fuzz_cmp.rs b/fuzz/fuzz_targets/fuzz_cmp.rs new file mode 100644 index 0000000..e9d0e4c --- /dev/null +++ b/fuzz/fuzz_targets/fuzz_cmp.rs @@ -0,0 +1,51 @@ +#![no_main] +#[macro_use] +extern crate libfuzzer_sys; +use diffutilslib::cmp::{self, Cmp}; + +use std::ffi::OsString; +use std::fs::File; +use std::io::Write; + +fn os(s: &str) -> OsString { + OsString::from(s) +} + +fuzz_target!(|x: (Vec, Vec)| { + let args = vec!["cmp", "-l", "-b", "target/fuzz.cmp.a", "target/fuzz.cmp.b"] + .into_iter() + .map(|s| os(s)) + .peekable(); + + let (from, to) = x; + + File::create("target/fuzz.cmp.a") + .unwrap() + .write_all(&from) + .unwrap(); + + File::create("target/fuzz.cmp.b") + .unwrap() + .write_all(&to) + .unwrap(); + + let params = + cmp::parse_params(args).unwrap_or_else(|e| panic!("Failed to parse params: {}", e)); + let ret = cmp::cmp(¶ms); + if from == to && !matches!(ret, Ok(Cmp::Equal)) { + panic!( + "target/fuzz.cmp.a and target/fuzz.cmp.b are equal, but cmp returned {:?}.", + ret + ); + } else if from != to && !matches!(ret, Ok(Cmp::Different)) { + panic!( + "target/fuzz.cmp.a and target/fuzz.cmp.b are different, but cmp returned {:?}.", + ret + ); + } else if ret.is_err() { + panic!( + "target/fuzz.cmp.a and target/fuzz.cmp.b caused cmp to error ({:?}).", + ret + ); + } +}); diff --git a/fuzz/fuzz_targets/fuzz_cmp_args.rs b/fuzz/fuzz_targets/fuzz_cmp_args.rs new file mode 100644 index 0000000..579cf34 --- /dev/null +++ b/fuzz/fuzz_targets/fuzz_cmp_args.rs @@ -0,0 +1,23 @@ +#![no_main] +#[macro_use] +extern crate libfuzzer_sys; +use diffutilslib::cmp; + +use libfuzzer_sys::Corpus; +use std::ffi::OsString; + +fn os(s: &str) -> OsString { + OsString::from(s) +} + +fuzz_target!(|x: Vec| -> Corpus { + if x.len() > 6 { + // Make sure we try to parse an option when we get longer args. x[0] will be + // the executable name. + if ![os("-l"), os("-b"), os("-s"), os("-n"), os("-i")].contains(&x[1]) { + return Corpus::Reject; + } + } + let _ = cmp::parse_params(x.into_iter().peekable()); + Corpus::Keep +}); diff --git a/src/cmp.rs b/src/cmp.rs new file mode 100644 index 0000000..29b8775 --- /dev/null +++ b/src/cmp.rs @@ -0,0 +1,1115 @@ +// This file is part of the uutils diffutils package. +// +// For the full copyright and license information, please view the LICENSE-* +// files that was distributed with this source code. + +use crate::utils::format_failure_to_read_input_file; +use std::env::{self, ArgsOs}; +use std::ffi::OsString; +use std::io::{BufRead, BufReader, BufWriter, Read, Write}; +use std::iter::Peekable; +use std::process::ExitCode; +use std::{fs, io}; + +#[cfg(not(target_os = "windows"))] +use std::os::fd::{AsRawFd, FromRawFd}; + +#[cfg(not(target_os = "windows"))] +use std::os::unix::fs::MetadataExt; + +#[cfg(target_os = "windows")] +use std::os::windows::fs::MetadataExt; + +#[derive(Clone, Debug, Default, Eq, PartialEq)] +pub struct Params { + executable: OsString, + from: OsString, + to: OsString, + print_bytes: bool, + skip_a: Option, + skip_b: Option, + max_bytes: Option, + verbose: bool, + quiet: bool, +} + +#[inline] +fn usage_string(executable: &str) -> String { + format!("Usage: {} ", executable) +} + +#[cfg(not(target_os = "windows"))] +fn is_stdout_dev_null() -> bool { + let Ok(dev_null) = fs::metadata("/dev/null") else { + return false; + }; + + let stdout_fd = io::stdout().lock().as_raw_fd(); + + // SAFETY: we have exclusive access to stdout right now. + let stdout_file = unsafe { fs::File::from_raw_fd(stdout_fd) }; + let Ok(stdout) = stdout_file.metadata() else { + return false; + }; + + let is_dev_null = stdout.dev() == dev_null.dev() && stdout.ino() == dev_null.ino(); + + // Don't let File close the fd. It's unfortunate that File doesn't have a leak_fd(). + std::mem::forget(stdout_file); + + is_dev_null +} + +pub fn parse_params>(mut opts: Peekable) -> Result { + let Some(executable) = opts.next() else { + return Err("Usage: ".to_string()); + }; + let executable_str = executable.to_string_lossy().to_string(); + + let parse_skip = |param: &str, skip_desc: &str| -> Result { + let suffix_start = param + .find(|b: char| !b.is_ascii_digit()) + .unwrap_or(param.len()); + let mut num = match param[..suffix_start].parse::() { + Ok(num) => num, + Err(e) if *e.kind() == std::num::IntErrorKind::PosOverflow => usize::MAX, + Err(_) => { + return Err(format!( + "{}: invalid --ignore-initial value '{}'", + executable_str, skip_desc + )) + } + }; + + if suffix_start != param.len() { + // Note that GNU cmp advertises supporting up to Y, but fails if you try + // to actually use anything beyond E. + let multiplier: usize = match ¶m[suffix_start..] { + "kB" => 1_000, + "K" => 1_024, + "MB" => 1_000_000, + "M" => 1_048_576, + "GB" => 1_000_000_000, + "G" => 1_073_741_824, + "TB" => 1_000_000_000_000, + "T" => 1_099_511_627_776, + "PB" => 1_000_000_000_000_000, + "P" => 1_125_899_906_842_624, + "EB" => 1_000_000_000_000_000_000, + "E" => 1_152_921_504_606_846_976, + "ZB" => usize::MAX, // 1_000_000_000_000_000_000_000, + "Z" => usize::MAX, // 1_180_591_620_717_411_303_424, + "YB" => usize::MAX, // 1_000_000_000_000_000_000_000_000, + "Y" => usize::MAX, // 1_208_925_819_614_629_174_706_176, + _ => { + return Err(format!( + "{}: invalid --ignore-initial value '{}'", + executable_str, skip_desc + )); + } + }; + + num = match num.overflowing_mul(multiplier) { + (n, false) => n, + _ => usize::MAX, + } + } + + Ok(num) + }; + + let mut params = Params { + executable, + ..Default::default() + }; + let mut from = None; + let mut to = None; + let mut skip_pos1 = None; + let mut skip_pos2 = None; + while let Some(param) = opts.next() { + if param == "--" { + break; + } + if param == "-" { + if from.is_none() { + from = Some(param); + } else if to.is_none() { + to = Some(param); + } else { + return Err(usage_string(&executable_str)); + } + continue; + } + if param == "-b" || param == "--print-bytes" { + params.print_bytes = true; + continue; + } + if param == "-l" || param == "--verbose" { + params.verbose = true; + continue; + } + if param == "-lb" || param == "-bl" { + params.print_bytes = true; + params.verbose = true; + continue; + } + + let param_str = param.to_string_lossy().to_string(); + if param == "-n" || param_str.starts_with("--bytes=") { + let max_bytes = if param == "-n" { + opts.next() + .ok_or_else(|| usage_string(&executable_str))? + .to_string_lossy() + .to_string() + } else { + let (_, arg) = param_str.split_once('=').unwrap(); + arg.to_string() + }; + let max_bytes = match max_bytes.parse::() { + Ok(num) => num, + Err(e) if *e.kind() == std::num::IntErrorKind::PosOverflow => usize::MAX, + Err(_) => { + return Err(format!( + "{}: invalid --bytes value '{}'", + executable_str, max_bytes + )) + } + }; + params.max_bytes = Some(max_bytes); + continue; + } + if param == "-i" || param_str.starts_with("--ignore-initial=") { + let skip_desc = if param == "-i" { + opts.next() + .ok_or_else(|| usage_string(&executable_str))? + .to_string_lossy() + .to_string() + } else { + let (_, arg) = param_str.split_once('=').unwrap(); + arg.to_string() + }; + let (skip_a, skip_b) = if let Some((skip_a, skip_b)) = skip_desc.split_once(':') { + ( + parse_skip(skip_a, &skip_desc)?, + parse_skip(skip_b, &skip_desc)?, + ) + } else { + let skip = parse_skip(&skip_desc, &skip_desc)?; + (skip, skip) + }; + params.skip_a = Some(skip_a); + params.skip_b = Some(skip_b); + continue; + } + if param == "-s" || param == "--quiet" || param == "--silent" { + params.quiet = true; + continue; + } + if param == "--help" { + println!("{}", usage_string(&executable_str)); + std::process::exit(0); + } + if param_str.starts_with('-') { + return Err(format!("Unknown option: {:?}", param)); + } + if from.is_none() { + from = Some(param); + } else if to.is_none() { + to = Some(param); + } else if skip_pos1.is_none() { + skip_pos1 = Some(parse_skip(¶m_str, ¶m_str)?); + } else if skip_pos2.is_none() { + skip_pos2 = Some(parse_skip(¶m_str, ¶m_str)?); + } else { + return Err(usage_string(&executable_str)); + } + } + + // Do as GNU cmp, and completely disable printing if we are + // outputing to /dev/null. + #[cfg(not(target_os = "windows"))] + if is_stdout_dev_null() { + params.quiet = true; + params.verbose = false; + params.print_bytes = false; + } + + if params.quiet && params.verbose { + return Err(format!( + "{}: options -l and -s are incompatible", + executable_str + )); + } + + params.from = if let Some(from) = from { + from + } else if let Some(param) = opts.next() { + param + } else { + return Err(usage_string(&executable_str)); + }; + params.to = if let Some(to) = to { + to + } else if let Some(param) = opts.next() { + param + } else { + OsString::from("-") + }; + + // GNU cmp ignores positional skip arguments if -i is provided. + if params.skip_a.is_none() { + if skip_pos1.is_some() { + params.skip_a = skip_pos1; + } else if let Some(param) = opts.next() { + let param_str = param.to_string_lossy().to_string(); + params.skip_a = Some(parse_skip(¶m_str, ¶m_str)?); + } + }; + if params.skip_b.is_none() { + if skip_pos2.is_some() { + params.skip_b = skip_pos2; + } else if let Some(param) = opts.next() { + let param_str = param.to_string_lossy().to_string(); + params.skip_b = Some(parse_skip(¶m_str, ¶m_str)?); + } + } + + Ok(params) +} + +fn prepare_reader( + path: &OsString, + skip: &Option, + params: &Params, +) -> Result, String> { + let mut reader: Box = if path == "-" { + Box::new(BufReader::new(io::stdin())) + } else { + match fs::File::open(path) { + Ok(file) => Box::new(BufReader::new(file)), + Err(e) => { + return Err(format_failure_to_read_input_file( + ¶ms.executable, + path, + &e, + )); + } + } + }; + + if let Some(skip) = skip { + if let Err(e) = io::copy(&mut reader.by_ref().take(*skip as u64), &mut io::sink()) { + return Err(format_failure_to_read_input_file( + ¶ms.executable, + path, + &e, + )); + } + } + + Ok(reader) +} + +#[derive(Debug)] +pub enum Cmp { + Equal, + Different, +} + +pub fn cmp(params: &Params) -> Result { + let mut from = prepare_reader(¶ms.from, ¶ms.skip_a, params)?; + let mut to = prepare_reader(¶ms.to, ¶ms.skip_b, params)?; + + let mut at_byte = 1; + let mut at_line = 1; + let mut start_of_line = true; + let mut verbose_diffs = vec![]; + loop { + // Fill up our buffers. + let from_buf = match from.fill_buf() { + Ok(buf) => buf, + Err(e) => { + return Err(format_failure_to_read_input_file( + ¶ms.executable, + ¶ms.from, + &e, + )); + } + }; + + let to_buf = match to.fill_buf() { + Ok(buf) => buf, + Err(e) => { + return Err(format_failure_to_read_input_file( + ¶ms.executable, + ¶ms.to, + &e, + )); + } + }; + + // Check for EOF conditions. + if from_buf.is_empty() && to_buf.is_empty() { + break; + } + + if from_buf.is_empty() || to_buf.is_empty() { + let eof_on = if from_buf.is_empty() { + ¶ms.from.to_string_lossy() + } else { + ¶ms.to.to_string_lossy() + }; + + if params.verbose { + report_verbose_diffs(verbose_diffs, params)?; + } + + report_eof(at_byte, at_line, start_of_line, eof_on, params); + return Ok(Cmp::Different); + } + + // Fast path - for long files in which almost all bytes are the same we + // can do a direct comparison to let the compiler optimize. + let consumed = std::cmp::min(from_buf.len(), to_buf.len()); + if from_buf[..consumed] == to_buf[..consumed] { + let last = from_buf[..consumed].last().unwrap(); + + at_byte += consumed; + at_line += from_buf[..consumed].iter().filter(|&c| *c == b'\n').count(); + + start_of_line = *last == b'\n'; + + if let Some(max_bytes) = params.max_bytes { + if at_byte > max_bytes { + break; + } + } + + from.consume(consumed); + to.consume(consumed); + + continue; + } + + // Iterate over the buffers, the zip iterator will stop us as soon as the + // first one runs out. + for (&from_byte, &to_byte) in from_buf.iter().zip(to_buf.iter()) { + if from_byte != to_byte { + if params.verbose { + verbose_diffs.push((at_byte, from_byte, to_byte)); + } else { + report_difference(from_byte, to_byte, at_byte, at_line, params); + return Ok(Cmp::Different); + } + } + + start_of_line = from_byte == b'\n'; + if start_of_line { + at_line += 1; + } + + at_byte += 1; + + if let Some(max_bytes) = params.max_bytes { + if at_byte > max_bytes { + break; + } + } + } + + // Notify our readers about the bytes we went over. + from.consume(consumed); + to.consume(consumed); + } + + if params.verbose && !verbose_diffs.is_empty() { + report_verbose_diffs(verbose_diffs, params)?; + return Ok(Cmp::Different); + } + + Ok(Cmp::Equal) +} + +// Exit codes are documented at +// https://www.gnu.org/software/diffutils/manual/html_node/Invoking-cmp.html +// An exit status of 0 means no differences were found, +// 1 means some differences were found, +// and 2 means trouble. +pub fn main(opts: Peekable) -> ExitCode { + let params = match parse_params(opts) { + Ok(param) => param, + Err(e) => { + eprintln!("{e}"); + return ExitCode::from(2); + } + }; + + if params.from == "-" && params.to == "-" + || same_file::is_same_file(¶ms.from, ¶ms.to).unwrap_or(false) + { + return ExitCode::SUCCESS; + } + + // If the files have different sizes, we already know they are not identical. If we have not + // been asked to show even the first difference, we can quit early. + if params.quiet { + if let (Ok(a_meta), Ok(b_meta)) = (fs::metadata(¶ms.from), fs::metadata(¶ms.to)) { + #[cfg(not(target_os = "windows"))] + if a_meta.size() != b_meta.size() { + return ExitCode::from(1); + } + #[cfg(target_os = "windows")] + if a_meta.file_size() != b_meta.file_size() { + return ExitCode::from(1); + } + } + } + + match cmp(¶ms) { + Ok(Cmp::Equal) => ExitCode::SUCCESS, + Ok(Cmp::Different) => ExitCode::from(1), + Err(e) => { + if !params.quiet { + eprintln!("{e}"); + } + ExitCode::from(2) + } + } +} + +#[inline] +fn is_ascii_printable(byte: u8) -> bool { + let c = byte as char; + c.is_ascii() && !c.is_ascii_control() +} + +#[inline] +fn format_byte(byte: u8) -> String { + let mut byte = byte; + let mut quoted = vec![]; + + if !is_ascii_printable(byte) { + if byte >= 128 { + quoted.push(b'M'); + quoted.push(b'-'); + byte -= 128; + } + + if byte < 32 { + quoted.push(b'^'); + byte += 64; + } else if byte == 127 { + quoted.push(b'^'); + byte = b'?'; + } + assert!((byte as char).is_ascii()); + } + + quoted.push(byte); + + // SAFETY: the checks and shifts we do above match what cat and GNU + // cmp do to ensure characters fall inside the ascii range. + unsafe { String::from_utf8_unchecked(quoted) } +} + +fn report_verbose_diffs(diffs: Vec<(usize, u8, u8)>, params: &Params) -> Result<(), String> { + assert!(!params.quiet); + + let mut stdout = BufWriter::new(io::stdout().lock()); + if let Some((offset, _, _)) = diffs.last() { + // Obtain the width of the first column from the last byte offset. + let width = format!("{}", offset).len(); + + if params.print_bytes { + for (at_byte, from_byte, to_byte) in diffs { + writeln!( + stdout, + "{:>width$} {:>3o} {:4} {:>3o} {}", + at_byte, + from_byte, + format_byte(from_byte), + to_byte, + format_byte(to_byte), + ) + .map_err(|e| { + format!( + "{}: error printing output: {e}", + params.executable.to_string_lossy() + ) + })?; + } + } else { + for (at_byte, from_byte, to_byte) in diffs { + writeln!( + stdout, + "{:>width$} {:>3o} {:>3o}", + at_byte, + from_byte, + to_byte, + width = width + ) + .map_err(|e| { + format!( + "{}: error printing output: {e}", + params.executable.to_string_lossy() + ) + })?; + } + } + } + + Ok(()) +} + +#[inline] +fn report_eof(at_byte: usize, at_line: usize, start_of_line: bool, eof_on: &str, params: &Params) { + if params.quiet { + return; + } + + if at_byte == 1 { + eprintln!( + "{}: EOF on '{}' which is empty", + params.executable.to_string_lossy(), + eof_on + ); + } else if params.verbose { + eprintln!( + "{}: EOF on '{}' after byte {}", + params.executable.to_string_lossy(), + eof_on, + at_byte - 1, + ); + } else if start_of_line { + eprintln!( + "{}: EOF on '{}' after byte {}, line {}", + params.executable.to_string_lossy(), + eof_on, + at_byte - 1, + at_line - 1 + ); + } else { + eprintln!( + "{}: EOF on '{}' after byte {}, in line {}", + params.executable.to_string_lossy(), + eof_on, + at_byte - 1, + at_line + ); + } +} + +fn is_posix_locale() -> bool { + let locale = if let Ok(locale) = env::var("LC_ALL") { + locale + } else if let Ok(locale) = env::var("LC_MESSAGES") { + locale + } else if let Ok(locale) = env::var("LANG") { + locale + } else { + "C".to_string() + }; + + locale == "C" || locale == "POSIX" +} + +#[inline] +fn report_difference(from_byte: u8, to_byte: u8, at_byte: usize, at_line: usize, params: &Params) { + if params.quiet { + return; + } + + let term = if is_posix_locale() && !params.print_bytes { + "char" + } else { + "byte" + }; + print!( + "{} {} differ: {term} {}, line {}", + ¶ms.from.to_string_lossy(), + ¶ms.to.to_string_lossy(), + at_byte, + at_line + ); + if params.print_bytes { + let char_width = if to_byte >= 0x7F { 2 } else { 1 }; + print!( + " is {:>3o} {:char_width$} {:>3o} {:char_width$}", + from_byte, + format_byte(from_byte), + to_byte, + format_byte(to_byte) + ); + } + println!(); +} + +#[cfg(test)] +mod tests { + use super::*; + fn os(s: &str) -> OsString { + OsString::from(s) + } + + #[test] + fn positional() { + assert_eq!( + Ok(Params { + executable: os("cmp"), + from: os("foo"), + to: os("bar"), + ..Default::default() + }), + parse_params([os("cmp"), os("foo"), os("bar")].iter().cloned().peekable()) + ); + + assert_eq!( + Ok(Params { + executable: os("cmp"), + from: os("foo"), + to: os("-"), + ..Default::default() + }), + parse_params([os("cmp"), os("foo")].iter().cloned().peekable()) + ); + + assert_eq!( + Ok(Params { + executable: os("cmp"), + from: os("foo"), + to: os("--help"), + ..Default::default() + }), + parse_params( + [os("cmp"), os("foo"), os("--"), os("--help")] + .iter() + .cloned() + .peekable() + ) + ); + + assert_eq!( + Ok(Params { + executable: os("cmp"), + from: os("foo"), + to: os("bar"), + skip_a: Some(1), + skip_b: None, + ..Default::default() + }), + parse_params( + [os("cmp"), os("foo"), os("bar"), os("1")] + .iter() + .cloned() + .peekable() + ) + ); + + assert_eq!( + Ok(Params { + executable: os("cmp"), + from: os("foo"), + to: os("bar"), + skip_a: Some(1), + skip_b: Some(usize::MAX), + ..Default::default() + }), + parse_params( + [os("cmp"), os("foo"), os("bar"), os("1"), os("2Y")] + .iter() + .cloned() + .peekable() + ) + ); + + // Bad positional arguments. + assert_eq!( + Err("Usage: cmp ".to_string()), + parse_params( + [os("cmp"), os("foo"), os("bar"), os("1"), os("2"), os("3")] + .iter() + .cloned() + .peekable() + ) + ); + assert_eq!( + Err("Usage: cmp ".to_string()), + parse_params([os("cmp")].iter().cloned().peekable()) + ); + } + + #[test] + fn execution_modes() { + let print_bytes = Params { + executable: os("cmp"), + from: os("foo"), + to: os("bar"), + print_bytes: true, + ..Default::default() + }; + assert_eq!( + Ok(print_bytes.clone()), + parse_params( + [os("cmp"), os("-b"), os("foo"), os("bar")] + .iter() + .cloned() + .peekable() + ) + ); + assert_eq!( + Ok(print_bytes), + parse_params( + [os("cmp"), os("--print-bytes"), os("foo"), os("bar")] + .iter() + .cloned() + .peekable() + ) + ); + + let verbose = Params { + executable: os("cmp"), + from: os("foo"), + to: os("bar"), + verbose: true, + ..Default::default() + }; + assert_eq!( + Ok(verbose.clone()), + parse_params( + [os("cmp"), os("-l"), os("foo"), os("bar")] + .iter() + .cloned() + .peekable() + ) + ); + assert_eq!( + Ok(verbose), + parse_params( + [os("cmp"), os("--verbose"), os("foo"), os("bar")] + .iter() + .cloned() + .peekable() + ) + ); + + let verbose_and_print_bytes = Params { + executable: os("cmp"), + from: os("foo"), + to: os("bar"), + print_bytes: true, + verbose: true, + ..Default::default() + }; + assert_eq!( + Ok(verbose_and_print_bytes.clone()), + parse_params( + [os("cmp"), os("-l"), os("-b"), os("foo"), os("bar")] + .iter() + .cloned() + .peekable() + ) + ); + assert_eq!( + Ok(verbose_and_print_bytes.clone()), + parse_params( + [os("cmp"), os("-lb"), os("foo"), os("bar")] + .iter() + .cloned() + .peekable() + ) + ); + assert_eq!( + Ok(verbose_and_print_bytes), + parse_params( + [os("cmp"), os("-bl"), os("foo"), os("bar")] + .iter() + .cloned() + .peekable() + ) + ); + + assert_eq!( + Ok(Params { + executable: os("cmp"), + from: os("foo"), + to: os("bar"), + quiet: true, + ..Default::default() + }), + parse_params( + [os("cmp"), os("-s"), os("foo"), os("bar")] + .iter() + .cloned() + .peekable() + ) + ); + + // Some options do not mix. + assert_eq!( + Err("cmp: options -l and -s are incompatible".to_string()), + parse_params( + [os("cmp"), os("-l"), os("-s"), os("foo"), os("bar")] + .iter() + .cloned() + .peekable() + ) + ); + } + + #[test] + fn max_bytes() { + let max_bytes = Params { + executable: os("cmp"), + from: os("foo"), + to: os("bar"), + max_bytes: Some(1), + ..Default::default() + }; + assert_eq!( + Ok(max_bytes.clone()), + parse_params( + [os("cmp"), os("-n"), os("1"), os("foo"), os("bar")] + .iter() + .cloned() + .peekable() + ) + ); + assert_eq!( + Ok(max_bytes), + parse_params( + [os("cmp"), os("--bytes=1"), os("foo"), os("bar")] + .iter() + .cloned() + .peekable() + ) + ); + + assert_eq!( + Ok(Params { + executable: os("cmp"), + from: os("foo"), + to: os("bar"), + max_bytes: Some(usize::MAX), + ..Default::default() + }), + parse_params( + [ + os("cmp"), + os("--bytes=99999999999999999999999999999999999999999999999999999999999"), + os("foo"), + os("bar") + ] + .iter() + .cloned() + .peekable() + ) + ); + + // Failure case + assert_eq!( + Err("cmp: invalid --bytes value '1K'".to_string()), + parse_params( + [os("cmp"), os("--bytes=1K"), os("foo"), os("bar")] + .iter() + .cloned() + .peekable() + ) + ); + } + + #[test] + fn skips() { + let skips = Params { + executable: os("cmp"), + from: os("foo"), + to: os("bar"), + skip_a: Some(1), + skip_b: Some(1), + ..Default::default() + }; + assert_eq!( + Ok(skips.clone()), + parse_params( + [os("cmp"), os("-i"), os("1"), os("foo"), os("bar")] + .iter() + .cloned() + .peekable() + ) + ); + assert_eq!( + Ok(skips), + parse_params( + [os("cmp"), os("--ignore-initial=1"), os("foo"), os("bar")] + .iter() + .cloned() + .peekable() + ) + ); + + assert_eq!( + Ok(Params { + executable: os("cmp"), + from: os("foo"), + to: os("bar"), + skip_a: Some(usize::MAX), + skip_b: Some(usize::MAX), + ..Default::default() + }), + parse_params( + [ + os("cmp"), + os("-i"), + os("99999999999999999999999999999999999999999999999999999999999"), + os("foo"), + os("bar") + ] + .iter() + .cloned() + .peekable() + ) + ); + + assert_eq!( + Ok(Params { + executable: os("cmp"), + from: os("foo"), + to: os("bar"), + skip_a: Some(1), + skip_b: Some(2), + ..Default::default() + }), + parse_params( + [os("cmp"), os("--ignore-initial=1:2"), os("foo"), os("bar")] + .iter() + .cloned() + .peekable() + ) + ); + + assert_eq!( + Ok(Params { + executable: os("cmp"), + from: os("foo"), + to: os("bar"), + skip_a: Some(1_000_000_000), + skip_b: Some(1_152_921_504_606_846_976 * 2), + ..Default::default() + }), + parse_params( + [ + os("cmp"), + os("--ignore-initial=1GB:2E"), + os("foo"), + os("bar") + ] + .iter() + .cloned() + .peekable() + ) + ); + + // All special suffixes. + for (i, suffixes) in [ + ["kB", "K"], + ["MB", "M"], + ["GB", "G"], + ["TB", "T"], + ["PB", "P"], + ["EB", "E"], + ["ZB", "Z"], + ["YB", "Y"], + ] + .iter() + .enumerate() + { + let values = [ + 1_000usize.checked_pow((i + 1) as u32).unwrap_or(usize::MAX), + 1024usize.checked_pow((i + 1) as u32).unwrap_or(usize::MAX), + ]; + for (j, v) in values.iter().enumerate() { + assert_eq!( + Ok(Params { + executable: os("cmp"), + from: os("foo"), + to: os("bar"), + skip_a: Some(*v), + skip_b: Some(2), + ..Default::default() + }), + parse_params( + [ + os("cmp"), + os("-i"), + os(&format!("1{}:2", suffixes[j])), + os("foo"), + os("bar"), + ] + .iter() + .cloned() + .peekable() + ) + ); + } + } + + // Ignores positional arguments when -i is provided. + assert_eq!( + Ok(Params { + executable: os("cmp"), + from: os("foo"), + to: os("bar"), + skip_a: Some(1), + skip_b: Some(2), + ..Default::default() + }), + parse_params( + [ + os("cmp"), + os("-i"), + os("1:2"), + os("foo"), + os("bar"), + os("3"), + os("4") + ] + .iter() + .cloned() + .peekable() + ) + ); + + // Failure cases + assert_eq!( + Err("cmp: invalid --ignore-initial value '1mb'".to_string()), + parse_params( + [os("cmp"), os("--ignore-initial=1mb"), os("foo"), os("bar")] + .iter() + .cloned() + .peekable() + ) + ); + assert_eq!( + Err("cmp: invalid --ignore-initial value '1:2:3'".to_string()), + parse_params( + [ + os("cmp"), + os("--ignore-initial=1:2:3"), + os("foo"), + os("bar") + ] + .iter() + .cloned() + .peekable() + ) + ); + assert_eq!( + Err("cmp: invalid --ignore-initial value '-1'".to_string()), + parse_params( + [os("cmp"), os("--ignore-initial=-1"), os("foo"), os("bar")] + .iter() + .cloned() + .peekable() + ) + ); + } +} diff --git a/src/diff.rs b/src/diff.rs index 6998e2b..f769a29 100644 --- a/src/diff.rs +++ b/src/diff.rs @@ -18,7 +18,7 @@ use std::process::{exit, ExitCode}; // An exit status of 0 means no differences were found, // 1 means some differences were found, // and 2 means trouble. -pub(crate) fn main(opts: Peekable) -> ExitCode { +pub fn main(opts: Peekable) -> ExitCode { let params = parse_params(opts).unwrap_or_else(|error| { eprintln!("{error}"); exit(2); diff --git a/src/lib.rs b/src/lib.rs index 0bb911b..a20ac56 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,3 +1,4 @@ +pub mod cmp; pub mod context_diff; pub mod ed_diff; pub mod macros; diff --git a/src/main.rs b/src/main.rs index 824b45c..8194d00 100644 --- a/src/main.rs +++ b/src/main.rs @@ -5,12 +5,13 @@ use std::{ env::ArgsOs, - ffi::OsString, + ffi::{OsStr, OsString}, iter::Peekable, path::{Path, PathBuf}, process::ExitCode, }; +mod cmp; mod context_diff; mod diff; mod ed_diff; @@ -29,8 +30,10 @@ fn binary_path(args: &mut Peekable) -> PathBuf { } } -fn name(binary_path: &Path) -> Option<&str> { - binary_path.file_stem()?.to_str() +/// #Panics +/// Panics if path has no UTF-8 valid name +fn name(binary_path: &Path) -> &OsStr { + binary_path.file_stem().unwrap() } const VERSION: &str = env!("CARGO_PKG_VERSION"); @@ -39,12 +42,12 @@ fn usage(name: &str) { println!("{name} {VERSION} (multi-call binary)\n"); println!("Usage: {name} [function [arguments...]]\n"); println!("Currently defined functions:\n"); - println!(" diff\n"); + println!(" cmp, diff\n"); } -fn second_arg_error(name: &str) -> ! { - println!("Expected utility name as second argument, got nothing."); - usage(name); +fn second_arg_error(name: &OsStr) -> ! { + eprintln!("Expected utility name as second argument, got nothing."); + usage(&name.to_string_lossy()); std::process::exit(0); } @@ -52,10 +55,7 @@ fn main() -> ExitCode { let mut args = std::env::args_os().peekable(); let exe_path = binary_path(&mut args); - let exe_name = name(&exe_path).unwrap_or_else(|| { - usage(""); - std::process::exit(1); - }); + let exe_name = name(&exe_path); let util_name = if exe_name == "diffutils" { // Discard the item we peeked. @@ -70,9 +70,10 @@ fn main() -> ExitCode { match util_name.to_str() { Some("diff") => diff::main(args), + Some("cmp") => cmp::main(args), Some(name) => { - usage(&format!("{}: utility not supported", name)); - ExitCode::from(1) + eprintln!("{}: utility not supported", name); + ExitCode::from(2) } None => second_arg_error(exe_name), } diff --git a/src/utils.rs b/src/utils.rs index a216784..88b39ff 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -72,19 +72,30 @@ pub fn get_modification_time(file_path: &str) -> String { modification_time } -pub fn report_failure_to_read_input_file( +pub fn format_failure_to_read_input_file( executable: &OsString, filepath: &OsString, error: &std::io::Error, -) { +) -> String { // std::io::Error's display trait outputs "{detail} (os error {code})" // but we want only the {detail} (error string) part let error_code_re = Regex::new(r"\ \(os\ error\ \d+\)$").unwrap(); - eprintln!( + format!( "{}: {}: {}", executable.to_string_lossy(), filepath.to_string_lossy(), error_code_re.replace(error.to_string().as_str(), ""), + ) +} + +pub fn report_failure_to_read_input_file( + executable: &OsString, + filepath: &OsString, + error: &std::io::Error, +) { + eprintln!( + "{}", + format_failure_to_read_input_file(executable, filepath, error) ); } diff --git a/tests/integration.rs b/tests/integration.rs index 2b3fd4f..4cff8ff 100644 --- a/tests/integration.rs +++ b/tests/integration.rs @@ -4,314 +4,869 @@ // files that was distributed with this source code. use assert_cmd::cmd::Command; -use diffutilslib::assert_diff_eq; use predicates::prelude::*; -use std::fs::File; +use std::fs::{File, OpenOptions}; use std::io::Write; use tempfile::{tempdir, NamedTempFile}; // Integration tests for the diffutils command +mod common { + use super::*; -#[test] -fn unknown_param() -> Result<(), Box> { - let mut cmd = Command::cargo_bin("diffutils")?; - cmd.arg("diff"); - cmd.arg("--foobar"); - cmd.assert() - .code(predicate::eq(2)) - .failure() - .stderr(predicate::str::starts_with("Unknown option: \"--foobar\"")); - Ok(()) -} + #[test] + fn unknown_param() -> Result<(), Box> { + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.arg("patch"); + cmd.assert() + .code(predicate::eq(2)) + .failure() + .stderr(predicate::eq("patch: utility not supported\n")); -#[test] -fn cannot_read_files() -> Result<(), Box> { - let file = NamedTempFile::new()?; + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.assert() + .code(predicate::eq(0)) + .success() + .stderr(predicate::str::starts_with( + "Expected utility name as second argument, got nothing.\n", + )); + + for subcmd in ["diff", "cmp"] { + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.arg(subcmd); + cmd.arg("--foobar"); + cmd.assert() + .code(predicate::eq(2)) + .failure() + .stderr(predicate::str::starts_with("Unknown option: \"--foobar\"")); + } + Ok(()) + } - let nofile = NamedTempFile::new()?; - let nopath = nofile.into_temp_path(); - std::fs::remove_file(&nopath)?; + #[test] + fn cannot_read_files() -> Result<(), Box> { + let file = NamedTempFile::new()?; + + let nofile = NamedTempFile::new()?; + let nopath = nofile.into_temp_path(); + std::fs::remove_file(&nopath)?; + + #[cfg(not(windows))] + let error_message = "No such file or directory"; + #[cfg(windows)] + let error_message = "The system cannot find the file specified."; + + for subcmd in ["diff", "cmp"] { + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.arg(subcmd); + cmd.arg(&nopath).arg(file.path()); + cmd.assert() + .code(predicate::eq(2)) + .failure() + .stderr(predicate::str::ends_with(format!( + ": {}: {error_message}\n", + &nopath.as_os_str().to_string_lossy() + ))); + + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.arg(subcmd); + cmd.arg(file.path()).arg(&nopath); + cmd.assert() + .code(predicate::eq(2)) + .failure() + .stderr(predicate::str::ends_with(format!( + ": {}: {error_message}\n", + &nopath.as_os_str().to_string_lossy() + ))); + } - #[cfg(not(windows))] - let error_message = "No such file or directory"; - #[cfg(windows)] - let error_message = "The system cannot find the file specified."; - - let mut cmd = Command::cargo_bin("diffutils")?; - cmd.arg("diff"); - cmd.arg(&nopath).arg(file.path()); - cmd.assert() - .code(predicate::eq(2)) - .failure() - .stderr(predicate::str::ends_with(format!( - ": {}: {error_message}\n", - &nopath.as_os_str().to_string_lossy() - ))); - - let mut cmd = Command::cargo_bin("diffutils")?; - cmd.arg("diff"); - cmd.arg(file.path()).arg(&nopath); - cmd.assert() - .code(predicate::eq(2)) - .failure() - .stderr(predicate::str::ends_with(format!( - ": {}: {error_message}\n", - &nopath.as_os_str().to_string_lossy() - ))); - - let mut cmd = Command::cargo_bin("diffutils")?; - cmd.arg("diff"); - cmd.arg(&nopath).arg(&nopath); - cmd.assert().code(predicate::eq(2)).failure().stderr( - predicate::str::contains(format!( - ": {}: {error_message}\n", - &nopath.as_os_str().to_string_lossy() - )) - .count(2), - ); - - Ok(()) + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.arg("diff"); + cmd.arg(&nopath).arg(&nopath); + cmd.assert().code(predicate::eq(2)).failure().stderr( + predicate::str::contains(format!( + ": {}: {error_message}\n", + &nopath.as_os_str().to_string_lossy() + )) + .count(2), + ); + + Ok(()) + } } -#[test] -fn no_differences() -> Result<(), Box> { - let file = NamedTempFile::new()?; - for option in ["", "-u", "-c", "-e"] { +mod diff { + use diffutilslib::assert_diff_eq; + + use super::*; + + #[test] + fn no_differences() -> Result<(), Box> { + let file = NamedTempFile::new()?; + for option in ["", "-u", "-c", "-e"] { + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.arg("diff"); + if !option.is_empty() { + cmd.arg(option); + } + cmd.arg(file.path()).arg(file.path()); + cmd.assert() + .code(predicate::eq(0)) + .success() + .stdout(predicate::str::is_empty()); + } + Ok(()) + } + + #[test] + fn no_differences_report_identical_files() -> Result<(), Box> { + // same file + let mut file1 = NamedTempFile::new()?; + file1.write_all("foo\n".as_bytes())?; + for option in ["", "-u", "-c", "-e"] { + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.arg("diff"); + if !option.is_empty() { + cmd.arg(option); + } + cmd.arg("-s").arg(file1.path()).arg(file1.path()); + cmd.assert() + .code(predicate::eq(0)) + .success() + .stdout(predicate::eq(format!( + "Files {} and {} are identical\n", + file1.path().to_string_lossy(), + file1.path().to_string_lossy(), + ))); + } + // two files with the same content + let mut file2 = NamedTempFile::new()?; + file2.write_all("foo\n".as_bytes())?; + for option in ["", "-u", "-c", "-e"] { + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.arg("diff"); + if !option.is_empty() { + cmd.arg(option); + } + cmd.arg("-s").arg(file1.path()).arg(file2.path()); + cmd.assert() + .code(predicate::eq(0)) + .success() + .stdout(predicate::eq(format!( + "Files {} and {} are identical\n", + file1.path().to_string_lossy(), + file2.path().to_string_lossy(), + ))); + } + Ok(()) + } + + #[test] + fn differences() -> Result<(), Box> { + let mut file1 = NamedTempFile::new()?; + file1.write_all("foo\n".as_bytes())?; + let mut file2 = NamedTempFile::new()?; + file2.write_all("bar\n".as_bytes())?; + for option in ["", "-u", "-c", "-e"] { + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.arg("diff"); + if !option.is_empty() { + cmd.arg(option); + } + cmd.arg(file1.path()).arg(file2.path()); + cmd.assert() + .code(predicate::eq(1)) + .failure() + .stdout(predicate::str::is_empty().not()); + } + Ok(()) + } + + #[test] + fn differences_brief() -> Result<(), Box> { + let mut file1 = NamedTempFile::new()?; + file1.write_all("foo\n".as_bytes())?; + let mut file2 = NamedTempFile::new()?; + file2.write_all("bar\n".as_bytes())?; + for option in ["", "-u", "-c", "-e"] { + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.arg("diff"); + if !option.is_empty() { + cmd.arg(option); + } + cmd.arg("-q").arg(file1.path()).arg(file2.path()); + cmd.assert() + .code(predicate::eq(1)) + .failure() + .stdout(predicate::eq(format!( + "Files {} and {} differ\n", + file1.path().to_string_lossy(), + file2.path().to_string_lossy() + ))); + } + Ok(()) + } + + #[test] + fn missing_newline() -> Result<(), Box> { + let mut file1 = NamedTempFile::new()?; + file1.write_all("foo".as_bytes())?; + let mut file2 = NamedTempFile::new()?; + file2.write_all("bar".as_bytes())?; let mut cmd = Command::cargo_bin("diffutils")?; cmd.arg("diff"); - if !option.is_empty() { - cmd.arg(option); - } - cmd.arg(file.path()).arg(file.path()); + cmd.arg("-e").arg(file1.path()).arg(file2.path()); + cmd.assert() + .code(predicate::eq(2)) + .failure() + .stderr(predicate::str::starts_with("No newline at end of file")); + Ok(()) + } + + #[test] + fn read_from_stdin() -> Result<(), Box> { + let mut file1 = NamedTempFile::new()?; + file1.write_all("foo\n".as_bytes())?; + let mut file2 = NamedTempFile::new()?; + file2.write_all("bar\n".as_bytes())?; + + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.arg("diff"); + cmd.arg("-u") + .arg(file1.path()) + .arg("-") + .write_stdin("bar\n"); + cmd.assert().code(predicate::eq(1)).failure(); + + let output = cmd.output().unwrap().stdout; + assert_diff_eq!( + output, + format!( + "--- {}\tTIMESTAMP\n+++ -\tTIMESTAMP\n@@ -1 +1 @@\n-foo\n+bar\n", + file1.path().to_string_lossy() + ) + ); + + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.arg("diff"); + cmd.arg("-u") + .arg("-") + .arg(file2.path()) + .write_stdin("foo\n"); + cmd.assert().code(predicate::eq(1)).failure(); + + let output = cmd.output().unwrap().stdout; + assert_diff_eq!( + output, + format!( + "--- -\tTIMESTAMP\n+++ {}\tTIMESTAMP\n@@ -1 +1 @@\n-foo\n+bar\n", + file2.path().to_string_lossy() + ) + ); + + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.arg("diff"); + cmd.arg("-u").arg("-").arg("-"); cmd.assert() .code(predicate::eq(0)) .success() .stdout(predicate::str::is_empty()); + + #[cfg(unix)] + { + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.arg("diff"); + cmd.arg("-u") + .arg(file1.path()) + .arg("/dev/stdin") + .write_stdin("bar\n"); + cmd.assert().code(predicate::eq(1)).failure(); + + let output = cmd.output().unwrap().stdout; + assert_diff_eq!( + output, + format!( + "--- {}\tTIMESTAMP\n+++ /dev/stdin\tTIMESTAMP\n@@ -1 +1 @@\n-foo\n+bar\n", + file1.path().to_string_lossy() + ) + ); + } + + Ok(()) } - Ok(()) -} -#[test] -fn no_differences_report_identical_files() -> Result<(), Box> { - // same file - let mut file1 = NamedTempFile::new()?; - file1.write_all("foo\n".as_bytes())?; - for option in ["", "-u", "-c", "-e"] { + #[test] + fn compare_file_to_directory() -> Result<(), Box> { + let tmp_dir = tempdir()?; + + let directory = tmp_dir.path().join("d"); + let _ = std::fs::create_dir(&directory); + + let a_path = tmp_dir.path().join("a"); + let mut a = File::create(&a_path).unwrap(); + a.write_all(b"a\n").unwrap(); + + let da_path = directory.join("a"); + let mut da = File::create(&da_path).unwrap(); + da.write_all(b"da\n").unwrap(); + let mut cmd = Command::cargo_bin("diffutils")?; cmd.arg("diff"); - if !option.is_empty() { - cmd.arg(option); - } - cmd.arg("-s").arg(file1.path()).arg(file1.path()); + cmd.arg("-u").arg(&directory).arg(&a_path); + cmd.assert().code(predicate::eq(1)).failure(); + + let output = cmd.output().unwrap().stdout; + assert_diff_eq!( + output, + format!( + "--- {}\tTIMESTAMP\n+++ {}\tTIMESTAMP\n@@ -1 +1 @@\n-da\n+a\n", + da_path.display(), + a_path.display() + ) + ); + + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.arg("diff"); + cmd.arg("-u").arg(&a_path).arg(&directory); + cmd.assert().code(predicate::eq(1)).failure(); + + let output = cmd.output().unwrap().stdout; + assert_diff_eq!( + output, + format!( + "--- {}\tTIMESTAMP\n+++ {}\tTIMESTAMP\n@@ -1 +1 @@\n-a\n+da\n", + a_path.display(), + da_path.display() + ) + ); + + Ok(()) + } +} + +mod cmp { + use super::*; + + #[test] + fn cmp_incompatible_params() -> Result<(), Box> { + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.arg("cmp"); + cmd.arg("-l"); + cmd.arg("-s"); + cmd.arg("/etc/passwd").arg("/etc/group"); + cmd.assert() + .code(predicate::eq(2)) + .failure() + .stderr(predicate::str::ends_with( + ": options -l and -s are incompatible\n", + )); + + Ok(()) + } + + #[test] + fn cmp_stdin() -> Result<(), Box> { + let tmp_dir = tempdir()?; + + let a_path = tmp_dir.path().join("a"); + let mut a = File::create(&a_path).unwrap(); + a.write_all(b"a\n").unwrap(); + + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.arg("cmp"); + cmd.arg(&a_path); + cmd.write_stdin("a\n"); cmd.assert() .code(predicate::eq(0)) .success() - .stdout(predicate::eq(format!( - "Files {} and {} are identical\n", - file1.path().to_string_lossy(), - file1.path().to_string_lossy(), - ))); + .stderr(predicate::str::is_empty()) + .stdout(predicate::str::is_empty()); + + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.env("LC_ALL", "C"); + cmd.arg("cmp"); + cmd.arg(&a_path); + cmd.write_stdin("b\n"); + cmd.assert() + .code(predicate::eq(1)) + .failure() + .stderr(predicate::str::is_empty()) + .stdout(predicate::str::ends_with(" - differ: char 1, line 1\n")); + + Ok(()) } - // two files with the same content - let mut file2 = NamedTempFile::new()?; - file2.write_all("foo\n".as_bytes())?; - for option in ["", "-u", "-c", "-e"] { + + #[test] + fn cmp_equal_files() -> Result<(), Box> { + let tmp_dir = tempdir()?; + + let a_path = tmp_dir.path().join("a"); + let mut a = File::create(&a_path).unwrap(); + a.write_all(b"a\n").unwrap(); + + let b_path = tmp_dir.path().join("b"); + let mut b = File::create(&b_path).unwrap(); + b.write_all(b"a\n").unwrap(); + let mut cmd = Command::cargo_bin("diffutils")?; - cmd.arg("diff"); - if !option.is_empty() { - cmd.arg(option); - } - cmd.arg("-s").arg(file1.path()).arg(file2.path()); + cmd.arg("cmp"); + cmd.arg(&a_path).arg(&b_path); cmd.assert() .code(predicate::eq(0)) .success() - .stdout(predicate::eq(format!( - "Files {} and {} are identical\n", - file1.path().to_string_lossy(), - file2.path().to_string_lossy(), - ))); + .stderr(predicate::str::is_empty()) + .stdout(predicate::str::is_empty()); + + Ok(()) } - Ok(()) -} -#[test] -fn differences() -> Result<(), Box> { - let mut file1 = NamedTempFile::new()?; - file1.write_all("foo\n".as_bytes())?; - let mut file2 = NamedTempFile::new()?; - file2.write_all("bar\n".as_bytes())?; - for option in ["", "-u", "-c", "-e"] { + #[test] + fn cmp_one_file_empty() -> Result<(), Box> { + let tmp_dir = tempdir()?; + + let a_path = tmp_dir.path().join("a"); + let mut a = File::create(&a_path).unwrap(); + a.write_all(b"a\n").unwrap(); + + let b_path = tmp_dir.path().join("b"); + let _ = File::create(&b_path).unwrap(); + let mut cmd = Command::cargo_bin("diffutils")?; - cmd.arg("diff"); - if !option.is_empty() { - cmd.arg(option); - } - cmd.arg(file1.path()).arg(file2.path()); + cmd.arg("cmp"); + cmd.arg(&a_path).arg(&b_path); cmd.assert() .code(predicate::eq(1)) .failure() - .stdout(predicate::str::is_empty().not()); + .stderr(predicate::str::contains(" EOF on ")) + .stderr(predicate::str::ends_with(" which is empty\n")); + + Ok(()) } - Ok(()) -} -#[test] -fn differences_brief() -> Result<(), Box> { - let mut file1 = NamedTempFile::new()?; - file1.write_all("foo\n".as_bytes())?; - let mut file2 = NamedTempFile::new()?; - file2.write_all("bar\n".as_bytes())?; - for option in ["", "-u", "-c", "-e"] { + #[test] + fn cmp_immediate_difference() -> Result<(), Box> { + let tmp_dir = tempdir()?; + + let a_path = tmp_dir.path().join("a"); + let mut a = File::create(&a_path).unwrap(); + a.write_all(b"abc\n").unwrap(); + + let b_path = tmp_dir.path().join("b"); + let mut b = File::create(&b_path).unwrap(); + b.write_all(b"bcd\n").unwrap(); + let mut cmd = Command::cargo_bin("diffutils")?; - cmd.arg("diff"); - if !option.is_empty() { - cmd.arg(option); - } - cmd.arg("-q").arg(file1.path()).arg(file2.path()); + cmd.env("LC_ALL", "C"); + cmd.arg("cmp"); + cmd.arg(&a_path).arg(&b_path); cmd.assert() .code(predicate::eq(1)) .failure() - .stdout(predicate::eq(format!( - "Files {} and {} differ\n", - file1.path().to_string_lossy(), - file2.path().to_string_lossy() - ))); + .stdout(predicate::str::ends_with(" differ: char 1, line 1\n")); + + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.env("LC_ALL", "C"); + cmd.arg("cmp"); + cmd.arg("-b"); + cmd.arg(&a_path).arg(&b_path); + cmd.assert() + .code(predicate::eq(1)) + .failure() + .stderr(predicate::str::is_empty()) + .stdout(predicate::str::ends_with( + " differ: byte 1, line 1 is 141 a 142 b\n", + )); + + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.env("LC_ALL", "C"); + cmd.arg("cmp"); + cmd.arg("-l"); + cmd.arg(&a_path).arg(&b_path); + cmd.assert() + .code(predicate::eq(1)) + .failure() + .stderr(predicate::str::is_empty()) + .stdout(predicate::eq("1 141 142\n2 142 143\n3 143 144\n")); + + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.env("LC_ALL", "C"); + cmd.arg("cmp"); + cmd.arg("-l"); + cmd.arg("-b"); + cmd.arg(&a_path).arg(&b_path); + cmd.assert() + .code(predicate::eq(1)) + .failure() + .stderr(predicate::str::is_empty()) + .stdout(predicate::eq( + "1 141 a 142 b\n2 142 b 143 c\n3 143 c 144 d\n", + )); + + Ok(()) } - Ok(()) -} -#[test] -fn missing_newline() -> Result<(), Box> { - let mut file1 = NamedTempFile::new()?; - file1.write_all("foo".as_bytes())?; - let mut file2 = NamedTempFile::new()?; - file2.write_all("bar".as_bytes())?; - let mut cmd = Command::cargo_bin("diffutils")?; - cmd.arg("diff"); - cmd.arg("-e").arg(file1.path()).arg(file2.path()); - cmd.assert() - .code(predicate::eq(2)) - .failure() - .stderr(predicate::str::starts_with("No newline at end of file")); - Ok(()) -} + #[test] + fn cmp_newline_difference() -> Result<(), Box> { + let tmp_dir = tempdir()?; + + let a_path = tmp_dir.path().join("a"); + let mut a = File::create(&a_path).unwrap(); + a.write_all(b"abc\ndefg").unwrap(); + + let b_path = tmp_dir.path().join("b"); + let mut b = File::create(&b_path).unwrap(); + b.write_all(b"abc\ndef\ng").unwrap(); -#[test] -fn read_from_stdin() -> Result<(), Box> { - let mut file1 = NamedTempFile::new()?; - file1.write_all("foo\n".as_bytes())?; - let mut file2 = NamedTempFile::new()?; - file2.write_all("bar\n".as_bytes())?; - - let mut cmd = Command::cargo_bin("diffutils")?; - cmd.arg("diff"); - cmd.arg("-u") - .arg(file1.path()) - .arg("-") - .write_stdin("bar\n"); - cmd.assert().code(predicate::eq(1)).failure(); - - let output = cmd.output().unwrap().stdout; - assert_diff_eq!( - output, - format!( - "--- {}\tTIMESTAMP\n+++ -\tTIMESTAMP\n@@ -1 +1 @@\n-foo\n+bar\n", - file1.path().to_string_lossy() - ) - ); - - let mut cmd = Command::cargo_bin("diffutils")?; - cmd.arg("diff"); - cmd.arg("-u") - .arg("-") - .arg(file2.path()) - .write_stdin("foo\n"); - cmd.assert().code(predicate::eq(1)).failure(); - - let output = cmd.output().unwrap().stdout; - assert_diff_eq!( - output, - format!( - "--- -\tTIMESTAMP\n+++ {}\tTIMESTAMP\n@@ -1 +1 @@\n-foo\n+bar\n", - file2.path().to_string_lossy() - ) - ); - - let mut cmd = Command::cargo_bin("diffutils")?; - cmd.arg("diff"); - cmd.arg("-u").arg("-").arg("-"); - cmd.assert() - .code(predicate::eq(0)) - .success() - .stdout(predicate::str::is_empty()); - - #[cfg(unix)] - { let mut cmd = Command::cargo_bin("diffutils")?; - cmd.arg("diff"); - cmd.arg("-u") - .arg(file1.path()) - .arg("/dev/stdin") - .write_stdin("bar\n"); - cmd.assert().code(predicate::eq(1)).failure(); + cmd.env("LC_ALL", "C"); + cmd.arg("cmp"); + cmd.arg(&a_path).arg(&b_path); + cmd.assert() + .code(predicate::eq(1)) + .failure() + .stderr(predicate::str::is_empty()) + .stdout(predicate::str::ends_with(" differ: char 8, line 2\n")); - let output = cmd.output().unwrap().stdout; - assert_diff_eq!( - output, - format!( - "--- {}\tTIMESTAMP\n+++ /dev/stdin\tTIMESTAMP\n@@ -1 +1 @@\n-foo\n+bar\n", - file1.path().to_string_lossy() - ) - ); + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.env("LC_ALL", "C"); + cmd.arg("cmp"); + cmd.arg("-b"); + cmd.arg(&a_path).arg(&b_path); + cmd.assert() + .code(predicate::eq(1)) + .failure() + .stderr(predicate::str::is_empty()) + .stdout(predicate::str::ends_with( + " differ: byte 8, line 2 is 147 g 12 ^J\n", + )); + + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.env("LC_ALL", "C"); + cmd.arg("cmp"); + cmd.arg("-l"); + cmd.arg(&a_path).arg(&b_path); + cmd.assert() + .code(predicate::eq(1)) + .failure() + .stdout(predicate::str::starts_with("8 147 12\n")) + .stderr(predicate::str::contains(" EOF on")) + .stderr(predicate::str::ends_with(" after byte 8\n")); + + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.env("LC_ALL", "C"); + cmd.arg("cmp"); + cmd.arg("-b"); + cmd.arg("-l"); + cmd.arg(&a_path).arg(&b_path); + cmd.assert() + .code(predicate::eq(1)) + .failure() + .stdout(predicate::str::starts_with("8 147 g 12 ^J\n")) + .stderr(predicate::str::contains(" EOF on")) + .stderr(predicate::str::ends_with(" after byte 8\n")); + + Ok(()) } - Ok(()) -} + #[test] + fn cmp_max_bytes() -> Result<(), Box> { + let tmp_dir = tempdir()?; + + let a_path = tmp_dir.path().join("a"); + let mut a = File::create(&a_path).unwrap(); + a.write_all(b"abc efg ijkl\n").unwrap(); + + let b_path = tmp_dir.path().join("b"); + let mut b = File::create(&b_path).unwrap(); + b.write_all(b"abcdefghijkl\n").unwrap(); + + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.arg("cmp"); + cmd.arg("-l"); + cmd.arg("-b"); + cmd.arg("-n"); + cmd.arg("3"); + cmd.arg(&a_path).arg(&b_path); + cmd.assert() + .code(predicate::eq(0)) + .success() + .stderr(predicate::str::is_empty()) + .stdout(predicate::str::is_empty()); + + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.arg("cmp"); + cmd.arg("-l"); + cmd.arg("-b"); + cmd.arg("-n"); + cmd.arg("4"); + cmd.arg(&a_path).arg(&b_path); + cmd.assert() + .code(predicate::eq(1)) + .failure() + .stderr(predicate::str::is_empty()) + .stdout(predicate::eq("4 40 144 d\n")); + + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.arg("cmp"); + cmd.arg("-l"); + cmd.arg("-b"); + cmd.arg("-n"); + cmd.arg("13"); + cmd.arg(&a_path).arg(&b_path); + cmd.assert() + .code(predicate::eq(1)) + .failure() + .stderr(predicate::str::is_empty()) + .stdout(predicate::eq("4 40 144 d\n8 40 150 h\n")); + Ok(()) + } + + #[test] + fn cmp_skip_args_parsing() -> Result<(), Box> { + let tmp_dir = tempdir()?; + + let a_path = tmp_dir.path().join("a"); + let mut a = File::create(&a_path).unwrap(); + a.write_all(b"---abc\n").unwrap(); + + let b_path = tmp_dir.path().join("b"); + let mut b = File::create(&b_path).unwrap(); + b.write_all(b"###abc\n").unwrap(); + + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.env("LC_ALL", "C"); + cmd.arg("cmp"); + cmd.arg("-i"); + cmd.arg("3"); + cmd.arg(&a_path).arg(&b_path); + cmd.assert() + .code(predicate::eq(0)) + .success() + .stderr(predicate::str::is_empty()) + .stdout(predicate::str::is_empty()); + + // Positional skips should be ignored + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.env("LC_ALL", "C"); + cmd.arg("cmp"); + cmd.arg("-i"); + cmd.arg("3"); + cmd.arg(&a_path).arg(&b_path); + cmd.arg("1").arg("1"); + cmd.assert() + .code(predicate::eq(0)) + .success() + .stderr(predicate::str::is_empty()) + .stdout(predicate::str::is_empty()); + + // Single positional argument should only affect first file. + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.env("LC_ALL", "C"); + cmd.arg("cmp"); + cmd.arg(&a_path).arg(&b_path); + cmd.arg("3"); + cmd.assert() + .code(predicate::eq(1)) + .failure() + .stderr(predicate::str::is_empty()) + .stdout(predicate::str::ends_with(" differ: char 1, line 1\n")); + + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.env("LC_ALL", "C"); + cmd.arg("cmp"); + cmd.arg(&a_path).arg(&b_path); + cmd.arg("3"); + cmd.arg("3"); + cmd.assert() + .code(predicate::eq(0)) + .success() + .stderr(predicate::str::is_empty()) + .stdout(predicate::str::is_empty()); + + Ok(()) + } + + #[test] + fn cmp_skip_suffix_parsing() -> Result<(), Box> { + let tmp_dir = tempdir()?; + + let a_path = tmp_dir.path().join("a"); + let mut a = File::create(&a_path).unwrap(); + write!(a, "{}c\n", "a".repeat(1024)).unwrap(); + a.flush().unwrap(); + + let b_path = tmp_dir.path().join("b"); + let mut b = File::create(&b_path).unwrap(); + write!(b, "{}c\n", "b".repeat(1024)).unwrap(); + b.flush().unwrap(); + + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.arg("cmp"); + cmd.arg("--ignore-initial=1K"); + cmd.arg(&a_path).arg(&b_path); + cmd.assert() + .code(predicate::eq(0)) + .success() + .stderr(predicate::str::is_empty()) + .stdout(predicate::str::is_empty()); + + Ok(()) + } + + #[test] + fn cmp_skip() -> Result<(), Box> { + let tmp_dir = tempdir()?; + + let a_path = tmp_dir.path().join("a"); + let mut a = File::create(&a_path).unwrap(); + a.write_all(b"abc efg ijkl\n").unwrap(); + + let b_path = tmp_dir.path().join("b"); + let mut b = File::create(&b_path).unwrap(); + b.write_all(b"abcdefghijkl\n").unwrap(); + + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.arg("cmp"); + cmd.arg("-l"); + cmd.arg("-b"); + cmd.arg("-i"); + cmd.arg("8"); + cmd.arg(&a_path).arg(&b_path); + cmd.assert() + .code(predicate::eq(0)) + .success() + .stderr(predicate::str::is_empty()) + .stdout(predicate::str::is_empty()); + + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.arg("cmp"); + cmd.arg("-b"); + cmd.arg("-i"); + cmd.arg("4"); + cmd.arg(&a_path).arg(&b_path); + cmd.assert() + .code(predicate::eq(1)) + .failure() + .stderr(predicate::str::is_empty()) + .stdout(predicate::str::ends_with( + " differ: byte 4, line 1 is 40 150 h\n", + )); + + Ok(()) + } + + #[test] + fn cmp_binary() -> Result<(), Box> { + let tmp_dir = tempdir()?; + + let mut bytes = vec![0, 15, 31, 32, 33, 40, 64, 126, 127, 128, 129, 200, 254, 255]; + + let a_path = tmp_dir.path().join("a"); + let mut a = File::create(&a_path).unwrap(); + a.write_all(&bytes).unwrap(); + + bytes.reverse(); + + let b_path = tmp_dir.path().join("b"); + let mut b = File::create(&b_path).unwrap(); + b.write_all(&bytes).unwrap(); -#[test] -fn compare_file_to_directory() -> Result<(), Box> { - let tmp_dir = tempdir()?; - - let directory = tmp_dir.path().join("d"); - let _ = std::fs::create_dir(&directory); - - let a_path = tmp_dir.path().join("a"); - let mut a = File::create(&a_path).unwrap(); - a.write_all(b"a\n").unwrap(); - - let da_path = directory.join("a"); - let mut da = File::create(&da_path).unwrap(); - da.write_all(b"da\n").unwrap(); - - let mut cmd = Command::cargo_bin("diffutils")?; - cmd.arg("diff"); - cmd.arg("-u").arg(&directory).arg(&a_path); - cmd.assert().code(predicate::eq(1)).failure(); - - let output = cmd.output().unwrap().stdout; - assert_diff_eq!( - output, - format!( - "--- {}\tTIMESTAMP\n+++ {}\tTIMESTAMP\n@@ -1 +1 @@\n-da\n+a\n", - da_path.display(), - a_path.display() - ) - ); - - let mut cmd = Command::cargo_bin("diffutils")?; - cmd.arg("diff"); - cmd.arg("-u").arg(&a_path).arg(&directory); - cmd.assert().code(predicate::eq(1)).failure(); - - let output = cmd.output().unwrap().stdout; - assert_diff_eq!( - output, - format!( - "--- {}\tTIMESTAMP\n+++ {}\tTIMESTAMP\n@@ -1 +1 @@\n-a\n+da\n", - a_path.display(), - da_path.display() - ) - ); - - Ok(()) + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.arg("cmp"); + cmd.arg("-l"); + cmd.arg("-b"); + cmd.arg(&a_path).arg(&b_path); + cmd.assert() + .code(predicate::eq(1)) + .failure() + .stdout(predicate::eq(concat!( + " 1 0 ^@ 377 M-^?\n", + " 2 17 ^O 376 M-~\n", + " 3 37 ^_ 310 M-H\n", + " 4 40 201 M-^A\n", + " 5 41 ! 200 M-^@\n", + " 6 50 ( 177 ^?\n", + " 7 100 @ 176 ~\n", + " 8 176 ~ 100 @\n", + " 9 177 ^? 50 (\n", + "10 200 M-^@ 41 !\n", + "11 201 M-^A 40 \n", + "12 310 M-H 37 ^_\n", + "13 376 M-~ 17 ^O\n", + "14 377 M-^? 0 ^@\n" + ))); + + Ok(()) + } + + #[test] + #[cfg(not(windows))] + fn cmp_fast_paths() -> Result<(), Box> { + let tmp_dir = tempdir()?; + + // This test mimics one found in the GNU cmp test suite. It is used for + // validating the /dev/null optimization. + let a_path = tmp_dir.path().join("a"); + let a = File::create(&a_path).unwrap(); + a.set_len(14 * 1024 * 1024 * 1024 * 1024).unwrap(); + + let b_path = tmp_dir.path().join("b"); + let b = File::create(&b_path).unwrap(); + b.set_len(15 * 1024 * 1024 * 1024 * 1024).unwrap(); + + let dev_null = OpenOptions::new().write(true).open("/dev/null").unwrap(); + + let mut child = std::process::Command::new(assert_cmd::cargo::cargo_bin("diffutils")) + .arg("cmp") + .arg(&a_path) + .arg(&b_path) + .stdout(dev_null) + .spawn() + .unwrap(); + + std::thread::sleep(std::time::Duration::from_millis(100)); + + assert_eq!(child.try_wait().unwrap().unwrap().code(), Some(1)); + + // Two stdins should be equal + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.arg("cmp"); + cmd.arg("-"); + cmd.arg("-"); + cmd.assert() + .code(predicate::eq(0)) + .success() + .stdout(predicate::str::is_empty()) + .stderr(predicate::str::is_empty()); + + // Files with longer than block size equal segments should still report + // the correct line number for the difference. Assumes 8KB block size (see + // https://github.com/rust-lang/rust/blob/master/library/std/src/sys_common/io.rs), + // create a 24KB equality. + let mut bytes = " ".repeat(4095); + bytes.push('\n'); + bytes.push_str(&" ".repeat(4096)); + + let bytes = bytes.repeat(3); + let bytes = bytes.as_bytes(); + + let a_path = tmp_dir.path().join("a"); + let mut a = File::create(&a_path).unwrap(); + a.write_all(&bytes).unwrap(); + a.write_all(b"A").unwrap(); + + let b_path = tmp_dir.path().join("b"); + let mut b = File::create(&b_path).unwrap(); + b.write_all(&bytes).unwrap(); + b.write_all(b"B").unwrap(); + + let mut cmd = Command::cargo_bin("diffutils")?; + cmd.arg("cmp"); + cmd.arg(&a_path).arg(&b_path); + cmd.assert() + .code(predicate::eq(1)) + .failure() + .stdout(predicate::str::ends_with(" differ: byte 24577, line 4\n")); + + Ok(()) + } } diff --git a/tests/run-upstream-testsuite.sh b/tests/run-upstream-testsuite.sh index cb59834..f75b0b3 100755 --- a/tests/run-upstream-testsuite.sh +++ b/tests/run-upstream-testsuite.sh @@ -21,7 +21,7 @@ # (e.g. 'dev' or 'test'). # Unless overridden by the $TESTS environment variable, all tests in the test # suite will be run. Tests targeting a command that is not yet implemented -# (e.g. cmp, diff3 or sdiff) are skipped. +# (e.g. diff3 or sdiff) are skipped. scriptpath=$(dirname "$(readlink -f "$0")") rev=$(git rev-parse HEAD) @@ -57,6 +57,7 @@ upstreamrev=$(git rev-parse HEAD) mkdir src cd src ln -s "$binary" diff +ln -s "$binary" cmp cd ../tests if [[ -n "$TESTS" ]] @@ -82,9 +83,9 @@ for test in $tests do result="FAIL" url="$urlroot$test?id=$upstreamrev" - # Run only the tests that invoke `diff`, + # Run only the tests that invoke `diff` or `cmp`, # because other binaries aren't implemented yet - if ! grep -E -s -q "(cmp|diff3|sdiff)" "$test" + if ! grep -E -s -q "(diff3|sdiff)" "$test" then sh "$test" 1> stdout.txt 2> stderr.txt && result="PASS" || exitcode=1 json+="{\"test\":\"$test\",\"result\":\"$result\","