Skip to content

Commit

Permalink
fix: Correctly detect format when using md5sum with check flag (#4645)
Browse files Browse the repository at this point in the history
  • Loading branch information
kamilogorek authored May 27, 2023
1 parent 388fa1b commit 0743464
Show file tree
Hide file tree
Showing 2 changed files with 194 additions and 14 deletions.
70 changes: 56 additions & 14 deletions src/uu/hashsum/src/hashsum.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ use clap::crate_version;
use clap::ArgAction;
use clap::{Arg, ArgMatches, Command};
use hex::encode;
use regex::Captures;
use regex::Regex;
use std::cmp::Ordering;
use std::error::Error;
Expand Down Expand Up @@ -604,37 +605,78 @@ where
// regular expression, otherwise we use the `{n}` modifier,
// where `n` is the number of bytes.
let bytes = options.digest.output_bits() / 4;
let modifier = if bytes > 0 {
let bytes_marker = if bytes > 0 {
format!("{{{bytes}}}")
} else {
"+".to_string()
};
let gnu_re = Regex::new(&format!(
r"^(?P<digest>[a-fA-F0-9]{modifier}) (?P<binary>[ \*])(?P<fileName>.*)",
))
.map_err(|_| HashsumError::InvalidRegex)?;
// BSD reversed mode format is similar to the default mode, but doesn’t use a character to distinguish binary and text modes.
let mut bsd_reversed = None;

/// Creates a Regex for parsing lines based on the given format.
/// The default value of `gnu_re` created with this function has to be recreated
/// after the initial line has been parsed, as this line dictates the format
/// for the rest of them, and mixing of formats is disallowed.
fn gnu_re_template(
bytes_marker: &str,
format_marker: &str,
) -> Result<Regex, HashsumError> {
Regex::new(&format!(
r"^(?P<digest>[a-fA-F0-9]{bytes_marker}) {format_marker}(?P<fileName>.*)"
))
.map_err(|_| HashsumError::InvalidRegex)
}
let mut gnu_re = gnu_re_template(&bytes_marker, r"(?P<binary>[ \*])?")?;
let bsd_re = Regex::new(&format!(
r"^{algorithm} \((?P<fileName>.*)\) = (?P<digest>[a-fA-F0-9]{digest_size})",
algorithm = options.algoname,
digest_size = modifier,
digest_size = bytes_marker,
))
.map_err(|_| HashsumError::InvalidRegex)?;

fn handle_captures(
caps: &Captures,
bytes_marker: &str,
bsd_reversed: &mut Option<bool>,
gnu_re: &mut Regex,
) -> Result<(String, String, bool), HashsumError> {
if bsd_reversed.is_none() {
let is_bsd_reversed = caps.name("binary").is_none();
let format_marker = if is_bsd_reversed {
""
} else {
r"(?P<binary>[ \*])"
}
.to_string();

*bsd_reversed = Some(is_bsd_reversed);
*gnu_re = gnu_re_template(bytes_marker, &format_marker)?;
}

Ok((
caps.name("fileName").unwrap().as_str().to_string(),
caps.name("digest").unwrap().as_str().to_ascii_lowercase(),
if *bsd_reversed == Some(false) {
caps.name("binary").unwrap().as_str() == "*"
} else {
false
},
))
}

let buffer = file;
for (i, maybe_line) in buffer.lines().enumerate() {
let line = match maybe_line {
Ok(l) => l,
Err(e) => return Err(e.map_err_context(|| "failed to read file".to_string())),
};
let (ck_filename, sum, binary_check) = match gnu_re.captures(&line) {
Some(caps) => (
caps.name("fileName").unwrap().as_str(),
caps.name("digest").unwrap().as_str().to_ascii_lowercase(),
caps.name("binary").unwrap().as_str() == "*",
),
Some(caps) => {
handle_captures(&caps, &bytes_marker, &mut bsd_reversed, &mut gnu_re)?
}
None => match bsd_re.captures(&line) {
Some(caps) => (
caps.name("fileName").unwrap().as_str(),
caps.name("fileName").unwrap().as_str().to_string(),
caps.name("digest").unwrap().as_str().to_ascii_lowercase(),
true,
),
Expand All @@ -655,7 +697,7 @@ where
}
},
};
let f = match File::open(ck_filename) {
let f = match File::open(ck_filename.clone()) {
Err(_) => {
failed_open_file += 1;
println!(
Expand Down Expand Up @@ -706,7 +748,7 @@ where
)
.map_err_context(|| "failed to read input".to_string())?;
if options.tag {
println!("{} ({}) = {}", options.algoname, filename.display(), sum);
println!("{} ({:?}) = {}", options.algoname, filename.display(), sum);
} else if options.nonames {
println!("{sum}");
} else if options.zero {
Expand Down
138 changes: 138 additions & 0 deletions tests/by-util/test_hashsum.rs
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,144 @@ fn test_check_file_not_found_warning() {
.stderr_is("sha1sum: warning: 1 listed file could not be read\n");
}

// Asterisk `*` is a reserved paths character on win32, nor the path can end with a whitespace.
// ref: https://learn.microsoft.com/en-us/windows/win32/fileio/naming-a-file#naming-conventions
#[test]
fn test_check_md5sum() {
let scene = TestScenario::new(util_name!());
let at = &scene.fixtures;

#[cfg(not(windows))]
{
for f in &["a", " b", "*c", "dd", " "] {
at.write(f, &format!("{f}\n"));
}
at.write(
"check.md5sum",
"60b725f10c9c85c70d97880dfe8191b3 a\n\
bf35d7536c785cf06730d5a40301eba2 b\n\
f5b61709718c1ecf8db1aea8547d4698 *c\n\
b064a020db8018f18ff5ae367d01b212 dd\n\
d784fa8b6d98d27699781bd9a7cf19f0 ",
);
scene
.ccmd("md5sum")
.arg("--strict")
.arg("-c")
.arg("check.md5sum")
.succeeds()
.stdout_is("a: OK\n b: OK\n*c: OK\ndd: OK\n : OK\n")
.stderr_is("");
}
#[cfg(windows)]
{
for f in &["a", " b", "dd"] {
at.write(f, &format!("{f}\n"));
}
at.write(
"check.md5sum",
"60b725f10c9c85c70d97880dfe8191b3 a\n\
bf35d7536c785cf06730d5a40301eba2 b\n\
b064a020db8018f18ff5ae367d01b212 dd",
);
scene
.ccmd("md5sum")
.arg("--strict")
.arg("-c")
.arg("check.md5sum")
.succeeds()
.stdout_is("a: OK\n b: OK\ndd: OK\n")
.stderr_is("");
}
}

#[test]
fn test_check_md5sum_reverse_bsd() {
let scene = TestScenario::new(util_name!());
let at = &scene.fixtures;

#[cfg(not(windows))]
{
for f in &["a", " b", "*c", "dd", " "] {
at.write(f, &format!("{f}\n"));
}
at.write(
"check.md5sum",
"60b725f10c9c85c70d97880dfe8191b3 a\n\
bf35d7536c785cf06730d5a40301eba2 b\n\
f5b61709718c1ecf8db1aea8547d4698 *c\n\
b064a020db8018f18ff5ae367d01b212 dd\n\
d784fa8b6d98d27699781bd9a7cf19f0 ",
);
scene
.ccmd("md5sum")
.arg("--strict")
.arg("-c")
.arg("check.md5sum")
.succeeds()
.stdout_is("a: OK\n b: OK\n*c: OK\ndd: OK\n : OK\n")
.stderr_is("");
}
#[cfg(windows)]
{
for f in &["a", " b", "dd"] {
at.write(f, &format!("{f}\n"));
}
at.write(
"check.md5sum",
"60b725f10c9c85c70d97880dfe8191b3 a\n\
bf35d7536c785cf06730d5a40301eba2 b\n\
b064a020db8018f18ff5ae367d01b212 dd",
);
scene
.ccmd("md5sum")
.arg("--strict")
.arg("-c")
.arg("check.md5sum")
.succeeds()
.stdout_is("a: OK\n b: OK\ndd: OK\n")
.stderr_is("");
}
}

#[test]
fn test_check_md5sum_mixed_format() {
let scene = TestScenario::new(util_name!());
let at = &scene.fixtures;

#[cfg(not(windows))]
{
for f in &[" b", "*c", "dd", " "] {
at.write(f, &format!("{f}\n"));
}
at.write(
"check.md5sum",
"bf35d7536c785cf06730d5a40301eba2 b\n\
f5b61709718c1ecf8db1aea8547d4698 *c\n\
b064a020db8018f18ff5ae367d01b212 dd\n\
d784fa8b6d98d27699781bd9a7cf19f0 ",
);
}
#[cfg(windows)]
{
for f in &[" b", "dd"] {
at.write(f, &format!("{f}\n"));
}
at.write(
"check.md5sum",
"bf35d7536c785cf06730d5a40301eba2 b\n\
b064a020db8018f18ff5ae367d01b212 dd",
);
}
scene
.ccmd("md5sum")
.arg("--strict")
.arg("-c")
.arg("check.md5sum")
.fails()
.code_is(1);
}

#[test]
fn test_invalid_arg() {
new_ucmd!().arg("--definitely-invalid").fails().code_is(1);
Expand Down

0 comments on commit 0743464

Please sign in to comment.