Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: Correctly detect format when using md5sum with check flag #4645

Merged
merged 9 commits into from
May 27, 2023
70 changes: 56 additions & 14 deletions src/uu/hashsum/src/hashsum.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ use clap::crate_version;
use clap::ArgAction;
use clap::{Arg, ArgMatches, Command};
use hex::encode;
use regex::Captures;
use regex::Regex;
use std::cmp::Ordering;
use std::error::Error;
Expand Down Expand Up @@ -604,37 +605,78 @@ where
// regular expression, otherwise we use the `{n}` modifier,
// where `n` is the number of bytes.
let bytes = options.digest.output_bits() / 4;
let modifier = if bytes > 0 {
let bytes_marker = if bytes > 0 {
format!("{{{bytes}}}")
} else {
"+".to_string()
};
let gnu_re = Regex::new(&format!(
r"^(?P<digest>[a-fA-F0-9]{modifier}) (?P<binary>[ \*])(?P<fileName>.*)",
))
.map_err(|_| HashsumError::InvalidRegex)?;
// BSD reversed mode format is similar to the default mode, but doesn’t use a character to distinguish binary and text modes.
let mut bsd_reversed = None;

/// Creates a Regex for parsing lines based on the given format.
/// The default value of `gnu_re` created with this function has to be recreated
/// after the initial line has been parsed, as this line dictates the format
/// for the rest of them, and mixing of formats is disallowed.
fn gnu_re_template(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

could you please add a rustdoc comment here? thanks

bytes_marker: &str,
format_marker: &str,
) -> Result<Regex, HashsumError> {
Regex::new(&format!(
r"^(?P<digest>[a-fA-F0-9]{bytes_marker}) {format_marker}(?P<fileName>.*)"
))
.map_err(|_| HashsumError::InvalidRegex)
}
let mut gnu_re = gnu_re_template(&bytes_marker, r"(?P<binary>[ \*])?")?;
let bsd_re = Regex::new(&format!(
r"^{algorithm} \((?P<fileName>.*)\) = (?P<digest>[a-fA-F0-9]{digest_size})",
algorithm = options.algoname,
digest_size = modifier,
digest_size = bytes_marker,
))
.map_err(|_| HashsumError::InvalidRegex)?;

fn handle_captures(
caps: Captures,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

could you please fix the clippy warnings ? (sorry)

bytes_marker: &str,
bsd_reversed: &mut Option<bool>,
gnu_re: &mut Regex,
) -> Result<(String, String, bool), HashsumError> {
if bsd_reversed.is_none() {
let is_bsd_reversed = caps.name("binary").is_none();
let format_marker = if is_bsd_reversed {
""
} else {
r"(?P<binary>[ \*])"
}
.to_string();

*bsd_reversed = Some(is_bsd_reversed);
*gnu_re = gnu_re_template(&bytes_marker, &format_marker)?;
}

Ok((
caps.name("fileName").unwrap().as_str().to_string(),
caps.name("digest").unwrap().as_str().to_ascii_lowercase(),
if *bsd_reversed == Some(false) {
caps.name("binary").unwrap().as_str() == "*"
} else {
false
},
))
}

let buffer = file;
for (i, maybe_line) in buffer.lines().enumerate() {
let line = match maybe_line {
Ok(l) => l,
Err(e) => return Err(e.map_err_context(|| "failed to read file".to_string())),
};
let (ck_filename, sum, binary_check) = match gnu_re.captures(&line) {
Some(caps) => (
caps.name("fileName").unwrap().as_str(),
caps.name("digest").unwrap().as_str().to_ascii_lowercase(),
caps.name("binary").unwrap().as_str() == "*",
),
Some(caps) => {
handle_captures(caps, &bytes_marker, &mut bsd_reversed, &mut gnu_re)?
}
None => match bsd_re.captures(&line) {
Some(caps) => (
caps.name("fileName").unwrap().as_str(),
caps.name("fileName").unwrap().as_str().to_string(),
caps.name("digest").unwrap().as_str().to_ascii_lowercase(),
true,
),
Expand All @@ -655,7 +697,7 @@ where
}
},
};
let f = match File::open(ck_filename) {
let f = match File::open(ck_filename.clone()) {
Err(_) => {
failed_open_file += 1;
println!(
Expand Down Expand Up @@ -706,7 +748,7 @@ where
)
.map_err_context(|| "failed to read input".to_string())?;
if options.tag {
println!("{} ({}) = {}", options.algoname, filename.display(), sum);
println!("{} ({:?}) = {}", options.algoname, filename.display(), sum);
} else if options.nonames {
println!("{sum}");
} else if options.zero {
Expand Down
138 changes: 138 additions & 0 deletions tests/by-util/test_hashsum.rs
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,144 @@ fn test_check_file_not_found_warning() {
.stderr_is("sha1sum: warning: 1 listed file could not be read\n");
}

// Asterisk `*` is a reserved paths character on win32, nor the path can end with a whitespace.
// ref: https://learn.microsoft.com/en-us/windows/win32/fileio/naming-a-file#naming-conventions
#[test]
fn test_check_md5sum() {
let scene = TestScenario::new(util_name!());
let at = &scene.fixtures;

#[cfg(not(windows))]
{
for f in &["a", " b", "*c", "dd", " "] {
at.write(f, &format!("{f}\n"));
}
at.write(
"check.md5sum",
"60b725f10c9c85c70d97880dfe8191b3 a\n\
bf35d7536c785cf06730d5a40301eba2 b\n\
f5b61709718c1ecf8db1aea8547d4698 *c\n\
b064a020db8018f18ff5ae367d01b212 dd\n\
d784fa8b6d98d27699781bd9a7cf19f0 ",
);
scene
.ccmd("md5sum")
.arg("--strict")
.arg("-c")
.arg("check.md5sum")
.succeeds()
.stdout_is("a: OK\n b: OK\n*c: OK\ndd: OK\n : OK\n")
.stderr_is("");
}
#[cfg(windows)]
{
for f in &["a", " b", "dd"] {
at.write(f, &format!("{f}\n"));
}
at.write(
"check.md5sum",
"60b725f10c9c85c70d97880dfe8191b3 a\n\
bf35d7536c785cf06730d5a40301eba2 b\n\
b064a020db8018f18ff5ae367d01b212 dd",
);
scene
.ccmd("md5sum")
.arg("--strict")
.arg("-c")
.arg("check.md5sum")
.succeeds()
.stdout_is("a: OK\n b: OK\ndd: OK\n")
.stderr_is("");
}
}

#[test]
fn test_check_md5sum_reverse_bsd() {
let scene = TestScenario::new(util_name!());
let at = &scene.fixtures;

#[cfg(not(windows))]
{
for f in &["a", " b", "*c", "dd", " "] {
at.write(f, &format!("{f}\n"));
}
at.write(
"check.md5sum",
"60b725f10c9c85c70d97880dfe8191b3 a\n\
bf35d7536c785cf06730d5a40301eba2 b\n\
f5b61709718c1ecf8db1aea8547d4698 *c\n\
b064a020db8018f18ff5ae367d01b212 dd\n\
d784fa8b6d98d27699781bd9a7cf19f0 ",
);
scene
.ccmd("md5sum")
.arg("--strict")
.arg("-c")
.arg("check.md5sum")
.succeeds()
.stdout_is("a: OK\n b: OK\n*c: OK\ndd: OK\n : OK\n")
.stderr_is("");
}
#[cfg(windows)]
{
for f in &["a", " b", "dd"] {
at.write(f, &format!("{f}\n"));
}
at.write(
"check.md5sum",
"60b725f10c9c85c70d97880dfe8191b3 a\n\
bf35d7536c785cf06730d5a40301eba2 b\n\
b064a020db8018f18ff5ae367d01b212 dd",
);
scene
.ccmd("md5sum")
.arg("--strict")
.arg("-c")
.arg("check.md5sum")
.succeeds()
.stdout_is("a: OK\n b: OK\ndd: OK\n")
.stderr_is("");
}
}

#[test]
fn test_check_md5sum_mixed_format() {
let scene = TestScenario::new(util_name!());
let at = &scene.fixtures;

#[cfg(not(windows))]
{
for f in &[" b", "*c", "dd", " "] {
at.write(f, &format!("{f}\n"));
}
at.write(
"check.md5sum",
"bf35d7536c785cf06730d5a40301eba2 b\n\
f5b61709718c1ecf8db1aea8547d4698 *c\n\
b064a020db8018f18ff5ae367d01b212 dd\n\
d784fa8b6d98d27699781bd9a7cf19f0 ",
);
}
#[cfg(windows)]
{
for f in &[" b", "dd"] {
at.write(f, &format!("{f}\n"));
}
at.write(
"check.md5sum",
"bf35d7536c785cf06730d5a40301eba2 b\n\
b064a020db8018f18ff5ae367d01b212 dd",
);
}
scene
.ccmd("md5sum")
.arg("--strict")
.arg("-c")
.arg("check.md5sum")
.fails()
.code_is(1);
}

#[test]
fn test_invalid_arg() {
new_ucmd!().arg("--definitely-invalid").fails().code_is(1);
Expand Down