Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: Correctly detect format when using md5sum with check flag #4645

Merged
merged 9 commits into from
May 27, 2023
54 changes: 42 additions & 12 deletions src/uu/hashsum/src/hashsum.rs
Original file line number Diff line number Diff line change
Expand Up @@ -604,19 +604,28 @@ where
// regular expression, otherwise we use the `{n}` modifier,
// where `n` is the number of bytes.
let bytes = options.digest.output_bits() / 4;
let modifier = if bytes > 0 {
let bytes_marker = if bytes > 0 {
format!("{{{bytes}}}")
} else {
"+".to_string()
};
let gnu_re = Regex::new(&format!(
r"^(?P<digest>[a-fA-F0-9]{modifier}) (?P<binary>[ \*])(?P<fileName>.*)",
))
.map_err(|_| HashsumError::InvalidRegex)?;
// BSD reversed mode format is similar to the default mode, but doesn’t use a character to distinguish binary and text modes.
let mut bsd_reversed = None;

fn gnu_re_template(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

could you please add a rustdoc comment here? thanks

bytes_marker: &str,
format_marker: &str,
) -> Result<Regex, HashsumError> {
Regex::new(&format!(
r"^(?P<digest>[a-fA-F0-9]{bytes_marker}) {format_marker}(?P<fileName>.*)"
))
.map_err(|_| HashsumError::InvalidRegex)
}
let mut gnu_re = gnu_re_template(&bytes_marker, r"(?P<binary>[ \*])?")?;
let bsd_re = Regex::new(&format!(
r"^{algorithm} \((?P<fileName>.*)\) = (?P<digest>[a-fA-F0-9]{digest_size})",
algorithm = options.algoname,
digest_size = modifier,
digest_size = bytes_marker,
))
.map_err(|_| HashsumError::InvalidRegex)?;

Expand All @@ -627,11 +636,32 @@ where
Err(e) => return Err(e.map_err_context(|| "failed to read file".to_string())),
};
let (ck_filename, sum, binary_check) = match gnu_re.captures(&line) {
Some(caps) => (
caps.name("fileName").unwrap().as_str(),
caps.name("digest").unwrap().as_str().to_ascii_lowercase(),
caps.name("binary").unwrap().as_str() == "*",
),
Some(caps) => {
// We set the format during first pass, as mixing of formats is disallowed.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

maybe move this into a function like

fn handle_captures(caps: Captures, bytes_marker: &str, bsd_reversed: &mut Option<bool>) -> Result<(String, String, bool), HashsumError> {
    if bsd_reversed.is_none() {
        let is_bsd_reversed = caps.name("binary").is_none();
        let format_marker = if is_bsd_reversed {
            ""
        } else {
            r"(?P<binary>[ \*])"
        }
        .to_string();

        *bsd_reversed = Some(is_bsd_reversed);
        gnu_re = gnu_re_template(&bytes_marker, &format_marker)?;
    }

    Ok((
        caps.name("fileName").unwrap().as_str().to_string(),
        caps.name("digest").unwrap().as_str().to_ascii_lowercase(),
        if *bsd_reversed == Some(false) {
            caps.name("binary").unwrap().as_str() == "*"
        } else {
            false
        },
    ))
}

and call it with
Some(caps) => handle_captures(caps, &bytes_marker, &mut bsd_reversed)?,

if bsd_reversed.is_none() {
let is_bsd_reversed = caps.name("binary").is_none();
let format_marker = if is_bsd_reversed {
""
} else {
r"(?P<binary>[ \*])"
}
.to_string();

bsd_reversed = Some(is_bsd_reversed);
gnu_re = gnu_re_template(&bytes_marker, &format_marker)?;
}

// Default mode for Reversed BSD format is 'text'
(
caps.name("fileName").unwrap().as_str(),
kamilogorek marked this conversation as resolved.
Show resolved Hide resolved
caps.name("digest").unwrap().as_str().to_ascii_lowercase(),
if bsd_reversed == Some(false) {
caps.name("binary").unwrap().as_str() == "*"
} else {
false
},
)
}
None => match bsd_re.captures(&line) {
Some(caps) => (
caps.name("fileName").unwrap().as_str(),
Expand Down Expand Up @@ -706,7 +736,7 @@ where
)
.map_err_context(|| "failed to read input".to_string())?;
if options.tag {
println!("{} ({}) = {}", options.algoname, filename.display(), sum);
println!("{} ({:?}) = {}", options.algoname, filename.display(), sum);
} else if options.nonames {
println!("{sum}");
} else if options.zero {
Expand Down
138 changes: 138 additions & 0 deletions tests/by-util/test_hashsum.rs
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,144 @@ fn test_check_file_not_found_warning() {
.stderr_is("sha1sum: warning: 1 listed file could not be read\n");
}

// Asterisk `*` is a reserved paths character on win32, nor the path can end with a whitespace.
// ref: https://learn.microsoft.com/en-us/windows/win32/fileio/naming-a-file#naming-conventions
#[test]
fn test_check_md5sum() {
let scene = TestScenario::new(util_name!());
let at = &scene.fixtures;

#[cfg(not(windows))]
{
for f in &["a", " b", "*c", "dd", " "] {
at.write(f, &format!("{f}\n"));
}
at.write(
"check.md5sum",
"60b725f10c9c85c70d97880dfe8191b3 a\n\
bf35d7536c785cf06730d5a40301eba2 b\n\
f5b61709718c1ecf8db1aea8547d4698 *c\n\
b064a020db8018f18ff5ae367d01b212 dd\n\
d784fa8b6d98d27699781bd9a7cf19f0 ",
);
scene
.ccmd("md5sum")
.arg("--strict")
.arg("-c")
.arg("check.md5sum")
.succeeds()
.stdout_is("a: OK\n b: OK\n*c: OK\ndd: OK\n : OK\n")
.stderr_is("");
}
#[cfg(windows)]
{
for f in &["a", " b", "dd"] {
at.write(f, &format!("{f}\n"));
}
at.write(
"check.md5sum",
"60b725f10c9c85c70d97880dfe8191b3 a\n\
bf35d7536c785cf06730d5a40301eba2 b\n\
b064a020db8018f18ff5ae367d01b212 dd",
);
scene
.ccmd("md5sum")
.arg("--strict")
.arg("-c")
.arg("check.md5sum")
.succeeds()
.stdout_is("a: OK\n b: OK\ndd: OK\n")
.stderr_is("");
}
}

#[test]
fn test_check_md5sum_reverse_bsd() {
let scene = TestScenario::new(util_name!());
let at = &scene.fixtures;

#[cfg(not(windows))]
{
for f in &["a", " b", "*c", "dd", " "] {
at.write(f, &format!("{f}\n"));
}
at.write(
"check.md5sum",
"60b725f10c9c85c70d97880dfe8191b3 a\n\
bf35d7536c785cf06730d5a40301eba2 b\n\
f5b61709718c1ecf8db1aea8547d4698 *c\n\
b064a020db8018f18ff5ae367d01b212 dd\n\
d784fa8b6d98d27699781bd9a7cf19f0 ",
);
scene
.ccmd("md5sum")
.arg("--strict")
.arg("-c")
.arg("check.md5sum")
.succeeds()
.stdout_is("a: OK\n b: OK\n*c: OK\ndd: OK\n : OK\n")
.stderr_is("");
}
#[cfg(windows)]
{
for f in &["a", " b", "dd"] {
at.write(f, &format!("{f}\n"));
}
at.write(
"check.md5sum",
"60b725f10c9c85c70d97880dfe8191b3 a\n\
bf35d7536c785cf06730d5a40301eba2 b\n\
b064a020db8018f18ff5ae367d01b212 dd",
);
scene
.ccmd("md5sum")
.arg("--strict")
.arg("-c")
.arg("check.md5sum")
.succeeds()
.stdout_is("a: OK\n b: OK\ndd: OK\n")
.stderr_is("");
}
}

#[test]
fn test_check_md5sum_mixed_format() {
let scene = TestScenario::new(util_name!());
let at = &scene.fixtures;

#[cfg(not(windows))]
{
for f in &[" b", "*c", "dd", " "] {
at.write(f, &format!("{f}\n"));
}
at.write(
"check.md5sum",
"bf35d7536c785cf06730d5a40301eba2 b\n\
f5b61709718c1ecf8db1aea8547d4698 *c\n\
b064a020db8018f18ff5ae367d01b212 dd\n\
d784fa8b6d98d27699781bd9a7cf19f0 ",
);
}
#[cfg(windows)]
{
for f in &[" b", "dd"] {
at.write(f, &format!("{f}\n"));
}
at.write(
"check.md5sum",
"bf35d7536c785cf06730d5a40301eba2 b\n\
b064a020db8018f18ff5ae367d01b212 dd",
);
}
scene
.ccmd("md5sum")
.arg("--strict")
.arg("-c")
.arg("check.md5sum")
.fails()
.code_is(1);
}

#[test]
fn test_invalid_arg() {
new_ucmd!().arg("--definitely-invalid").fails().code_is(1);
Expand Down