From ee13bc41e7ec39f2afbc8e33ac36c4f6f94e587d Mon Sep 17 00:00:00 2001 From: Daniel Hofstetter Date: Sun, 3 Jul 2022 16:36:43 +0200 Subject: [PATCH] numfmt: implement --format --- src/uu/numfmt/src/format.rs | 93 +++++++++++- src/uu/numfmt/src/numfmt.rs | 54 ++++++- src/uu/numfmt/src/options.rs | 283 +++++++++++++++++++++++++++++++++++ src/uu/numfmt/src/units.rs | 2 +- tests/by-util/test_numfmt.rs | 238 +++++++++++++++++++++++++++++ 5 files changed, 659 insertions(+), 11 deletions(-) diff --git a/src/uu/numfmt/src/format.rs b/src/uu/numfmt/src/format.rs index aa55104a3eb..8bddb7ad181 100644 --- a/src/uu/numfmt/src/format.rs +++ b/src/uu/numfmt/src/format.rs @@ -1,3 +1,4 @@ +// spell-checker:ignore powf use uucore::display::Quotable; use crate::options::{NumfmtOptions, RoundMethod, TransformOptions}; @@ -194,7 +195,19 @@ pub fn div_round(n: f64, d: f64, method: RoundMethod) -> f64 { } } -fn consider_suffix(n: f64, u: &Unit, round_method: RoundMethod) -> Result<(f64, Option)> { +// Rounds to the specified number of decimal places. 
+fn round_with_precision(n: f64, method: RoundMethod, precision: usize) -> f64 { + let p = 10.0_f64.powf(precision as f64); + + method.round(p * n) / p +} + +fn consider_suffix( + n: f64, + u: &Unit, + round_method: RoundMethod, + precision: usize, +) -> Result<(f64, Option)> { use crate::units::RawSuffix::*; let abs_n = n.abs(); @@ -220,7 +233,11 @@ fn consider_suffix(n: f64, u: &Unit, round_method: RoundMethod) -> Result<(f64, _ => return Err("Number is too big and unsupported".to_string()), }; - let v = div_round(n, bases[i], round_method); + let v = if precision > 0 { + round_with_precision(n / bases[i], round_method, precision) + } else { + div_round(n, bases[i], round_method) + }; // check if rounding pushed us into the next base if v.abs() >= bases[1] { @@ -230,11 +247,31 @@ fn consider_suffix(n: f64, u: &Unit, round_method: RoundMethod) -> Result<(f64, } } -fn transform_to(s: f64, opts: &TransformOptions, round_method: RoundMethod) -> Result { - let (i2, s) = consider_suffix(s, &opts.to, round_method)?; +fn transform_to( + s: f64, + opts: &TransformOptions, + round_method: RoundMethod, + precision: usize, +) -> Result { + let (i2, s) = consider_suffix(s, &opts.to, round_method, precision)?; let i2 = i2 / (opts.to_unit as f64); Ok(match s { + None if precision > 0 => { + format!( + "{:.precision$}", + round_with_precision(i2, round_method, precision), + precision = precision + ) + } None => format!("{}", i2), + Some(s) if precision > 0 => { + format!( + "{:.precision$}{}", + i2, + DisplayableSuffix(s), + precision = precision + ) + } Some(s) if i2.abs() < 10.0 => format!("{:.1}{}", i2, DisplayableSuffix(s)), Some(s) => format!("{:.0}{}", i2, DisplayableSuffix(s)), }) @@ -255,6 +292,7 @@ fn format_string( transform_from(source_without_suffix, &options.transform)?, &options.transform, options.round, + options.format.precision, )?; // bring back the suffix before applying padding @@ -263,15 +301,34 @@ fn format_string( None => number, }; - Ok(match 
implicit_padding.unwrap_or(options.padding) { + let padding = options + .format + .padding + .unwrap_or_else(|| implicit_padding.unwrap_or(options.padding)); + + let padded_number = match padding { 0 => number_with_suffix, + p if p > 0 && options.format.zero_padding => { + let zero_padded = format!("{:0>padding$}", number_with_suffix, padding = p as usize); + + match implicit_padding.unwrap_or(options.padding) { + 0 => zero_padded, + p if p > 0 => format!("{:>padding$}", zero_padded, padding = p as usize), + p => format!("{: 0 => format!("{:>padding$}", number_with_suffix, padding = p as usize), p => format!( "{: Result<()> { @@ -342,3 +399,27 @@ pub fn format_and_print(s: &str, options: &NumfmtOptions) -> Result<()> { None => format_and_print_whitespace(s, options), } } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_round_with_precision() { + let rm = RoundMethod::FromZero; + assert_eq!(1.0, round_with_precision(0.12345, rm, 0)); + assert_eq!(0.2, round_with_precision(0.12345, rm, 1)); + assert_eq!(0.13, round_with_precision(0.12345, rm, 2)); + assert_eq!(0.124, round_with_precision(0.12345, rm, 3)); + assert_eq!(0.1235, round_with_precision(0.12345, rm, 4)); + assert_eq!(0.12345, round_with_precision(0.12345, rm, 5)); + + let rm = RoundMethod::TowardsZero; + assert_eq!(0.0, round_with_precision(0.12345, rm, 0)); + assert_eq!(0.1, round_with_precision(0.12345, rm, 1)); + assert_eq!(0.12, round_with_precision(0.12345, rm, 2)); + assert_eq!(0.123, round_with_precision(0.12345, rm, 3)); + assert_eq!(0.1234, round_with_precision(0.12345, rm, 4)); + assert_eq!(0.12345, round_with_precision(0.12345, rm, 5)); + } +} diff --git a/src/uu/numfmt/src/numfmt.rs b/src/uu/numfmt/src/numfmt.rs index c73516990c6..d4d3f3584bd 100644 --- a/src/uu/numfmt/src/numfmt.rs +++ b/src/uu/numfmt/src/numfmt.rs @@ -16,8 +16,8 @@ use std::io::{BufRead, Write}; use units::{IEC_BASES, SI_BASES}; use uucore::display::Quotable; use uucore::error::UResult; -use 
uucore::format_usage; use uucore::ranges::Range; +use uucore::{format_usage, InvalidEncodingHandling}; pub mod errors; pub mod format; @@ -51,6 +51,12 @@ FIELDS supports cut(1) style field ranges: -M from first to M'th field (inclusive) - all fields Multiple fields/ranges can be separated with commas + +FORMAT must be suitable for printing one floating-point argument '%f'. +Optional quote (%'f) will enable --grouping (if supported by current locale). +Optional width value (%10f) will pad output. Optional zero (%010f) width +will zero pad the number. Optional negative values (%-10f) will left align. +Optional precision (%.1f) will override the input determined precision. "; const USAGE: &str = "{} [OPTION]... [NUMBER]..."; @@ -194,6 +200,15 @@ fn parse_options(args: &ArgMatches) -> Result { v => Range::from_list(v)?, }; + let format = match args.value_of(options::FORMAT) { + Some(s) => s.parse()?, + None => FormatOptions::default(), + }; + + if format.grouping && to != Unit::None { + return Err("grouping cannot be combined with --to".to_string()); + } + let delimiter = args.value_of(options::DELIMITER).map_or(Ok(None), |arg| { if arg.len() == 1 { Ok(Some(arg.to_string())) @@ -222,12 +237,35 @@ fn parse_options(args: &ArgMatches) -> Result { delimiter, round, suffix, + format, }) } +// If the --format argument and its value are provided separately, they are concatenated to avoid a +// potential clap error. For example: "--format --%f--" is changed to "--format=--%f--". 
+fn concat_format_arg_and_value(args: &[String]) -> Vec { + let mut processed_args: Vec = Vec::with_capacity(args.len()); + let mut iter = args.iter().peekable(); + + while let Some(arg) = iter.next() { + if arg == "--format" && iter.peek().is_some() { + processed_args.push(format!("--format={}", iter.peek().unwrap())); + iter.next(); + } else { + processed_args.push(arg.to_string()); + } + } + + processed_args +} + #[uucore::main] pub fn uumain(args: impl uucore::Args) -> UResult<()> { - let matches = uu_app().get_matches_from(args); + let args = args + .collect_str(InvalidEncodingHandling::Ignore) + .accept_any(); + + let matches = uu_app().get_matches_from(concat_format_arg_and_value(&args)); let options = parse_options(&matches).map_err(NumfmtError::IllegalArgument)?; @@ -271,6 +309,13 @@ pub fn uu_app<'a>() -> Command<'a> { .value_name("FIELDS") .default_value(options::FIELD_DEFAULT), ) + .arg( + Arg::new(options::FORMAT) + .long(options::FORMAT) + .help("use printf style floating-point FORMAT; see FORMAT below for details") + .takes_value(true) + .value_name("FORMAT"), + ) .arg( Arg::new(options::FROM) .long(options::FROM) @@ -351,8 +396,8 @@ pub fn uu_app<'a>() -> Command<'a> { #[cfg(test)] mod tests { use super::{ - handle_buffer, parse_unit_size, parse_unit_size_suffix, NumfmtOptions, Range, RoundMethod, - TransformOptions, Unit, + handle_buffer, parse_unit_size, parse_unit_size_suffix, FormatOptions, NumfmtOptions, + Range, RoundMethod, TransformOptions, Unit, }; use std::io::{BufReader, Error, ErrorKind, Read}; struct MockBuffer {} @@ -377,6 +422,7 @@ mod tests { delimiter: None, round: RoundMethod::Nearest, suffix: None, + format: FormatOptions::default(), } } diff --git a/src/uu/numfmt/src/options.rs b/src/uu/numfmt/src/options.rs index 43227ea1bd3..f59cc8ce5eb 100644 --- a/src/uu/numfmt/src/options.rs +++ b/src/uu/numfmt/src/options.rs @@ -1,9 +1,12 @@ +use std::str::FromStr; + use crate::units::Unit; use uucore::ranges::Range; pub const DELIMITER: 
&str = "delimiter"; pub const FIELD: &str = "field"; pub const FIELD_DEFAULT: &str = "1"; +pub const FORMAT: &str = "format"; pub const FROM: &str = "from"; pub const FROM_DEFAULT: &str = "none"; pub const FROM_UNIT: &str = "from-unit"; @@ -34,6 +37,7 @@ pub struct NumfmtOptions { pub delimiter: Option, pub round: RoundMethod, pub suffix: Option, + pub format: FormatOptions, } #[derive(Clone, Copy)] @@ -68,3 +72,282 @@ impl RoundMethod { } } } + +// Represents the options extracted from the --format argument provided by the user. +#[derive(Debug, PartialEq)] +pub struct FormatOptions { + pub grouping: bool, + pub padding: Option, + pub precision: usize, + pub prefix: String, + pub suffix: String, + pub zero_padding: bool, +} + +impl Default for FormatOptions { + fn default() -> Self { + Self { + grouping: false, + padding: None, + precision: 0, + prefix: String::from(""), + suffix: String::from(""), + zero_padding: false, + } + } +} + +impl FromStr for FormatOptions { + type Err = String; + + // The recognized format is: [PREFIX]%[0]['][-][N][.][N]f[SUFFIX] + // + // The format defines the printing of a floating point argument '%f'. + // An optional quote (%'f) enables --grouping. + // An optional width value (%10f) will pad the number. + // An optional zero (%010f) will zero pad the number. + // An optional negative value (%-10f) will left align. + // An optional precision (%.1f) determines the precision of the number. + fn from_str(s: &str) -> Result { + let mut iter = s.chars().peekable(); + let mut options = Self::default(); + + let mut padding = String::from(""); + let mut precision = String::from(""); + let mut double_percentage_counter = 0; + + // '%' chars in the prefix, if any, must appear in blocks of even length, for example: "%%%%" and + // "%% %%" are ok, "%%% %" is not ok. A single '%' is treated as the beginning of the + // floating point argument. 
+ while let Some(c) = iter.next() { + match c { + '%' if iter.peek() == Some(&'%') => { + iter.next(); + double_percentage_counter += 1; + + for _ in 0..2 { + options.prefix.push('%'); + } + } + '%' => break, + _ => options.prefix.push(c), + } + } + + // GNU numfmt drops a char from the prefix for every '%%' in the prefix, so we do the same + for _ in 0..double_percentage_counter { + options.prefix.pop(); + } + + if iter.peek().is_none() { + return if options.prefix == s { + Err(format!("format '{}' has no % directive", s)) + } else { + Err(format!("format '{}' ends in %", s)) + }; + } + + // GNU numfmt allows the characters " ", "'", and "0" to be mixed in any order, so we do the same + while matches!(iter.peek(), Some(' ') | Some('\'') | Some('0')) { + match iter.next().unwrap() { + ' ' => (), + '\'' => options.grouping = true, + '0' => options.zero_padding = true, + _ => unreachable!(), + } + } + + if let Some('-') = iter.peek() { + iter.next(); + + match iter.peek() { + Some(c) if c.is_ascii_digit() => padding.push('-'), + _ => { + return Err(format!( + "invalid format '{}', directive must be %[0]['][-][N][.][N]f", + s + )) + } + } + } + + while let Some(c) = iter.peek() { + if c.is_ascii_digit() { + padding.push(*c); + iter.next(); + } else { + break; + } + } + + if !padding.is_empty() { + if let Ok(p) = padding.parse() { + options.padding = Some(p); + } else { + return Err(format!("invalid format '{}' (width overflow)", s)); + } + } + + if let Some('.') = iter.peek() { + iter.next(); + + if matches!(iter.peek(), Some(' ') | Some('+') | Some('-')) { + return Err(format!("invalid precision in format '{}'", s)); + } + + while let Some(c) = iter.peek() { + if c.is_ascii_digit() { + precision.push(*c); + iter.next(); + } else { + break; + } + } + + if !precision.is_empty() { + if let Ok(p) = precision.parse() { + options.precision = p; + } else { + return Err(format!("invalid precision in format '{}'", s)); + } + } + } + + if let Some('f') = iter.peek() { + iter.next(); 
+ } else { + return Err(format!( + "invalid format '{}', directive must be %[0]['][-][N][.][N]f", + s + )); + } + + // '%' chars in the suffix, if any, must appear in blocks of even length, otherwise + // it is an error. For example: "%%%%" and "%% %%" are ok, "%%% %" is not ok. + while let Some(c) = iter.next() { + if c != '%' { + options.suffix.push(c); + } else if iter.peek() == Some(&'%') { + for _ in 0..2 { + options.suffix.push('%'); + } + iter.next(); + } else { + return Err(format!("format '{}' has too many % directives", s)); + } + } + + Ok(options) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_parse_format() { + assert_eq!(FormatOptions::default(), "%f".parse().unwrap()); + assert_eq!(FormatOptions::default(), "% f".parse().unwrap()); + } + + #[test] + fn test_parse_format_with_invalid_formats() { + assert!("".parse::().is_err()); + assert!("hello".parse::().is_err()); + assert!("hello%".parse::().is_err()); + assert!("%-f".parse::().is_err()); + assert!("%d".parse::().is_err()); + assert!("%4 f".parse::().is_err()); + assert!("%f%".parse::().is_err()); + assert!("%f%%%".parse::().is_err()); + assert!("%%f".parse::().is_err()); + assert!("%%%%f".parse::().is_err()); + assert!("%.-1f".parse::().is_err()); + assert!("%. 
1f".parse::().is_err()); + assert!("%18446744073709551616f".parse::().is_err()); + assert!("%.18446744073709551616f".parse::().is_err()); + } + + #[test] + fn test_parse_format_with_prefix_and_suffix() { + let formats = vec![ + ("--%f", "--", ""), + ("%f::", "", "::"), + ("--%f::", "--", "::"), + ("%f%%", "", "%%"), + ("%%%f", "%", ""), + ("%% %f", "%%", ""), + ]; + + for (format, expected_prefix, expected_suffix) in formats { + let options: FormatOptions = format.parse().unwrap(); + assert_eq!(expected_prefix, options.prefix); + assert_eq!(expected_suffix, options.suffix); + } + } + + #[test] + fn test_parse_format_with_padding() { + let mut expected_options = FormatOptions::default(); + let formats = vec![("%12f", Some(12)), ("%-12f", Some(-12))]; + + for (format, expected_padding) in formats { + expected_options.padding = expected_padding; + assert_eq!(expected_options, format.parse().unwrap()); + } + } + + #[test] + fn test_parse_format_with_precision() { + let mut expected_options = FormatOptions::default(); + let formats = vec![ + ("%6.2f", Some(6), 2), + ("%6.f", Some(6), 0), + ("%.2f", None, 2), + ("%.f", None, 0), + ]; + + for (format, expected_padding, expected_precision) in formats { + expected_options.padding = expected_padding; + expected_options.precision = expected_precision; + assert_eq!(expected_options, format.parse().unwrap()); + } + } + + #[test] + fn test_parse_format_with_grouping() { + let expected_options = FormatOptions { + grouping: true, + ..Default::default() + }; + assert_eq!(expected_options, "%'f".parse().unwrap()); + assert_eq!(expected_options, "% ' f".parse().unwrap()); + assert_eq!(expected_options, "%'''''''f".parse().unwrap()); + } + + #[test] + fn test_parse_format_with_zero_padding() { + let expected_options = FormatOptions { + padding: Some(10), + zero_padding: true, + ..Default::default() + }; + assert_eq!(expected_options, "%010f".parse().unwrap()); + assert_eq!(expected_options, "% 0 10f".parse().unwrap()); + 
assert_eq!(expected_options, "%0000000010f".parse().unwrap()); + } + + #[test] + fn test_parse_format_with_grouping_and_zero_padding() { + let expected_options = FormatOptions { + grouping: true, + zero_padding: true, + ..Default::default() + }; + assert_eq!(expected_options, "%0'f".parse().unwrap()); + assert_eq!(expected_options, "%'0f".parse().unwrap()); + assert_eq!(expected_options, "%0'0'0'f".parse().unwrap()); + assert_eq!(expected_options, "%'0'0'0f".parse().unwrap()); + } +} diff --git a/src/uu/numfmt/src/units.rs b/src/uu/numfmt/src/units.rs index 4a0b8a36e8f..cd32cfc87a3 100644 --- a/src/uu/numfmt/src/units.rs +++ b/src/uu/numfmt/src/units.rs @@ -17,7 +17,7 @@ pub const IEC_BASES: [f64; 10] = [ pub type WithI = bool; -#[derive(PartialEq)] +#[derive(Clone, Copy, PartialEq)] pub enum Unit { Auto, Si, diff --git a/tests/by-util/test_numfmt.rs b/tests/by-util/test_numfmt.rs index 089a6b37acc..e062837e1e9 100644 --- a/tests/by-util/test_numfmt.rs +++ b/tests/by-util/test_numfmt.rs @@ -673,3 +673,241 @@ fn test_valid_but_forbidden_suffix() { )); } } + +#[test] +fn test_format() { + new_ucmd!() + .args(&["--format=--%f--", "50"]) + .succeeds() + .stdout_is("--50--\n"); +} + +#[test] +fn test_format_with_separate_value() { + new_ucmd!() + .args(&["--format", "--%f--", "50"]) + .succeeds() + .stdout_is("--50--\n"); +} + +#[test] +fn test_format_padding_with_prefix_and_suffix() { + new_ucmd!() + .args(&["--format=--%6f--", "50"]) + .succeeds() + .stdout_is("-- 50--\n"); +} + +#[test] +fn test_format_negative_padding_with_prefix_and_suffix() { + new_ucmd!() + .args(&["--format=--%-6f--", "50"]) + .succeeds() + .stdout_is("--50 --\n"); +} + +#[test] +fn test_format_with_format_padding_overriding_padding_option() { + new_ucmd!() + .args(&["--format=%6f", "--padding=10", "1234"]) + .succeeds() + .stdout_is(" 1234\n"); +} + +#[test] +fn test_format_with_format_padding_overriding_implicit_padding() { + new_ucmd!() + .args(&["--format=%6f", " 1234"]) + .succeeds() + 
.stdout_is(" 1234\n"); +} + +#[test] +fn test_format_with_negative_format_padding_and_suffix() { + new_ucmd!() + .args(&["--format=%-6f", "1234 ?"]) + .succeeds() + .stdout_is("1234 ?\n"); +} + +#[test] +fn test_format_with_zero_padding() { + let formats = vec!["%06f", "%0 6f"]; + + for format in formats { + new_ucmd!() + .args(&[format!("--format={}", format), String::from("1234")]) + .succeeds() + .stdout_is("001234\n"); + } +} + +#[test] +fn test_format_with_zero_padding_and_padding_option() { + new_ucmd!() + .args(&["--format=%06f", "--padding=8", "1234"]) + .succeeds() + .stdout_is(" 001234\n"); +} + +#[test] +fn test_format_with_zero_padding_and_negative_padding_option() { + new_ucmd!() + .args(&["--format=%06f", "--padding=-8", "1234"]) + .succeeds() + .stdout_is("001234 \n"); +} + +#[test] +fn test_format_with_zero_padding_and_implicit_padding() { + new_ucmd!() + .args(&["--format=%06f", " 1234"]) + .succeeds() + .stdout_is(" 001234\n"); +} + +#[test] +fn test_format_with_zero_padding_and_suffix() { + new_ucmd!() + .args(&["--format=%06f", "1234 ?"]) + .succeeds() + .stdout_is("001234 ?\n"); +} + +#[test] +fn test_format_with_precision() { + let values = vec![("0.99", "1.0"), ("1", "1.0"), ("1.01", "1.1")]; + + for (input, expected) in values { + new_ucmd!() + .args(&["--format=%.1f", input]) + .succeeds() + .stdout_is(format!("{}\n", expected)); + } + + let values = vec![("0.99", "0.99"), ("1", "1.00"), ("1.01", "1.01")]; + + for (input, expected) in values { + new_ucmd!() + .args(&["--format=%.2f", input]) + .succeeds() + .stdout_is(format!("{}\n", expected)); + } +} + +#[test] +fn test_format_with_precision_and_down_rounding() { + let values = vec![("0.99", "0.9"), ("1", "1.0"), ("1.01", "1.0")]; + + for (input, expected) in values { + new_ucmd!() + .args(&["--format=%.1f", input, "--round=down"]) + .succeeds() + .stdout_is(format!("{}\n", expected)); + } +} + +#[test] +fn test_format_with_precision_and_to_arg() { + let values = vec![("%.1f", "10.0G"), 
("%.4f", "9.9913G")]; + + for (format, expected) in values { + new_ucmd!() + .args(&[ + format!("--format={}", format), + "9991239123".to_string(), + "--to=si".to_string(), + ]) + .succeeds() + .stdout_is(format!("{}\n", expected)); + } +} + +#[test] +fn test_format_without_percentage_directive() { + let invalid_formats = vec!["", "hello"]; + + for invalid_format in invalid_formats { + new_ucmd!() + .arg(format!("--format={}", invalid_format)) + .fails() + .code_is(1) + .stderr_contains(format!("format '{}' has no % directive", invalid_format)); + } +} + +#[test] +fn test_format_with_percentage_directive_at_end() { + let invalid_format = "hello%"; + + new_ucmd!() + .arg(format!("--format={}", invalid_format)) + .fails() + .code_is(1) + .stderr_contains(format!("format '{}' ends in %", invalid_format)); +} + +#[test] +fn test_format_with_too_many_percentage_directives() { + let invalid_format = "%f %f"; + + new_ucmd!() + .arg(format!("--format={}", invalid_format)) + .fails() + .code_is(1) + .stderr_contains(format!( + "format '{}' has too many % directives", + invalid_format + )); +} + +#[test] +fn test_format_with_invalid_format() { + let invalid_formats = vec!["%d", "% -43 f"]; + + for invalid_format in invalid_formats { + new_ucmd!() + .arg(format!("--format={}", invalid_format)) + .fails() + .code_is(1) + .stderr_contains(format!( + "invalid format '{}', directive must be %[0]['][-][N][.][N]f", + invalid_format + )); + } +} + +#[test] +fn test_format_with_width_overflow() { + let invalid_format = "%18446744073709551616f"; + new_ucmd!() + .arg(format!("--format={}", invalid_format)) + .fails() + .code_is(1) + .stderr_contains(format!( + "invalid format '{}' (width overflow)", + invalid_format + )); +} + +#[test] +fn test_format_with_invalid_precision() { + let invalid_formats = vec!["%.-1f", "%.+1f", "%. 
1f", "%.18446744073709551616f"]; + + for invalid_format in invalid_formats { + new_ucmd!() + .arg(format!("--format={}", invalid_format)) + .fails() + .code_is(1) + .stderr_contains(format!("invalid precision in format '{}'", invalid_format)); + } +} + +#[test] +fn test_format_grouping_conflicts_with_to_option() { + new_ucmd!() + .args(&["--format=%'f", "--to=si"]) + .fails() + .code_is(1) + .stderr_contains("grouping cannot be combined with --to"); +}