From 20dfb270577eb77ddd7fac6b9f1342c207d99458 Mon Sep 17 00:00:00 2001
From: Daniel Hofstetter
Date: Fri, 27 Dec 2024 09:12:47 +0100
Subject: [PATCH] cut: fix handling of newline as delimiter

---
 src/uu/cut/src/cut.rs     | 48 ++++++++++++++++++++++++++++++++++++++-
 tests/by-util/test_cut.rs | 17 +++++++++++---
 2 files changed, 61 insertions(+), 4 deletions(-)

diff --git a/src/uu/cut/src/cut.rs b/src/uu/cut/src/cut.rs
index 3dde5e6659..5e128425b6 100644
--- a/src/uu/cut/src/cut.rs
+++ b/src/uu/cut/src/cut.rs
@@ -9,7 +9,7 @@ use bstr::io::BufReadExt;
 use clap::{builder::ValueParser, crate_version, Arg, ArgAction, ArgMatches, Command};
 use std::ffi::OsString;
 use std::fs::File;
-use std::io::{stdin, stdout, BufReader, BufWriter, IsTerminal, Read, Write};
+use std::io::{stdin, stdout, BufRead, BufReader, BufWriter, IsTerminal, Read, Write};
 use std::path::Path;
 use uucore::display::Quotable;
 use uucore::error::{set_exit_code, FromIo, UResult, USimpleError};
@@ -267,10 +267,56 @@ fn cut_fields_implicit_out_delim(
     Ok(())
 }
 
+/// Cuts fields when the input delimiter is identical to `newline_char`.
+///
+/// The whole input is split on `newline_char`, so every segment is one field.
+/// The segments selected by `ranges` are written via `stdout_writer()`,
+/// separated by `out_delim` and followed by a single `newline_char`.
+///
+/// # Errors
+///
+/// Returns an error if reading from `reader` or writing the output fails.
+fn cut_fields_newline_char_delim<R: Read>(
+    reader: R,
+    ranges: &[Range],
+    newline_char: u8,
+    out_delim: &[u8],
+) -> UResult<()> {
+    let buf_in = BufReader::new(reader);
+    let mut out = stdout_writer();
+
+    // Fail on the first read error: skipping `Err` items would drop data and
+    // never terminate if the reader keeps returning the same error
+    let segments = buf_in.split(newline_char).collect::<Result<Vec<_>, _>>()?;
+    let mut print_delim = false;
+
+    for &Range { low, high } in ranges {
+        for i in low..=high {
+            // "- 1" is necessary because fields start from 1 whereas a Vec starts from 0
+            if let Some(segment) = segments.get(i - 1) {
+                if print_delim {
+                    out.write_all(out_delim)?;
+                } else {
+                    print_delim = true;
+                }
+                out.write_all(segment.as_slice())?;
+            } else {
+                break;
+            }
+        }
+    }
+    out.write_all(&[newline_char])?;
+    Ok(())
+}
+
 fn cut_fields<R: Read>(reader: R, ranges: &[Range], opts: &Options) -> UResult<()> {
     let newline_char = opts.line_ending.into();
     let field_opts = opts.field_opts.as_ref().unwrap(); // it is safe to unwrap() here - field_opts will always be Some() for cut_fields() call
     match field_opts.delimiter {
+        Delimiter::Slice(delim) if delim == [newline_char] => {
+            let out_delim = opts.out_delimiter.unwrap_or(delim);
+            cut_fields_newline_char_delim(reader, ranges, newline_char, out_delim)
+        }
         Delimiter::Slice(delim) => {
             let matcher = ExactMatcher::new(delim);
             match opts.out_delimiter {
diff --git a/tests/by-util/test_cut.rs b/tests/by-util/test_cut.rs
index 1aa3c126a2..dbd26abb28 100644
--- a/tests/by-util/test_cut.rs
+++ b/tests/by-util/test_cut.rs
@@ -288,11 +288,22 @@ fn test_empty_string_as_delimiter_with_output_delimiter() {
 
 #[test]
 fn test_newline_as_delimiter() {
+    for (field, expected_output) in [("1", "a:1\n"), ("2", "b:\n")] {
+        new_ucmd!()
+            .args(&["-f", field, "-d", "\n"])
+            .pipe_in("a:1\nb:")
+            .succeeds()
+            .stdout_only_bytes(expected_output);
+    }
+}
+
+#[test]
+fn test_newline_as_delimiter_with_output_delimiter() {
     new_ucmd!()
-        .args(&["-f", "1", "-d", "\n"])
-        .pipe_in("a:1\nb:")
+        .args(&["-f1-", "-d", "\n", "--output-delimiter=:"])
+        .pipe_in("a\nb\n")
         .succeeds()
-        .stdout_only_bytes("a:1\nb:\n");
+        .stdout_only_bytes("a:b\n");
 }
 
 #[test]