From d0f608c69fcb5e70b029820f8a40319ae4270964 Mon Sep 17 00:00:00 2001 From: Daniel Hofstetter Date: Sun, 19 Jun 2022 12:37:54 +0200 Subject: [PATCH] paste: implement "-z" flag Fixes #3637 --- src/uu/paste/src/paste.rs | 100 ++++++++++++++++++++++++++++++------ tests/by-util/test_paste.rs | 48 +++++++++++++++++ 2 files changed, 131 insertions(+), 17 deletions(-) diff --git a/src/uu/paste/src/paste.rs b/src/uu/paste/src/paste.rs index 682e66a5da9..bc0ae7214f5 100644 --- a/src/uu/paste/src/paste.rs +++ b/src/uu/paste/src/paste.rs @@ -8,6 +8,7 @@ // spell-checker:ignore (ToDO) delim use clap::{crate_version, Arg, Command}; +use std::fmt::Display; use std::fs::File; use std::io::{stdin, stdout, BufRead, BufReader, Read, Write}; use std::path::Path; @@ -20,16 +21,34 @@ mod options { pub const DELIMITER: &str = "delimiters"; pub const SERIAL: &str = "serial"; pub const FILE: &str = "file"; + pub const ZERO_TERMINATED: &str = "zero-terminated"; +} + +#[repr(u8)] +#[derive(Clone, Copy)] +enum LineEnding { + Newline = b'\n', + Nul = 0, +} + +impl Display for LineEnding { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::Newline => writeln!(f), + Self::Nul => write!(f, "\0"), + } + } } // Wraps BufReader and stdin -fn read_line( +fn read_until( reader: Option<&mut BufReader>, - buf: &mut String, + byte: u8, + buf: &mut Vec, ) -> std::io::Result { match reader { - Some(reader) => reader.read_line(buf), - None => stdin().read_line(buf), + Some(reader) => reader.read_until(byte, buf), + None => stdin().lock().read_until(byte, buf), } } @@ -44,7 +63,13 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { .unwrap() .map(|s| s.to_owned()) .collect(); - paste(files, serial, delimiters) + let line_ending = if matches.is_present(options::ZERO_TERMINATED) { + LineEnding::Nul + } else { + LineEnding::Newline + }; + + paste(files, serial, delimiters, line_ending) } pub fn uu_app<'a>() -> Command<'a> { @@ -74,9 +99,20 @@ pub fn uu_app<'a>() -> Command<'a> { .default_value("-") .value_hint(clap::ValueHint::FilePath), ) + .arg( + Arg::new(options::ZERO_TERMINATED) + .long(options::ZERO_TERMINATED) + .short('z') + .help("line delimiter is NUL, not newline"), + ) } -fn paste(filenames: Vec, serial: bool, delimiters: &str) -> UResult<()> { +fn paste( + filenames: Vec, + serial: bool, + delimiters: &str, + line_ending: LineEnding, +) -> UResult<()> { let mut files = Vec::with_capacity(filenames.len()); for name in filenames { let file = if name == "-" { @@ -91,28 +127,44 @@ fn paste(filenames: Vec, serial: bool, delimiters: &str) -> UResult<()> let delimiters: Vec = unescape(delimiters).chars().collect(); let mut delim_count = 0; + let mut delim_length = 1; let stdout = stdout(); let mut stdout = stdout.lock(); - let mut output = String::new(); + let mut output = Vec::new(); if serial { for file in &mut files { output.clear(); loop { - match read_line(file.as_mut(), &mut output) { + match read_until(file.as_mut(), line_ending as u8, &mut output) { Ok(0) => break, Ok(_) => { - if output.ends_with('\n') { + if output.ends_with(&[line_ending as u8]) { output.pop(); } - output.push(delimiters[delim_count % delimiters.len()]); + // a buffer of length four is large enough to encode any char + let mut buffer = [0; 4]; + let ch = + delimiters[delim_count % delimiters.len()].encode_utf8(&mut buffer); + delim_length = ch.len(); + + for byte in buffer.iter().take(delim_length) { + output.push(*byte); + } } Err(e) => return Err(e.map_err_context(String::new)), } delim_count += 1; } - output.pop(); - writeln!(stdout, "{}", output)?; + // remove final delimiter + output.truncate(output.len() - delim_length); + + write!( + stdout, + "{}{}", + String::from_utf8_lossy(&output), + line_ending + )?; } } else { let mut eof = vec![false; files.len()]; @@ -123,28 +175,42 @@ fn paste(filenames: Vec, serial: bool, delimiters: &str) -> UResult<()> if eof[i] { eof_count += 1; } else { - match read_line(file.as_mut(), &mut output) { + match read_until(file.as_mut(), line_ending as u8, &mut output) { Ok(0) => { eof[i] = true; eof_count += 1; } Ok(_) => { - if output.ends_with('\n') { + if output.ends_with(&[line_ending as u8]) { output.pop(); } } Err(e) => return Err(e.map_err_context(String::new)), } } - output.push(delimiters[delim_count % delimiters.len()]); + // a buffer of length four is large enough to encode any char + let mut buffer = [0; 4]; + let ch = delimiters[delim_count % delimiters.len()].encode_utf8(&mut buffer); + delim_length = ch.len(); + + for byte in buffer.iter().take(delim_length) { + output.push(*byte); + } + delim_count += 1; } if files.len() == eof_count { break; } // Remove final delimiter - output.pop(); - writeln!(stdout, "{}", output)?; + output.truncate(output.len() - delim_length); + + write!( + stdout, + "{}{}", + String::from_utf8_lossy(&output), + line_ending + )?; delim_count = 0; } } diff --git a/tests/by-util/test_paste.rs b/tests/by-util/test_paste.rs index 6d539e45e52..b4d23868ac9 100644 --- a/tests/by-util/test_paste.rs +++ b/tests/by-util/test_paste.rs @@ -33,6 +33,30 @@ static EXAMPLE_DATA: &[TestData] = &[ ins: &["a\n", "b\n"], out: "a\tb\n", }, + TestData { + name: "zno-nl-1", + args: &["-z"], + ins: &["a", "b"], + out: "a\tb\0", + }, + TestData { + name: "zno-nl-2", + args: &["-z"], + ins: &["a\0", "b"], + out: "a\tb\0", + }, + TestData { + name: "zno-nl-3", + args: &["-z"], + ins: &["a", "b\0"], + out: "a\tb\0", + }, + TestData { + name: "zno-nl-4", + args: &["-z"], + ins: &["a\0", "b\0"], + out: "a\tb\0", + }, // Same as above, but with a two lines in each input file and the // addition of the -d option to make SPACE be the output // delimiter. @@ -60,6 +84,30 @@ static EXAMPLE_DATA: &[TestData] = &[ ins: &["1\na\n", "2\nb\n"], out: "1 2\na b\n", }, + TestData { + name: "zno-nla1", + args: &["-zd", " "], + ins: &["1\0a", "2\0b"], + out: "1 2\0a b\0", + }, + TestData { + name: "zno-nla2", + args: &["-zd", " "], + ins: &["1\0a\0", "2\0b"], + out: "1 2\0a b\0", + }, + TestData { + name: "zno-nla3", + args: &["-zd", " "], + ins: &["1\0a", "2\0b\0"], + out: "1 2\0a b\0", + }, + TestData { + name: "zno-nla4", + args: &["-zd", " "], + ins: &["1\0a\0", "2\0b\0"], + out: "1 2\0a b\0", + }, TestData { name: "multibyte-delim", args: &["-d", "💣"],