Skip to content

Commit

Permalink
paste: implement "-z" flag
Browse files Browse the repository at this point in the history
Fixes #3637
  • Loading branch information
cakebaker committed Jun 21, 2022
1 parent c277e93 commit 69b890a
Show file tree
Hide file tree
Showing 2 changed files with 131 additions and 17 deletions.
100 changes: 83 additions & 17 deletions src/uu/paste/src/paste.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
// spell-checker:ignore (ToDO) delim

use clap::{crate_version, Arg, Command};
use std::fmt::Display;
use std::fs::File;
use std::io::{stdin, stdout, BufRead, BufReader, Read, Write};
use std::path::Path;
Expand All @@ -20,16 +21,34 @@ mod options {
pub const DELIMITER: &str = "delimiters";
pub const SERIAL: &str = "serial";
pub const FILE: &str = "file";
pub const ZERO_TERMINATED: &str = "zero-terminated";
}

/// Byte that terminates a record.
///
/// Each variant's discriminant is the terminator byte itself, so
/// `line_ending as u8` yields the byte to search for when reading.
#[repr(u8)]
#[derive(Clone, Copy)]
enum LineEnding {
    /// Records end with a line feed (`\n`).
    Newline = b'\n',
    /// Records end with a NUL byte (`\0`).
    Nul = 0,
}

impl Display for LineEnding {
    /// Writes the terminator as a one-character string.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let terminator = match self {
            Self::Newline => "\n",
            Self::Nul => "\0",
        };
        f.write_str(terminator)
    }
}

/// Reads bytes up to and including the next `byte` delimiter and appends them to `buf`.
///
/// Reads from `reader` when one is supplied and from standard input otherwise.
/// Operating on raw bytes instead of a `String` allows any byte, including
/// NUL, to serve as the record delimiter.
///
/// Returns the number of bytes appended to `buf`; `Ok(0)` means the input is
/// exhausted. If the input ends before `byte` is found, the remaining bytes
/// are appended without a trailing delimiter.
///
/// # Errors
///
/// Propagates any I/O error reported by the underlying reader.
fn read_until<R: Read>(
    reader: Option<&mut BufReader<R>>,
    byte: u8,
    buf: &mut Vec<u8>,
) -> std::io::Result<usize> {
    match reader {
        Some(reader) => reader.read_until(byte, buf),
        // No reader was supplied: fall back to standard input, locked for the read.
        None => stdin().lock().read_until(byte, buf),
    }
}

Expand All @@ -44,7 +63,13 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
.unwrap()
.map(|s| s.to_owned())
.collect();
paste(files, serial, delimiters)
let line_ending = if matches.is_present(options::ZERO_TERMINATED) {
LineEnding::Nul
} else {
LineEnding::Newline
};

paste(files, serial, delimiters, line_ending)
}

pub fn uu_app<'a>() -> Command<'a> {
Expand Down Expand Up @@ -74,9 +99,20 @@ pub fn uu_app<'a>() -> Command<'a> {
.default_value("-")
.value_hint(clap::ValueHint::FilePath),
)
.arg(
Arg::new(options::ZERO_TERMINATED)
.long(options::ZERO_TERMINATED)
.short('z')
.help("line delimiter is NUL, not newline"),
)
}

fn paste(filenames: Vec<String>, serial: bool, delimiters: &str) -> UResult<()> {
fn paste(
filenames: Vec<String>,
serial: bool,
delimiters: &str,
line_ending: LineEnding,
) -> UResult<()> {
let mut files = Vec::with_capacity(filenames.len());
for name in filenames {
let file = if name == "-" {
Expand All @@ -91,28 +127,44 @@ fn paste(filenames: Vec<String>, serial: bool, delimiters: &str) -> UResult<()>

let delimiters: Vec<char> = unescape(delimiters).chars().collect();
let mut delim_count = 0;
let mut final_delim_length = 1;
let stdout = stdout();
let mut stdout = stdout.lock();

let mut output = String::new();
let mut output = Vec::new();
if serial {
for file in &mut files {
output.clear();
loop {
match read_line(file.as_mut(), &mut output) {
match read_until(file.as_mut(), line_ending as u8, &mut output) {
Ok(0) => break,
Ok(_) => {
if output.ends_with('\n') {
if output.ends_with(&[line_ending as u8]) {
output.pop();
}
output.push(delimiters[delim_count % delimiters.len()]);
// a buffer of length four is large enough to encode any char
let mut buffer = [0; 4];
let ch =
delimiters[delim_count % delimiters.len()].encode_utf8(&mut buffer);
final_delim_length = ch.len();

for i in 0..ch.len() {
output.push(buffer[i]);
}
}
Err(e) => return Err(e.map_err_context(String::new)),
}
delim_count += 1;
}
output.pop();
writeln!(stdout, "{}", output)?;
// remove final delimiter
output.truncate(output.len() - final_delim_length);

write!(
stdout,
"{}{}",
String::from_utf8_lossy(&output),
line_ending
)?;
}
} else {
let mut eof = vec![false; files.len()];
Expand All @@ -123,28 +175,42 @@ fn paste(filenames: Vec<String>, serial: bool, delimiters: &str) -> UResult<()>
if eof[i] {
eof_count += 1;
} else {
match read_line(file.as_mut(), &mut output) {
match read_until(file.as_mut(), line_ending as u8, &mut output) {
Ok(0) => {
eof[i] = true;
eof_count += 1;
}
Ok(_) => {
if output.ends_with('\n') {
if output.ends_with(&[line_ending as u8]) {
output.pop();
}
}
Err(e) => return Err(e.map_err_context(String::new)),
}
}
output.push(delimiters[delim_count % delimiters.len()]);
// a buffer of length four is large enough to encode any char
let mut buffer = [0; 4];
let ch = delimiters[delim_count % delimiters.len()].encode_utf8(&mut buffer);
final_delim_length = ch.len();

for i in 0..ch.len() {
output.push(buffer[i]);
}

delim_count += 1;
}
if files.len() == eof_count {
break;
}
// Remove final delimiter
output.pop();
writeln!(stdout, "{}", output)?;
output.truncate(output.len() - final_delim_length);

write!(
stdout,
"{}{}",
String::from_utf8_lossy(&output),
line_ending
)?;
delim_count = 0;
}
}
Expand Down
48 changes: 48 additions & 0 deletions tests/by-util/test_paste.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,30 @@ static EXAMPLE_DATA: &[TestData] = &[
ins: &["a\n", "b\n"],
out: "a\tb\n",
},
TestData {
name: "zno-nl-1",
args: &["-z"],
ins: &["a", "b"],
out: "a\tb\0",
},
TestData {
name: "zno-nl-2",
args: &["-z"],
ins: &["a\0", "b"],
out: "a\tb\0",
},
TestData {
name: "zno-nl-3",
args: &["-z"],
ins: &["a", "b\0"],
out: "a\tb\0",
},
TestData {
name: "zno-nl-4",
args: &["-z"],
ins: &["a\0", "b\0"],
out: "a\tb\0",
},
// Same as above, but with two lines in each input file and the
// addition of the -d option to make SPACE be the output
// delimiter.
Expand Down Expand Up @@ -60,6 +84,30 @@ static EXAMPLE_DATA: &[TestData] = &[
ins: &["1\na\n", "2\nb\n"],
out: "1 2\na b\n",
},
TestData {
name: "zno-nla1",
args: &["-zd", " "],
ins: &["1\0a", "2\0b"],
out: "1 2\0a b\0",
},
TestData {
name: "zno-nla2",
args: &["-zd", " "],
ins: &["1\0a\0", "2\0b"],
out: "1 2\0a b\0",
},
TestData {
name: "zno-nla3",
args: &["-zd", " "],
ins: &["1\0a", "2\0b\0"],
out: "1 2\0a b\0",
},
TestData {
name: "zno-nla4",
args: &["-zd", " "],
ins: &["1\0a\0", "2\0b\0"],
out: "1 2\0a b\0",
},
TestData {
name: "multibyte-delim",
args: &["-d", "💣"],
Expand Down

0 comments on commit 69b890a

Please sign in to comment.