dd: support the [io]flag=nocache option

On Linux, `iflag=nocache` / `oflag=nocache` ask the kernel (through
`posix_fadvise(POSIX_FADV_DONTNEED)`) to drop cached pages for the part
of the input / output file that has just been processed. When the
source or destination is not a regular file the request fails with
ESPIPE, a message is printed and the exit status is set to 1. On other
operating systems the request is currently a no-op. `ReadStat` gains a
`bytes_total` field so the copy loop can track the input offset.

diff --git a/Cargo.lock b/Cargo.lock
index 9827a789349..caf4946257d 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2552,6 +2552,7 @@ dependencies = [
  "clap",
  "gcd",
  "libc",
+ "nix",
  "signal-hook",
  "uucore",
 ]
diff --git a/src/uu/dd/Cargo.toml b/src/uu/dd/Cargo.toml
index b81ef1a8efc..04b60dc3113 100644
--- a/src/uu/dd/Cargo.toml
+++ b/src/uu/dd/Cargo.toml
@@ -20,6 +20,9 @@ gcd = { workspace=true }
 libc = { workspace=true }
 uucore = { workspace=true, features=["memo"] }
 
+[target.'cfg(any(target_os = "linux"))'.dependencies]
+nix = { workspace=true, features = ["fs"] }
+
 [target.'cfg(any(target_os = "linux", target_os = "android"))'.dependencies]
 signal-hook = { workspace=true }
 
diff --git a/src/uu/dd/src/dd.rs b/src/uu/dd/src/dd.rs
index 836456635c1..6727f4bb00d 100644
--- a/src/uu/dd/src/dd.rs
+++ b/src/uu/dd/src/dd.rs
@@ -5,7 +5,7 @@
 // For the full copyright and license information, please view the LICENSE
 // file that was distributed with this source code.
 
-// spell-checker:ignore fname, tname, fpath, specfile, testfile, unspec, ifile, ofile, outfile, fullblock, urand, fileio, atoe, atoibm, behaviour, bmax, bremain, cflags, creat, ctable, ctty, datastructures, doesnt, etoa, fileout, fname, gnudd, iconvflags, iseek, nocache, noctty, noerror, nofollow, nolinks, nonblock, oconvflags, oseek, outfile, parseargs, rlen, rmax, rremain, rsofar, rstat, sigusr, wlen, wstat seekable oconv canonicalized
+// spell-checker:ignore fname, tname, fpath, specfile, testfile, unspec, ifile, ofile, outfile, fullblock, urand, fileio, atoe, atoibm, behaviour, bmax, bremain, cflags, creat, ctable, ctty, datastructures, doesnt, etoa, fileout, fname, gnudd, iconvflags, iseek, nocache, noctty, noerror, nofollow, nolinks, nonblock, oconvflags, oseek, outfile, parseargs, rlen, rmax, rremain, rsofar, rstat, sigusr, wlen, wstat seekable oconv canonicalized fadvise Fadvise FADV DONTNEED ESPIPE
 
 mod datastructures;
 use datastructures::*;
@@ -42,9 +42,16 @@ use std::time;
 
 use clap::{crate_version, Arg, Command};
 use gcd::Gcd;
+#[cfg(target_os = "linux")]
+use nix::{
+    errno::Errno,
+    fcntl::{posix_fadvise, PosixFadviseAdvice},
+};
 use uucore::display::Quotable;
 use uucore::error::{FromIo, UResult};
 use uucore::{format_usage, help_about, help_section, help_usage, show_error};
+#[cfg(target_os = "linux")]
+use uucore::{show, show_if_err};
 
 const ABOUT: &str = help_about!("dd.md");
 const AFTER_HELP: &str = help_section!("after help", "dd.md");
@@ -131,6 +138,16 @@ impl Source {
         Self::StdinFile(f)
     }
 
+    /// The length of the data source in number of bytes.
+    ///
+    /// If it cannot be determined, then this function returns 0.
+    fn len(&self) -> std::io::Result<i64> {
+        match self {
+            Self::File(f) => Ok(f.metadata()?.len().try_into().unwrap_or(i64::MAX)),
+            _ => Ok(0),
+        }
+    }
+
     fn skip(&mut self, n: u64) -> io::Result<u64> {
         match self {
             #[cfg(not(unix))]
@@ -156,6 +173,23 @@ impl Source {
             Self::Fifo(f) => io::copy(&mut f.take(n), &mut io::sink()),
         }
     }
+
+    /// Discard the system file cache for the given portion of the data source.
+    ///
+    /// `offset` and `len` specify a contiguous portion of the data
+    /// source. This function informs the kernel that the specified
+    /// portion of the source is no longer needed. If not possible,
+    /// then this function returns an error.
+    #[cfg(target_os = "linux")]
+    fn discard_cache(&self, offset: libc::off_t, len: libc::off_t) -> nix::Result<()> {
+        match self {
+            Self::File(f) => {
+                let advice = PosixFadviseAdvice::POSIX_FADV_DONTNEED;
+                posix_fadvise(f.as_raw_fd(), offset, len, advice)
+            }
+            _ => Err(Errno::ESPIPE), // "Illegal seek"
+        }
+    }
 }
 
 impl Read for Source {
@@ -296,6 +330,29 @@ impl<'a> Read for Input<'a> {
 }
 
 impl<'a> Input<'a> {
+    /// Discard the system file cache for the given portion of the input.
+    ///
+    /// `offset` and `len` specify a contiguous portion of the input.
+    /// This function informs the kernel that the specified portion of
+    /// the input file is no longer needed. If not possible, then this
+    /// function prints an error message to stderr and sets the exit
+    /// status code to 1.
+    #[allow(unused_variables)]
+    fn discard_cache(&self, offset: libc::off_t, len: libc::off_t) {
+        #[cfg(target_os = "linux")]
+        {
+            show_if_err!(self
+                .src
+                .discard_cache(offset, len)
+                .map_err_context(|| "failed to discard cache for: 'standard input'".to_string()));
+        }
+        #[cfg(not(target_os = "linux"))]
+        {
+            // TODO Is there a way to discard filesystem cache on
+            // these other operating systems?
+        }
+    }
+
     /// Fills a given buffer.
     /// Reads in increments of 'self.ibs'.
     /// The start of each ibs-sized read follows the previous one.
@@ -317,13 +374,13 @@ impl<'a> Input<'a> {
                 _ => break,
             }
         }
-
         buf.truncate(bytes_total);
         Ok(ReadStat {
             reads_complete,
             reads_partial,
             // Records are not truncated when filling.
             records_truncated: 0,
+            bytes_total: bytes_total.try_into().unwrap(),
         })
     }
 
@@ -334,6 +391,7 @@ impl<'a> Input<'a> {
         let mut reads_complete = 0;
         let mut reads_partial = 0;
         let mut base_idx = 0;
+        let mut bytes_total = 0;
 
         while base_idx < buf.len() {
             let next_blk = cmp::min(base_idx + self.settings.ibs, buf.len());
@@ -342,11 +400,13 @@ impl<'a> Input<'a> {
             match self.read(&mut buf[base_idx..next_blk])? {
                 0 => break,
                 rlen if rlen < target_len => {
+                    bytes_total += rlen;
                     reads_partial += 1;
                     let padding = vec![pad; target_len - rlen];
                     buf.splice(base_idx + rlen..next_blk, padding.into_iter());
                 }
-                _ => {
+                rlen => {
+                    bytes_total += rlen;
                     reads_complete += 1;
                 }
             }
@@ -359,6 +419,7 @@ impl<'a> Input<'a> {
             reads_complete,
             reads_partial,
             records_truncated: 0,
+            bytes_total: bytes_total.try_into().unwrap(),
         })
     }
 }
@@ -447,6 +508,33 @@ impl Dest {
             _ => Ok(()),
         }
     }
+
+    /// Discard the system file cache for the given portion of the destination.
+    ///
+    /// `offset` and `len` specify a contiguous portion of the
+    /// destination. This function informs the kernel that the
+    /// specified portion of the destination is no longer needed. If
+    /// not possible, then this function returns an error.
+    #[cfg(target_os = "linux")]
+    fn discard_cache(&self, offset: libc::off_t, len: libc::off_t) -> nix::Result<()> {
+        match self {
+            Self::File(f, _) => {
+                let advice = PosixFadviseAdvice::POSIX_FADV_DONTNEED;
+                posix_fadvise(f.as_raw_fd(), offset, len, advice)
+            }
+            _ => Err(Errno::ESPIPE), // "Illegal seek"
+        }
+    }
+
+    /// The length of the data destination in number of bytes.
+    ///
+    /// If it cannot be determined, then this function returns 0.
+    fn len(&self) -> std::io::Result<i64> {
+        match self {
+            Self::File(f, _) => Ok(f.metadata()?.len().try_into().unwrap_or(i64::MAX)),
+            _ => Ok(0),
+        }
+    }
 }
 
 /// Decide whether the given buffer is all zeros.
@@ -580,6 +668,29 @@ impl<'a> Output<'a> {
         Ok(Self { dst, settings })
     }
 
+    /// Discard the system file cache for the given portion of the output.
+    ///
+    /// `offset` and `len` specify a contiguous portion of the output.
+    /// This function informs the kernel that the specified portion of
+    /// the output file is no longer needed. If not possible, then
+    /// this function prints an error message to stderr and sets the
+    /// exit status code to 1.
+    #[allow(unused_variables)]
+    fn discard_cache(&self, offset: libc::off_t, len: libc::off_t) {
+        #[cfg(target_os = "linux")]
+        {
+            show_if_err!(self
+                .dst
+                .discard_cache(offset, len)
+                .map_err_context(|| "failed to discard cache for: 'standard output'".to_string()));
+        }
+        #[cfg(not(target_os = "linux"))]
+        {
+            // TODO Is there a way to discard filesystem cache on
+            // these other operating systems?
+        }
+    }
+
     /// Write the given bytes one block at a time.
     ///
     /// This may write partial blocks (for example, if the underlying
@@ -674,6 +785,27 @@ fn dd_copy(mut i: Input, mut o: Output) -> std::io::Result<()> {
     // Optimization: if no blocks are to be written, then don't
     // bother allocating any buffers.
     if let Some(Num::Blocks(0) | Num::Bytes(0)) = i.settings.count {
+        // Even though we are not reading anything from the input
+        // file, we still need to honor the `nocache` flag, which
+        // requests that we inform the system that we no longer
+        // need the contents of the input file in a system cache.
+        //
+        // A length too large for `off_t` is clamped to `off_t::MAX`.
+        if i.settings.iflags.nocache {
+            let offset = 0;
+            #[allow(clippy::useless_conversion)]
+            let len = i.src.len()?.try_into().unwrap_or(libc::off_t::MAX);
+            i.discard_cache(offset, len);
+        }
+        // Similarly, discard the system cache for the output file.
+        //
+        // A length too large for `off_t` is clamped to `off_t::MAX`.
+        if o.settings.oflags.nocache {
+            let offset = 0;
+            #[allow(clippy::useless_conversion)]
+            let len = o.dst.len()?.try_into().unwrap_or(libc::off_t::MAX);
+            o.discard_cache(offset, len);
+        }
         return finalize(&mut o, rstat, wstat, start, &prog_tx, output_thread);
     };
 
@@ -681,6 +813,13 @@ fn dd_copy(mut i: Input, mut o: Output) -> std::io::Result<()> {
     // This is the max size needed.
     let mut buf = vec![BUF_INIT_BYTE; bsize];
 
+    // Index in the input file where we are reading bytes and in
+    // the output file where we are writing bytes.
+    //
+    // These are updated on each iteration of the main loop.
+    let mut read_offset = 0;
+    let mut write_offset = 0;
+
     // The main read/write loop.
     //
     // Each iteration reads blocks from the input and writes
@@ -700,6 +839,30 @@ fn dd_copy(mut i: Input, mut o: Output) -> std::io::Result<()> {
         }
         let wstat_update = o.write_blocks(&buf)?;
 
+        // Discard the system file cache for the read portion of
+        // the input file.
+        //
+        // TODO Better error handling for overflowing `offset` and `len`.
+        let read_len = rstat_update.bytes_total;
+        if i.settings.iflags.nocache {
+            let offset = read_offset.try_into().unwrap();
+            let len = read_len.try_into().unwrap();
+            i.discard_cache(offset, len);
+        }
+        read_offset += read_len;
+
+        // Discard the system file cache for the written portion
+        // of the output file.
+        //
+        // TODO Better error handling for overflowing `offset` and `len`.
+        let write_len = wstat_update.bytes_total;
+        if o.settings.oflags.nocache {
+            let offset = write_offset.try_into().unwrap();
+            let len = write_len.try_into().unwrap();
+            o.discard_cache(offset, len);
+        }
+        write_offset += write_len;
+
         // Update the read/write stats and inform the progress thread once per second.
         //
         // If the receiver is disconnected, `send()` returns an
diff --git a/src/uu/dd/src/parseargs.rs b/src/uu/dd/src/parseargs.rs
index b9a3baf09aa..781934a8302 100644
--- a/src/uu/dd/src/parseargs.rs
+++ b/src/uu/dd/src/parseargs.rs
@@ -304,7 +304,7 @@ impl Parser {
                 "directory" => linux_only!(f, i.directory = true),
                 "dsync" => linux_only!(f, i.dsync = true),
                 "sync" => linux_only!(f, i.sync = true),
-                "nocache" => return Err(ParseError::Unimplemented(f.to_string())),
+                "nocache" => linux_only!(f, i.nocache = true),
                 "nonblock" => linux_only!(f, i.nonblock = true),
                 "noatime" => linux_only!(f, i.noatime = true),
                 "noctty" => linux_only!(f, i.noctty = true),
@@ -336,7 +336,7 @@ impl Parser {
                 "directory" => linux_only!(f, o.directory = true),
                 "dsync" => linux_only!(f, o.dsync = true),
                 "sync" => linux_only!(f, o.sync = true),
-                "nocache" => return Err(ParseError::Unimplemented(f.to_string())),
+                "nocache" => linux_only!(f, o.nocache = true),
                 "nonblock" => linux_only!(f, o.nonblock = true),
                 "noatime" => linux_only!(f, o.noatime = true),
                 "noctty" => linux_only!(f, o.noctty = true),
diff --git a/src/uu/dd/src/parseargs/unit_tests.rs b/src/uu/dd/src/parseargs/unit_tests.rs
index a135c3572da..54e17b882e2 100644
--- a/src/uu/dd/src/parseargs/unit_tests.rs
+++ b/src/uu/dd/src/parseargs/unit_tests.rs
@@ -55,7 +55,7 @@ fn unimplemented_flags_should_error() {
     let mut succeeded = Vec::new();
 
     // The following flags are not implemented
-    for flag in ["cio", "nocache", "nolinks", "text", "binary"] {
+    for flag in ["cio", "nolinks", "text", "binary"] {
         let args = vec![format!("iflag={flag}")];
 
         if Parser::new()
diff --git a/src/uu/dd/src/progress.rs b/src/uu/dd/src/progress.rs
index 51cfa92efd2..65af053b841 100644
--- a/src/uu/dd/src/progress.rs
+++ b/src/uu/dd/src/progress.rs
@@ -79,9 +79,9 @@ impl ProgUpdate {
     /// ```rust,ignore
     /// use std::io::Cursor;
     /// use std::time::Duration;
-    /// use crate::progress::{ProgUpdate, ReadState, WriteStat};
+    /// use crate::progress::{ProgUpdate, ReadStat, WriteStat};
     ///
-    /// let read_stat = ReadStat::new(1, 2, 3);
+    /// let read_stat = ReadStat::new(1, 2, 3, 999);
     /// let write_stat = WriteStat::new(4, 5, 6);
     /// let duration = Duration::new(789, 0);
     /// let prog_update = ProgUpdate {
@@ -121,7 +121,7 @@ impl ProgUpdate {
     /// ```rust,ignore
     /// use std::io::Cursor;
     /// use std::time::Duration;
-    /// use crate::progress::{ProgUpdate, ReadState, WriteStat};
+    /// use crate::progress::ProgUpdate;
     ///
     /// let prog_update = ProgUpdate {
     ///     read_stat: Default::default(),
@@ -191,7 +191,7 @@ impl ProgUpdate {
     /// ```rust,ignore
     /// use std::io::Cursor;
     /// use std::time::Duration;
-    /// use crate::progress::{ProgUpdate, ReadState, WriteStat};
+    /// use crate::progress::ProgUpdate;
     ///
     /// let prog_update = ProgUpdate {
     ///     read_stat: Default::default(),
@@ -276,16 +276,20 @@ pub(crate) struct ReadStat {
     ///
     /// A truncated record can only occur in `conv=block` mode.
     pub(crate) records_truncated: u32,
+
+    /// The total number of bytes read.
+    pub(crate) bytes_total: u64,
 }
 
 impl ReadStat {
     /// Create a new instance.
     #[allow(dead_code)]
-    fn new(complete: u64, partial: u64, truncated: u32) -> Self {
+    fn new(complete: u64, partial: u64, truncated: u32, bytes_total: u64) -> Self {
         Self {
             reads_complete: complete,
             reads_partial: partial,
             records_truncated: truncated,
+            bytes_total,
         }
     }
 
@@ -315,6 +319,7 @@ impl std::ops::AddAssign for ReadStat {
             reads_complete: self.reads_complete + other.reads_complete,
             reads_partial: self.reads_partial + other.reads_partial,
             records_truncated: self.records_truncated + other.records_truncated,
+            bytes_total: self.bytes_total + other.bytes_total,
         }
     }
 }
@@ -514,7 +519,7 @@ mod tests {
 
     #[test]
     fn test_read_stat_report() {
-        let read_stat = ReadStat::new(1, 2, 3);
+        let read_stat = ReadStat::new(1, 2, 3, 4);
         let mut cursor = Cursor::new(vec![]);
         read_stat.report(&mut cursor).unwrap();
         assert_eq!(cursor.get_ref(), b"1+2 records in\n");
@@ -530,7 +535,7 @@ mod tests {
 
     #[test]
     fn test_prog_update_write_io_lines() {
-        let read_stat = ReadStat::new(1, 2, 3);
+        let read_stat = ReadStat::new(1, 2, 3, 4);
         let write_stat = WriteStat::new(4, 5, 6);
         let duration = Duration::new(789, 0);
         let complete = false;
diff --git a/tests/by-util/test_dd.rs b/tests/by-util/test_dd.rs
index 99eae480945..79b13960241 100644
--- a/tests/by-util/test_dd.rs
+++ b/tests/by-util/test_dd.rs
@@ -1536,3 +1536,29 @@ fn test_multiple_processes_reading_stdin() {
         .succeeds()
         .stdout_only("def\n");
 }
+
+/// Test that discarding system file cache fails for stdin.
+#[test]
+#[cfg(target_os = "linux")]
+fn test_nocache_stdin_error() {
+    #[cfg(not(target_env = "musl"))]
+    let detail = "Illegal seek";
+    #[cfg(target_env = "musl")]
+    let detail = "Invalid seek";
+    new_ucmd!()
+        .args(&["iflag=nocache", "count=0", "status=noxfer"])
+        .fails()
+        .code_is(1)
+        .stderr_only(format!("dd: failed to discard cache for: 'standard input': {detail}\n0+0 records in\n0+0 records out\n"));
+}
+
+/// Test for discarding system file cache.
+#[test]
+#[cfg(target_os = "linux")]
+fn test_nocache_file() {
+    let (at, mut ucmd) = at_and_ucmd!();
+    at.write_bytes("f", b"a".repeat(1 << 20).as_slice());
+    ucmd.args(&["if=f", "of=/dev/null", "iflag=nocache", "status=noxfer"])
+        .succeeds()
+        .stderr_only("2048+0 records in\n2048+0 records out\n");
+}