Skip to content

Commit

Permalink
Merge pull request #4432 from jfinkels/dd-nocache
Browse files Browse the repository at this point in the history
dd: support the [io]flag=nocache option
  • Loading branch information
sylvestre authored May 12, 2023
2 parents 98fa941 + 4ff318a commit 64c49de
Show file tree
Hide file tree
Showing 7 changed files with 211 additions and 13 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions src/uu/dd/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ gcd = { workspace=true }
libc = { workspace=true }
uucore = { workspace=true, features=["memo"] }

[target.'cfg(any(target_os = "linux"))'.dependencies]
nix = { workspace=true, features = ["fs"] }

[target.'cfg(any(target_os = "linux", target_os = "android"))'.dependencies]
signal-hook = { workspace=true }

Expand Down
169 changes: 166 additions & 3 deletions src/uu/dd/src/dd.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
// For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code.

// spell-checker:ignore fname, tname, fpath, specfile, testfile, unspec, ifile, ofile, outfile, fullblock, urand, fileio, atoe, atoibm, behaviour, bmax, bremain, cflags, creat, ctable, ctty, datastructures, doesnt, etoa, fileout, fname, gnudd, iconvflags, iseek, nocache, noctty, noerror, nofollow, nolinks, nonblock, oconvflags, oseek, outfile, parseargs, rlen, rmax, rremain, rsofar, rstat, sigusr, wlen, wstat seekable oconv canonicalized
// spell-checker:ignore fname, tname, fpath, specfile, testfile, unspec, ifile, ofile, outfile, fullblock, urand, fileio, atoe, atoibm, behaviour, bmax, bremain, cflags, creat, ctable, ctty, datastructures, doesnt, etoa, fileout, fname, gnudd, iconvflags, iseek, nocache, noctty, noerror, nofollow, nolinks, nonblock, oconvflags, oseek, outfile, parseargs, rlen, rmax, rremain, rsofar, rstat, sigusr, wlen, wstat seekable oconv canonicalized fadvise Fadvise FADV DONTNEED ESPIPE

mod datastructures;
use datastructures::*;
Expand Down Expand Up @@ -42,9 +42,16 @@ use std::time;

use clap::{crate_version, Arg, Command};
use gcd::Gcd;
#[cfg(target_os = "linux")]
use nix::{
errno::Errno,
fcntl::{posix_fadvise, PosixFadviseAdvice},
};
use uucore::display::Quotable;
use uucore::error::{FromIo, UResult};
use uucore::{format_usage, help_about, help_section, help_usage, show_error};
#[cfg(target_os = "linux")]
use uucore::{show, show_if_err};

const ABOUT: &str = help_about!("dd.md");
const AFTER_HELP: &str = help_section!("after help", "dd.md");
Expand Down Expand Up @@ -131,6 +138,16 @@ impl Source {
Self::StdinFile(f)
}

/// Report the size of the data source, in bytes.
///
/// Only the `File` variant has a known size, taken from the file's
/// metadata; a size that does not fit in an `i64` is clamped to
/// `i64::MAX`. Every other kind of source reports a length of 0.
///
/// # Errors
///
/// Returns the underlying I/O error if the file metadata cannot be read.
fn len(&self) -> std::io::Result<i64> {
    if let Self::File(file) = self {
        let size = file.metadata()?.len();
        let clamped: i64 = size.try_into().unwrap_or(i64::MAX);
        return Ok(clamped);
    }
    // No length can be determined for the remaining kinds of source.
    Ok(0)
}

fn skip(&mut self, n: u64) -> io::Result<u64> {
match self {
#[cfg(not(unix))]
Expand All @@ -156,6 +173,23 @@ impl Source {
Self::Fifo(f) => io::copy(&mut f.take(n), &mut io::sink()),
}
}

/// Tell the kernel that part of the data source no longer needs caching.
///
/// The region is the contiguous range of `len` bytes starting at
/// `offset`. For the `File` variant this issues `posix_fadvise` with
/// the `POSIX_FADV_DONTNEED` advice and returns its result.
///
/// # Errors
///
/// Returns `Errno::ESPIPE` for every variant other than `File`, or
/// whatever error `posix_fadvise` itself reports.
#[cfg(target_os = "linux")]
fn discard_cache(&self, offset: libc::off_t, len: libc::off_t) -> nix::Result<()> {
    if let Self::File(file) = self {
        posix_fadvise(
            file.as_raw_fd(),
            offset,
            len,
            PosixFadviseAdvice::POSIX_FADV_DONTNEED,
        )
    } else {
        // "Illegal seek": the same error is used for all non-file sources.
        Err(Errno::ESPIPE)
    }
}
}

impl Read for Source {
Expand Down Expand Up @@ -296,6 +330,29 @@ impl<'a> Read for Input<'a> {
}

impl<'a> Input<'a> {
/// Ask the kernel to drop cached data for part of the input.
///
/// The region is the contiguous range of `len` bytes starting at
/// `offset`. On Linux the request is forwarded to the underlying
/// source; a failure is not returned to the caller but handed to
/// `show_if_err!` together with a context message. On every other
/// target this function does nothing.
#[allow(unused_variables)]
fn discard_cache(&self, offset: libc::off_t, len: libc::off_t) {
    #[cfg(target_os = "linux")]
    {
        let outcome = self
            .src
            .discard_cache(offset, len)
            .map_err_context(|| "failed to discard cache for: 'standard input'".to_string());
        show_if_err!(outcome);
    }
    #[cfg(not(target_os = "linux"))]
    {
        // TODO Is there a way to discard filesystem cache on
        // these other operating systems?
    }
}

/// Fills a given buffer.
/// Reads in increments of 'self.ibs'.
/// The start of each ibs-sized read follows the previous one.
Expand All @@ -317,13 +374,13 @@ impl<'a> Input<'a> {
_ => break,
}
}

buf.truncate(bytes_total);
Ok(ReadStat {
reads_complete,
reads_partial,
// Records are not truncated when filling.
records_truncated: 0,
bytes_total: bytes_total.try_into().unwrap(),
})
}

Expand All @@ -334,6 +391,7 @@ impl<'a> Input<'a> {
let mut reads_complete = 0;
let mut reads_partial = 0;
let mut base_idx = 0;
let mut bytes_total = 0;

while base_idx < buf.len() {
let next_blk = cmp::min(base_idx + self.settings.ibs, buf.len());
Expand All @@ -342,11 +400,13 @@ impl<'a> Input<'a> {
match self.read(&mut buf[base_idx..next_blk])? {
0 => break,
rlen if rlen < target_len => {
bytes_total += rlen;
reads_partial += 1;
let padding = vec![pad; target_len - rlen];
buf.splice(base_idx + rlen..next_blk, padding.into_iter());
}
_ => {
rlen => {
bytes_total += rlen;
reads_complete += 1;
}
}
Expand All @@ -359,6 +419,7 @@ impl<'a> Input<'a> {
reads_complete,
reads_partial,
records_truncated: 0,
bytes_total: bytes_total.try_into().unwrap(),
})
}
}
Expand Down Expand Up @@ -447,6 +508,33 @@ impl Dest {
_ => Ok(()),
}
}

/// Tell the kernel that part of the destination no longer needs caching.
///
/// The region is the contiguous range of `len` bytes starting at
/// `offset`. For the `File` variant this issues `posix_fadvise` with
/// the `POSIX_FADV_DONTNEED` advice and returns its result.
///
/// # Errors
///
/// Returns `Errno::ESPIPE` for every variant other than `File`, or
/// whatever error `posix_fadvise` itself reports.
#[cfg(target_os = "linux")]
fn discard_cache(&self, offset: libc::off_t, len: libc::off_t) -> nix::Result<()> {
    if let Self::File(file, _) = self {
        posix_fadvise(
            file.as_raw_fd(),
            offset,
            len,
            PosixFadviseAdvice::POSIX_FADV_DONTNEED,
        )
    } else {
        // "Illegal seek": the same error is used for all non-file destinations.
        Err(Errno::ESPIPE)
    }
}

/// Report the size of the data destination, in bytes.
///
/// Only the `File` variant has a known size, taken from the file's
/// metadata; a size that does not fit in an `i64` is clamped to
/// `i64::MAX`. Every other kind of destination reports a length of 0.
///
/// # Errors
///
/// Returns the underlying I/O error if the file metadata cannot be read.
fn len(&self) -> std::io::Result<i64> {
    if let Self::File(file, _) = self {
        let size = file.metadata()?.len();
        let clamped: i64 = size.try_into().unwrap_or(i64::MAX);
        return Ok(clamped);
    }
    // No length can be determined for the remaining kinds of destination.
    Ok(0)
}
}

/// Decide whether the given buffer is all zeros.
Expand Down Expand Up @@ -580,6 +668,29 @@ impl<'a> Output<'a> {
Ok(Self { dst, settings })
}

/// Discard the system file cache for the given portion of the output.
///
/// `offset` and `len` specify a contiguous portion of the output.
/// On Linux, this function informs the kernel (through the underlying
/// destination) that the specified portion of the output file is no
/// longer needed. A failure is not returned to the caller; it is
/// handed to `show_if_err!` together with a context message.
///
/// On every other target this function currently does nothing.
// `offset` and `len` are unused when not compiling for Linux.
#[allow(unused_variables)]
fn discard_cache(&self, offset: libc::off_t, len: libc::off_t) {
    #[cfg(target_os = "linux")]
    {
        show_if_err!(self
            .dst
            .discard_cache(offset, len)
            .map_err_context(|| "failed to discard cache for: 'standard output'".to_string()));
    }
    // This fallback must be gated on *not* Linux so that it mirrors
    // the input-side implementation and only compiles on platforms
    // that lack the `posix_fadvise`-based path above.
    #[cfg(not(target_os = "linux"))]
    {
        // TODO Is there a way to discard filesystem cache on
        // these other operating systems?
    }
}

/// Write the given bytes one block at a time.
///
/// This may write partial blocks (for example, if the underlying
Expand Down Expand Up @@ -674,13 +785,41 @@ fn dd_copy(mut i: Input, mut o: Output) -> std::io::Result<()> {
// Optimization: if no blocks are to be written, then don't
// bother allocating any buffers.
if let Some(Num::Blocks(0) | Num::Bytes(0)) = i.settings.count {
// Even though we are not reading anything from the input
// file, we still need to honor the `nocache` flag, which
// requests that we inform the system that we no longer
// need the contents of the input file in a system cache.
//
// TODO Better error handling for overflowing `len`.
if i.settings.iflags.nocache {
let offset = 0;
#[allow(clippy::useless_conversion)]
let len = i.src.len()?.try_into().unwrap();
i.discard_cache(offset, len);
}
// Similarly, discard the system cache for the output file.
//
// TODO Better error handling for overflowing `len`.
if i.settings.oflags.nocache {
let offset = 0;
#[allow(clippy::useless_conversion)]
let len = o.dst.len()?.try_into().unwrap();
o.discard_cache(offset, len);
}
return finalize(&mut o, rstat, wstat, start, &prog_tx, output_thread);
};

// Create a common buffer with a capacity of the block size.
// This is the max size needed.
let mut buf = vec![BUF_INIT_BYTE; bsize];

// Index in the input file where we are reading bytes and in
// the output file where we are writing bytes.
//
// These are updated on each iteration of the main loop.
let mut read_offset = 0;
let mut write_offset = 0;

// The main read/write loop.
//
// Each iteration reads blocks from the input and writes
Expand All @@ -700,6 +839,30 @@ fn dd_copy(mut i: Input, mut o: Output) -> std::io::Result<()> {
}
let wstat_update = o.write_blocks(&buf)?;

// Discard the system file cache for the read portion of
// the input file.
//
// TODO Better error handling for overflowing `offset` and `len`.
let read_len = rstat_update.bytes_total;
if i.settings.iflags.nocache {
let offset = read_offset.try_into().unwrap();
let len = read_len.try_into().unwrap();
i.discard_cache(offset, len);
}
read_offset += read_len;

// Discard the system file cache for the written portion
// of the output file.
//
// TODO Better error handling for overflowing `offset` and `len`.
let write_len = wstat_update.bytes_total;
if o.settings.oflags.nocache {
let offset = write_offset.try_into().unwrap();
let len = write_len.try_into().unwrap();
o.discard_cache(offset, len);
}
write_offset += write_len;

// Update the read/write stats and inform the progress thread once per second.
//
// If the receiver is disconnected, `send()` returns an
Expand Down
4 changes: 2 additions & 2 deletions src/uu/dd/src/parseargs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -304,7 +304,7 @@ impl Parser {
"directory" => linux_only!(f, i.directory = true),
"dsync" => linux_only!(f, i.dsync = true),
"sync" => linux_only!(f, i.sync = true),
"nocache" => return Err(ParseError::Unimplemented(f.to_string())),
"nocache" => linux_only!(f, i.nocache = true),
"nonblock" => linux_only!(f, i.nonblock = true),
"noatime" => linux_only!(f, i.noatime = true),
"noctty" => linux_only!(f, i.noctty = true),
Expand Down Expand Up @@ -336,7 +336,7 @@ impl Parser {
"directory" => linux_only!(f, o.directory = true),
"dsync" => linux_only!(f, o.dsync = true),
"sync" => linux_only!(f, o.sync = true),
"nocache" => return Err(ParseError::Unimplemented(f.to_string())),
"nocache" => linux_only!(f, o.nocache = true),
"nonblock" => linux_only!(f, o.nonblock = true),
"noatime" => linux_only!(f, o.noatime = true),
"noctty" => linux_only!(f, o.noctty = true),
Expand Down
2 changes: 1 addition & 1 deletion src/uu/dd/src/parseargs/unit_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ fn unimplemented_flags_should_error() {
let mut succeeded = Vec::new();

// The following flags are not implemented
for flag in ["cio", "nocache", "nolinks", "text", "binary"] {
for flag in ["cio", "nolinks", "text", "binary"] {
let args = vec![format!("iflag={flag}")];

if Parser::new()
Expand Down
Loading

0 comments on commit 64c49de

Please sign in to comment.