From 4cc058a78903252cdf1eec3c11e735f158b176f7 Mon Sep 17 00:00:00 2001 From: Patrick Jackson Date: Tue, 7 Jun 2022 10:45:19 -0700 Subject: [PATCH 1/3] dd: reuse buffer for the most common cases --- src/uu/dd/src/dd.rs | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/src/uu/dd/src/dd.rs b/src/uu/dd/src/dd.rs index 115da8bccfd..46c35d0ecec 100644 --- a/src/uu/dd/src/dd.rs +++ b/src/uu/dd/src/dd.rs @@ -414,6 +414,10 @@ where let (prog_tx, rx) = mpsc::channel(); thread::spawn(gen_prog_updater(rx, i.print_level)); + // Create a common buffer with a capacity of the block size. + // This is the max size needed. + let mut buf = vec![BUF_INIT_BYTE; bsize]; + // The main read/write loop. // // Each iteration reads blocks from the input and writes @@ -427,7 +431,7 @@ where // best buffer size for reading based on the number of // blocks already read and the number of blocks remaining. let loop_bsize = calc_loop_bsize(&i.count, &rstat, &wstat, i.ibs, bsize); - let (rstat_update, buf) = read_helper(&mut i, loop_bsize)?; + let rstat_update = read_helper(&mut i, &mut buf, loop_bsize)?; if rstat_update.is_empty() { break; } @@ -600,7 +604,11 @@ impl Write for Output { } /// Read helper performs read operations common to all dd reads, and dispatches the buffer to relevant helper functions as dictated by the operations requested by the user. -fn read_helper(i: &mut Input, bsize: usize) -> std::io::Result<(ReadStat, Vec)> { +fn read_helper( + i: &mut Input, + mut buf: &mut Vec, + bsize: usize, +) -> std::io::Result { // Local Helper Fns ------------------------------------------------- fn perform_swab(buf: &mut [u8]) { for base in (1..buf.len()).step_by(2) { @@ -609,14 +617,16 @@ fn read_helper(i: &mut Input, bsize: usize) -> std::io::Result<(Read } // ------------------------------------------------------------------ // Read - let mut buf = vec![BUF_INIT_BYTE; bsize]; + // Resize the buffer to the bsize. Any garbage data in the buffer is overwritten or truncated, so there is no need to fill with BUF_INIT_BYTE first. + buf.resize(bsize, BUF_INIT_BYTE); + let mut rstat = match i.cflags.sync { Some(ch) => i.fill_blocks(&mut buf, ch)?, _ => i.fill_consecutive(&mut buf)?, }; // Return early if no data if rstat.reads_complete == 0 && rstat.reads_partial == 0 { - return Ok((rstat, buf)); + return Ok(rstat); } // Perform any conv=x[,x...] options @@ -626,10 +636,10 @@ fn read_helper(i: &mut Input, bsize: usize) -> std::io::Result<(Read match i.cflags.mode { Some(ref mode) => { - let buf = conv_block_unblock_helper(buf, mode, &mut rstat); - Ok((rstat, buf)) + *buf = conv_block_unblock_helper(buf.clone(), mode, &mut rstat); + Ok(rstat) } - None => Ok((rstat, buf)), + None => Ok(rstat), } } From a186adbff1119adfeba54e6ab1aa8751e0635fb9 Mon Sep 17 00:00:00 2001 From: Patrick Jackson Date: Thu, 9 Jun 2022 09:27:42 -0700 Subject: [PATCH 2/3] dd: fixing clippy warnings. --- src/uu/dd/src/dd.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/uu/dd/src/dd.rs b/src/uu/dd/src/dd.rs index 46c35d0ecec..e47f923eec6 100644 --- a/src/uu/dd/src/dd.rs +++ b/src/uu/dd/src/dd.rs @@ -606,7 +606,7 @@ impl Write for Output { /// Read helper performs read operations common to all dd reads, and dispatches the buffer to relevant helper functions as dictated by the operations requested by the user. fn read_helper( i: &mut Input, - mut buf: &mut Vec, + buf: &mut Vec, bsize: usize, ) -> std::io::Result { // Local Helper Fns ------------------------------------------------- @@ -621,8 +621,8 @@ fn read_helper( buf.resize(bsize, BUF_INIT_BYTE); let mut rstat = match i.cflags.sync { - Some(ch) => i.fill_blocks(&mut buf, ch)?, - _ => i.fill_consecutive(&mut buf)?, + Some(ch) => i.fill_blocks(buf, ch)?, + _ => i.fill_consecutive(buf)?, }; // Return early if no data if rstat.reads_complete == 0 && rstat.reads_partial == 0 { @@ -631,7 +631,7 @@ fn read_helper( // Perform any conv=x[,x...] options if i.cflags.swab { - perform_swab(&mut buf); + perform_swab(buf); } match i.cflags.mode { From 881f0c3d0604e1c2098e7c4e2d05677046b0d8d3 Mon Sep 17 00:00:00 2001 From: Patrick Jackson Date: Thu, 9 Jun 2022 11:01:30 -0700 Subject: [PATCH 3/3] dd: add BENCHMARKING instructions --- src/uu/dd/BENCHMARKING.md | 62 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) create mode 100644 src/uu/dd/BENCHMARKING.md diff --git a/src/uu/dd/BENCHMARKING.md b/src/uu/dd/BENCHMARKING.md new file mode 100644 index 00000000000..57a19bf5c23 --- /dev/null +++ b/src/uu/dd/BENCHMARKING.md @@ -0,0 +1,62 @@ +# Benchmarking dd + +`dd` is a utility used for copying and converting files. It is often used for +writing directly to devices, such as when writing an `.iso` file directly to a +drive. + +## Understanding dd + +At the core, `dd` has a simple loop of operation. It reads in `blocksize` bytes +from an input, optionally performs a conversion on the bytes, and then writes +`blocksize` bytes to an output file. + +In typical usage, the performance of `dd` is dominated by the speed at which it +can read or write to the filesystem. For those scenarios it is best to optimize +the blocksize for the performance of the devices being read/written to. Devices +typically have an optimal block size that they work best at, so for maximum +performance `dd` should be using a block size, or multiple of the block size, +that the underlying devices prefer. + +For benchmarking `dd` itself we will use fast special files provided by the +operating system that work out of RAM, `/dev/zero` and `/dev/null`. This reduces +the time taken reading/writing files to a minimum and maximises the percentage +time we spend in the `dd` tool itself, but care still needs to be taken to +understand where we are benchmarking the `dd` tool and where we are just +benchmarking memory performance. + +The main parameter to vary for a `dd` benchmark is the blocksize, but benchmarks +testing the conversions that are supported by `dd` could also be interesting. + +`dd` has a convenient `count` argument, that will copy `count` blocks of data +from the input to the output, which is useful for benchmarking. + +## Blocksize Benchmarks + +When measuring the impact of blocksize on the throughput, we want to avoid +testing the startup time of `dd`. `dd` itself will give a report on the +throughput speed once complete, but it's probably better to use an external +tool, such as `hyperfine` to measure the performance. + +Benchmarks should be sized so that they run for a handful of seconds at a +minimum to avoid measuring the startup time unnecessarily. The total time will +be roughly equivalent to the total bytes copied (`blocksize` x `count`). + +Some useful invocations for testing would be the following: + +``` +hyperfine "./target/release/dd bs=4k count=1000000 < /dev/zero > /dev/null" +hyperfine "./target/release/dd bs=1M count=20000 < /dev/zero > /dev/null" +hyperfine "./target/release/dd bs=1G count=10 < /dev/zero > /dev/null" +``` + +Choosing what to benchmark depends greatly on what you want to measure. +Typically you would choose a small blocksize for measuring the performance of +`dd`, as that would maximize the overhead introduced by the `dd` tool. `dd` +typically does some set amount of work per block which only depends on the size +of the block if conversions are used. + +As an example, https://github.com/uutils/coreutils/pull/3600 made a change to +reuse the same buffer between block copies, avoiding the need to reallocate a +new block of memory for each copy. The impact of that change mostly had an +impact on large block size copies because those are the circumstances where the +memory performance dominated the total performance.