From 8bdee49cdd24e9db3a1a51f6137ea26fa30f7b54 Mon Sep 17 00:00:00 2001 From: Owen Anderson Date: Thu, 28 Jul 2022 05:38:09 -0700 Subject: [PATCH] Speed up sum by using reasonable read buffer sizes. (#3741) * Speed up sum by using reasonable read buffer sizes. Use a 4K read buffer for each of the checksum functions, which seems reasonable. This improves the performance of BSD checksums on odyssey1024.txt from 399ms to 325ms on my laptop, and of SysV checksums from 242ms to 67ms. * Add BENCHMARKING.md for `sum`. * Add comment regarding block sizes. * Improve portability of BENCHMARKING.md * Make `div_ceil` const and enhance comment. --- src/uu/sum/BENCHMARKING.md | 23 +++++++++++++++++++++++ src/uu/sum/src/sum.rs | 23 +++++++++++++++++------ 2 files changed, 40 insertions(+), 6 deletions(-) create mode 100644 src/uu/sum/BENCHMARKING.md diff --git a/src/uu/sum/BENCHMARKING.md b/src/uu/sum/BENCHMARKING.md new file mode 100644 index 00000000000..d93e4220f74 --- /dev/null +++ b/src/uu/sum/BENCHMARKING.md @@ -0,0 +1,23 @@ +## Benchmarking `sum` + + + +Large sample files can for example be found in the [Wikipedia database dumps](https://dumps.wikimedia.org/wikidatawiki/latest/), usually sized at multiple gigabytes and comprising more than 100M lines. + +After you have obtained and uncompressed such a file, you need to build `sum` in release mode + +```shell +$ cargo build --release --package uu_sum +``` + +and then you can time how long it takes to checksum the file by running + +```shell +$ time ./target/release/sum wikidatawiki-20211001-pages-logging.xml +``` + +For more systematic measurements that include warm-ups, repetitions and comparisons, [Hyperfine](https://github.com/sharkdp/hyperfine) can be helpful. 
For example, to compare this implementation to the one provided by your distribution, run + +```shell +$ hyperfine "./target/release/sum wikidatawiki-20211001-pages-logging.xml" "sum wikidatawiki-20211001-pages-logging.xml" +``` diff --git a/src/uu/sum/src/sum.rs b/src/uu/sum/src/sum.rs index 9280fcf813f..7f3628c2a8a 100644 --- a/src/uu/sum/src/sum.rs +++ b/src/uu/sum/src/sum.rs @@ -23,14 +23,21 @@ static USAGE: &str = "{} [OPTION]... [FILE]..."; static SUMMARY: &str = "Checksum and count the blocks in a file.\n\ With no FILE, or when FILE is -, read standard input."; +// This can be replaced with usize::div_ceil once it is stabilized. +// This implementation approach is optimized for when `b` is a constant, +// particularly a power of two. +const fn div_ceil(a: usize, b: usize) -> usize { + (a + b - 1) / b +} + fn bsd_sum(mut reader: Box<dyn Read>) -> (usize, u16) { - let mut buf = [0; 1024]; - let mut blocks_read = 0; + let mut buf = [0; 4096]; + let mut bytes_read = 0; let mut checksum: u16 = 0; loop { match reader.read(&mut buf) { Ok(n) if n != 0 => { - blocks_read += 1; + bytes_read += n; for &byte in buf[..n].iter() { checksum = (checksum >> 1) + ((checksum & 1) << 15); checksum = checksum.wrapping_add(u16::from(byte)); @@ -40,18 +47,20 @@ fn bsd_sum(mut reader: Box<dyn Read>) -> (usize, u16) { } } + // Report blocks read in terms of 1024-byte blocks. + let blocks_read = div_ceil(bytes_read, 1024); (blocks_read, checksum) } fn sysv_sum(mut reader: Box<dyn Read>) -> (usize, u16) { - let mut buf = [0; 512]; - let mut blocks_read = 0; + let mut buf = [0; 4096]; + let mut bytes_read = 0; let mut ret = 0u32; loop { match reader.read(&mut buf) { Ok(n) if n != 0 => { - blocks_read += 1; + bytes_read += n; for &byte in buf[..n].iter() { ret = ret.wrapping_add(u32::from(byte)); } @@ -63,6 +72,8 @@ fn sysv_sum(mut reader: Box<dyn Read>) -> (usize, u16) { ret = (ret & 0xffff) + (ret >> 16); ret = (ret & 0xffff) + (ret >> 16); + // Report blocks read in terms of 512-byte blocks. 
+ let blocks_read = div_ceil(bytes_read, 512); (blocks_read, ret as u16) }