Skip to content

Commit

Permalink
shuf: improve performance
Browse files Browse the repository at this point in the history
Use the memchr crate to speed up splitting the input data on the separator byte.

Signed-off-by: Christian Menges <[email protected]>
  • Loading branch information
Garfield96 committed Jun 16, 2022
1 parent 95de5f6 commit 19a43bf
Show file tree
Hide file tree
Showing 3 changed files with 7 additions and 12 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions src/uu/shuf/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ path = "src/shuf.rs"

[dependencies]
clap = { version = "3.1", features = ["wrap_help", "cargo"] }
+ memchr = "2.5.0"
rand = "0.8"
rand_core = "0.6"
uucore = { version=">=0.0.11", package="uucore", path="../../uucore" }
Expand Down
17 changes: 5 additions & 12 deletions src/uu/shuf/src/shuf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
// spell-checker:ignore (ToDO) cmdline evec seps rvec fdata

use clap::{crate_version, Arg, Command, Values};
+ use memchr::memchr_iter;
use rand::prelude::SliceRandom;
use rand::RngCore;
use std::fs::File;
Expand Down Expand Up @@ -218,20 +219,12 @@ fn find_seps(data: &mut Vec<&[u8]>, sep: u8) {
if data[i].contains(&sep) {
let this = data.swap_remove(i);
let mut p = 0;
- let mut i = 1;
- loop {
- if i == this.len() {
- break;
- }
-
- if this[i] == sep {
- data.push(&this[p..i]);
- p = i + 1;
- }
- i += 1;
+ for i in memchr_iter(sep, this) {
+ data.push(&this[p..i]);
+ p = i + 1;
}
if p < this.len() {
- data.push(&this[p..i]);
+ data.push(&this[p..]);
}
}
}
Expand Down

0 comments on commit 19a43bf

Please sign in to comment.