Skip to content

Commit

Permalink
Add support for AArch64 CRC32 instructions
Browse files Browse the repository at this point in the history
  • Loading branch information
valpackett committed Dec 9, 2018
1 parent 1bc367a commit 0d4aaa1
Show file tree
Hide file tree
Showing 5 changed files with 101 additions and 0 deletions.
4 changes: 4 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,16 @@ keywords = ["checksum", "crc", "crc32", "simd", "fast"]

[dependencies]
cfg-if = "0.1"
stdsimd = { path = "../../rust-lang-nursery/stdsimd/crates/stdsimd" }

[dev-dependencies]
bencher = "0.1"
quickcheck = { version = "0.6", default-features = false }
rand = "0.4"

[features]
nightly = []

[[bench]]
name = "bench"
harness = false
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ This crate contains multiple CRC32 implementations:

- A fast baseline implementation which processes up to 16 bytes per iteration
- An optimized implementation for modern `x86` using `sse` and `pclmulqdq` instructions
- An optimized implementation for `aarch64` using `crc32` instructions

Calling the `Hasher::new` constructor performs runtime feature detection to select the
best implementation for the current CPU feature set.
Expand Down
7 changes: 7 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,13 @@
//! Calling the `Hasher::new` constructor performs runtime feature detection to select the
//! best implementation for the current CPU feature set.

#![cfg_attr(all(feature = "nightly", target_arch = "aarch64"), feature(stdsimd, aarch64_target_feature))]

// XXX: remove this
#[cfg(all(feature = "nightly", target_arch = "aarch64"))]
#[macro_use]
extern crate stdsimd;

#[deny(missing_docs)]
#[cfg(test)]
#[macro_use]
Expand Down
86 changes: 86 additions & 0 deletions src/specialized/aarch64.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
//use std::arch::aarch64 as arch;
use stdsimd::arch::aarch64 as arch;

/// Hasher state for the AArch64 `crc`-instruction implementation.
///
/// Values are only created through `State::new`, which returns `None` when
/// the running CPU does not report the `crc` feature.
#[derive(Clone)]
pub struct State {
    // Checksum of everything fed in so far, stored in the same form that
    // `finalize` returns (`calculate` inverts on entry and again on exit).
    state: u32,
}

impl State {
    /// Creates a fresh hasher state, or returns `None` when the running CPU
    /// does not report the `crc` feature.
    pub fn new() -> Option<Self> {
        if !is_aarch64_feature_detected!("crc") {
            return None;
        }
        // The detection above is what makes the `unsafe` call in `update`
        // sound: every `State` that exists was created on a CPU with `crc`.
        Some(Self { state: 0 })
    }

    /// Feeds `buf` into the running checksum.
    pub fn update(&mut self, buf: &[u8]) {
        // SAFETY: The `State::new` constructor ensures that all
        // required instructions are supported by the CPU.
        let next = unsafe { calculate(self.state, buf) };
        self.state = next;
    }

    /// Consumes the state and returns the checksum accumulated so far.
    pub fn finalize(self) -> u32 {
        let Self { state } = self;
        state
    }

    /// Resets the checksum to its initial value of zero.
    pub fn reset(&mut self) {
        *self = Self { state: 0 };
    }

    /// Merges another checksum into this one by delegating to the crate's
    /// `combine::combine` helper.
    ///
    /// NOTE(review): `amount` is presumably the number of bytes covered by
    /// `other`; confirm against `combine::combine`.
    pub fn combine(&mut self, other: u32, amount: u64) {
        let merged = ::combine::combine(self.state, other, amount);
        self.state = merged;
    }
}

/// Continues the checksum `crc` over `data` using the AArch64 `crc32b` and
/// `crc32x` intrinsics and returns the updated checksum.
///
/// Both `crc` and the return value are in finalized form: the value is
/// bit-inverted on entry and again on exit, so the result of one call can be
/// passed straight back in as `crc` for the next chunk. An empty `data`
/// returns `crc` unchanged.
///
/// # Safety
///
/// The CPU executing this function must support the `crc` target feature
/// (for example, verified with `is_aarch64_feature_detected!("crc")`).
// target_feature is necessary to allow rustc to inline the crc32* wrappers
#[target_feature(enable = "crc")]
pub unsafe fn calculate(crc: u32, data: &[u8]) -> u32 {
    let mut c32 = !crc;
    // Split into an unaligned byte prefix, 8-byte-aligned words, and a byte
    // suffix so the bulk of the input can be consumed 64 bits at a time.
    let (pre_quad, quads, post_quad) = data.align_to::<u64>();

    c32 = pre_quad.iter().fold(c32, |acc, &b| arch::crc32b(acc, b));

    // `crc32x` consumes the least-significant byte of its operand first, while
    // `align_to` yields native-endian words. Interpreting each word as
    // little-endian keeps the bytes in stream order on big-endian targets;
    // `u64::from_le` is a no-op on little-endian ones.

    // unrolling increases performance by a lot
    let mut quad_iter = quads.chunks_exact(8);
    for chunk in &mut quad_iter {
        c32 = arch::crc32x(c32, u64::from_le(chunk[0]));
        c32 = arch::crc32x(c32, u64::from_le(chunk[1]));
        c32 = arch::crc32x(c32, u64::from_le(chunk[2]));
        c32 = arch::crc32x(c32, u64::from_le(chunk[3]));
        c32 = arch::crc32x(c32, u64::from_le(chunk[4]));
        c32 = arch::crc32x(c32, u64::from_le(chunk[5]));
        c32 = arch::crc32x(c32, u64::from_le(chunk[6]));
        c32 = arch::crc32x(c32, u64::from_le(chunk[7]));
    }
    // Fewer than eight words are left over after the unrolled loop.
    c32 = quad_iter
        .remainder()
        .iter()
        .fold(c32, |acc, &q| arch::crc32x(acc, u64::from_le(q)));

    c32 = post_quad.iter().fold(c32, |acc, &b| arch::crc32b(acc, b));

    !c32
}

#[cfg(test)]
mod test {
    quickcheck! {
        // Property: feeding the same sequence of slices to the baseline and
        // the AArch64 implementation yields the same final checksum.
        fn check_against_baseline(chunks: Vec<(Vec<u8>, usize)>) -> bool {
            let mut reference = super::super::super::baseline::State::new();
            let mut accelerated = super::State::new().expect("not supported");
            for (bytes, raw_offset) in chunks {
                // simulate random alignments by skipping up to 15 leading bytes
                let skip = raw_offset & 0xF;
                // chunks too short to skip into are fed whole
                let input = if skip < bytes.len() {
                    &bytes[skip..]
                } else {
                    &bytes[..]
                };
                reference.update(input);
                accelerated.update(input);
            }
            accelerated.finalize() == reference.finalize()
        }
    }
}
3 changes: 3 additions & 0 deletions src/specialized/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@ cfg_if! {
))] {
mod pclmulqdq;
pub use self::pclmulqdq::State;
} else if #[cfg(all(feature = "nightly", target_arch = "aarch64"))] {
mod aarch64;
pub use self::aarch64::State;
} else {
#[derive(Clone)]
pub enum State {}
Expand Down

0 comments on commit 0d4aaa1

Please sign in to comment.