Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Inline ASM sha1 #450

Closed
wants to merge 8 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions .github/workflows/sha1.yml
Original file line number Diff line number Diff line change
Expand Up @@ -187,3 +187,26 @@ jobs:
override: true
- run: cargo test --no-default-features
- run: cargo test

# TODO: merge with test on MSRV bump to 1.59 or higher
# TODO: do i need to think about no-std platforms here?
test-inline-asm:
runs-on: ubuntu-latest
strategy:
matrix:
target:
- aarch64-unknown-linux-gnu
- x86_64-unknown-linux-gnu
- x86-unknown-linux-gnu
# TODO - aarch64-apple-darwin
rust:
- 1.59.0 # MSRV
steps:
- uses: actions/checkout@v3
- uses: RustCrypto/actions/cargo-cache@master
- uses: actions-rs/toolchain@v1
with:
profile: minimal
toolchain: ${{ matrix.rust }}
override: true
- run: cargo test --features inline-asm
7 changes: 7 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions sha1/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ cfg-if = "1.0"
[target.'cfg(any(target_arch = "aarch64", target_arch = "x86", target_arch = "x86_64"))'.dependencies]
cpufeatures = "0.2"
sha1-asm = { version = "0.5", optional = true }
asm_block = { version = "0.1.3", optional = true }

[dev-dependencies]
digest = { version = "0.10.4", features = ["dev"] }
Expand All @@ -30,6 +31,7 @@ oid = ["digest/oid"] # Enable OID support. WARNING: Bumps MSRV to 1.57
asm = ["sha1-asm"] # WARNING: this feature SHOULD NOT be enabled by library crates
compress = [] # Expose compress function
force-soft = [] # Force software implementation
inline-asm = ["asm_block"] # TODO: i don't know why the "do not enable by library crates" warning is in the asm feature, flagging this to ask about it. # WARNING: bumps MSRV to 1.59

[package.metadata.docs.rs]
all-features = true
Expand Down
2 changes: 2 additions & 0 deletions sha1/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ We provide this crate for legacy interoperability purposes only.

Rust **1.41** or higher.

Enabling feature flag `inline-asm` requires Rust **1.59** or higher.

Minimum supported Rust version can be changed in the future, but it will be
done with a minor version bump.

Expand Down
279 changes: 279 additions & 0 deletions sha1/src/asm/aarch64.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,279 @@
//! SHA-1 hash in AArch64 assembly, adapted from Emmanuel Gil Peyrot's MIT-licensed implementation
//
// /*
// * SHA-1 hash in AArch64 assembly
// *
// * Copyright (c) 2020 Emmanuel Gil Peyrot <[email protected]>. (MIT License)
// *
// * Permission is hereby granted, free of charge, to any person obtaining a copy of
// * this software and associated documentation files (the "Software"), to deal in
// * the Software without restriction, including without limitation the rights to
// * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
// * the Software, and to permit persons to whom the Software is furnished to do so,
// * subject to the following conditions:
// * - The above copyright notice and this permission notice shall be included in
// * all copies or substantial portions of the Software.
// * - The Software is provided "as is", without warranty of any kind, express or
// * implied, including but not limited to the warranties of merchantability,
// * fitness for a particular purpose and noninfringement. In no event shall the
// * authors or copyright holders be liable for any claim, damages or other
// * liability, whether in an action of contract, tort or otherwise, arising from,
// * out of or in connection with the Software or the use or other dealings in the
// * Software.
// */
use core::arch::asm;

// macro_rules! sha_1_through_4 {
// (F, $a: tt, $b: tt, $c: tt, $d: tt, $k: tt, $s: literal, $t: literal, $tmp1: tt, $tmp2: tt) => {

/// SHA1 compress function. We don't have enough registers to load the whole block,
/// so we need to use memory address to refer to the inputs. Due to possible failure
/// of register allocation on `x86`, we explicitly specify registers to use.
#[cfg(all(feature = "inline-asm", target_arch = "aarch64"))]
pub fn compress(state: &mut [u32; 5], blocks: &[[u8; 64]]) {
let mut out_state = [0u32; 5];
// SAFETY: inline-assembly
unsafe {
asm!(
// from original code, some docs :)
// /*
// * Storage usage:
// * Bytes Location Description
// * 4 x0 state argument
// * 4 x1 block argument
// * 16 q0 W0
// * 16 q1 W1
// * 16 q2 W2
// * 16 q3 W3
// * 16 q4 k
// * 16 q5 Original ABCD
// * 16 q6 ABCD (with s3 being A)
// * 4 s16 E
// * 4 s17 e0
// * 4 s18 e1
// * 16 q19 wk
// */

// Load state in registers
// original code:
// ldr q5, [x0]
// ldr s16, [x0, 16]
// this now happens at the bottom...
// TODO what is this doing?
// i believe it's copying state[0..4] into v6 (which is also q6)
// confirmed this is the mutable copy of the first 4 words of the state
"mov v6.16b, v5.16b",

// Load block in registers
// original code:
// ldr q0, [x1]
// ldr q1, [x1, 16]
// ldr q2, [x1, 32]
// ldr q3, [x1, 48]
// this is at the bottom now

// from original code: TODO: only do that on little endian
// this flips the blocks from little to big endian
"rev32 v0.16b, v0.16b",
"rev32 v1.16b, v1.16b",
"rev32 v2.16b, v2.16b",
"rev32 v3.16b, v3.16b",

// k for the next five rounds
"adrp x1, .K0",
"ldr q4, [x1, #:lo12:.K0]",

// 0
"sha1h s18, s6",
"add v19.4s, v0.4s, v4.4s",
"sha1c q6, s16, v19.4s",
"sha1su0 v0.4s, v1.4s, v2.4s",

// 1
"sha1h s17, s6",
"add v19.4s, v1.4s, v4.4s",
"sha1c q6, s18, v19.4s",
"sha1su1 v0.4s, v3.4s",
"sha1su0 v1.4s, v2.4s, v3.4s",

// 2
"sha1h s18, s6",
"add v19.4s, v2.4s, v4.4s",
"sha1c q6, s17, v19.4s",
"sha1su1 v1.4s, v0.4s",
"sha1su0 v2.4s, v3.4s, v0.4s",

// 3
"sha1h s17, s6",
"add v19.4s, v3.4s, v4.4s",
"sha1c q6, s18, v19.4s",
"sha1su1 v2.4s, v1.4s",
"sha1su0 v3.4s, v0.4s, v1.4s",

// 4
"sha1h s18, s6",
"add v19.4s, v0.4s, v4.4s",
"sha1c q6, s17, v19.4s",
"sha1su1 v3.4s, v2.4s",
"sha1su0 v0.4s, v1.4s, v2.4s",

// k for the next five rounds
"adrp x1, .K1",
"ldr q4, [x1, #:lo12:.K1]",

// 5
"sha1h s17, s6",
"add v19.4s, v1.4s, v4.4s",
"sha1p q6, s18, v19.4s",
"sha1su1 v0.4s, v3.4s",
"sha1su0 v1.4s, v2.4s, v3.4s",

// 6
"sha1h s18, s6",
"add v19.4s, v2.4s, v4.4s",
"sha1p q6, s17, v19.4s",
"sha1su1 v1.4s, v0.4s",
"sha1su0 v2.4s, v3.4s, v0.4s",

// 7
"sha1h s17, s6",
"add v19.4s, v3.4s, v4.4s",
"sha1p q6, s18, v19.4s",
"sha1su1 v2.4s, v1.4s",
"sha1su0 v3.4s, v0.4s, v1.4s",

// 8
"sha1h s18, s6",
"add v19.4s, v0.4s, v4.4s",
"sha1p q6, s17, v19.4s",
"sha1su1 v3.4s, v2.4s",
"sha1su0 v0.4s, v1.4s, v2.4s",

// 9
"sha1h s17, s6",
"add v19.4s, v1.4s, v4.4s",
"sha1p q6, s18, v19.4s",
"sha1su1 v0.4s, v3.4s",
"sha1su0 v1.4s, v2.4s, v3.4s",

// k for the next five rounds
"adrp x1, .K2",
"ldr q4, [x1, #:lo12:.K2]",

// 10
"sha1h s18, s6",
"add v19.4s, v2.4s, v4.4s",
"sha1m q6, s17, v19.4s",
"sha1su1 v1.4s, v0.4s",
"sha1su0 v2.4s, v3.4s, v0.4s",

// 11
"sha1h s17, s6",
"add v19.4s, v3.4s, v4.4s",
"sha1m q6, s18, v19.4s",
"sha1su1 v2.4s, v1.4s",
"sha1su0 v3.4s, v0.4s, v1.4s",

// 12
"sha1h s18, s6",
"add v19.4s, v0.4s, v4.4s",
"sha1m q6, s17, v19.4s",
"sha1su1 v3.4s, v2.4s",
"sha1su0 v0.4s, v1.4s, v2.4s",

// 13
"sha1h s17, s6",
"add v19.4s, v1.4s, v4.4s",
"sha1m q6, s18, v19.4s",
"sha1su1 v0.4s, v3.4s",
"sha1su0 v1.4s, v2.4s, v3.4s",

// 14
"sha1h s18, s6",
"add v19.4s, v2.4s, v4.4s",
"sha1m q6, s17, v19.4s",
"sha1su1 v1.4s, v0.4s",
"sha1su0 v2.4s, v3.4s, v0.4s",

// k for the next five rounds
"adrp x1, .K3",
"ldr q4, [x1, #:lo12:.K3]",

// 15
"sha1h s17, s6",
"add v19.4s, v3.4s, v4.4s",
"sha1p q6, s18, v19.4s",
"sha1su1 v2.4s, v1.4s",
"sha1su0 v3.4s, v0.4s, v1.4s",

// 16
"sha1h s18, s6",
"add v19.4s, v0.4s, v4.4s",
"sha1p q6, s17, v19.4s",
"sha1su1 v3.4s, v2.4s",

// 17
"sha1h s17, s6",
"add v19.4s, v1.4s, v4.4s",
"sha1p q6, s18, v19.4s",

// 18
"sha1h s18, s6",
"add v19.4s, v2.4s, v4.4s",
"sha1p q6, s17, v19.4s",

// 19
"sha1h s17, s6",
"add v19.4s, v3.4s, v4.4s",
"sha1p q6, s18, v19.4s",

// Update state
"add v6.4s, v6.4s, v5.4s",
// source code: str q6, [x0]
// this now happens at the bottom
"add v16.2s, v16.2s, v17.2s",
// source code: str s16, [x0, 16]
// this now happens at the bottom

"ret", // TODO is this right

".align 4", // TODO ummm alignment...
".K0:", // TODO are labels just the same in inline asm in rust?
".word 0x5A827999",
".word 0x5A827999",
".word 0x5A827999",
".word 0x5A827999",
".K1:",
".word 0x6ED9EBA1",
".word 0x6ED9EBA1",
".word 0x6ED9EBA1",
".word 0x6ED9EBA1",
".K2:",
".word 0x8F1BBCDC",
".word 0x8F1BBCDC",
".word 0x8F1BBCDC",
".word 0x8F1BBCDC",
".K3:",
".word 0xCA62C1D6",
".word 0xCA62C1D6",
".word 0xCA62C1D6",
".word 0xCA62C1D6",

// state ins and outs
in("q4") state.as_mut_ptr(),
inout("s16") state[4],
lateout("q6") state as *mut u32,
// blocks in
in("q0") blocks[0][0..16].as_ptr(),
in("q1") blocks[0][16..32].as_ptr(),
in("q2") blocks[0][32..48].as_ptr(),
in("q3") blocks[0][48..64].as_ptr(),
// some clobbers
out("q5") _,
out("s17") _,
out("s18") _,
out("q19") _,
// TODO make sure there aren't any other clobbers
);
};
}
Loading