From 0df4359c17779aab542e665d549546f932dac53b Mon Sep 17 00:00:00 2001 From: Artyom Pavlov Date: Sun, 18 Feb 2018 09:55:57 +0300 Subject: [PATCH] CLMUL instruction set (#320) * added pclmul * added docs * pclmul -> pclmulqdq * imm8: u8 -> imm8: i32 * return changes to stdsimd/arch/detect/x86.rs * error fixes * added rustc_args_required_const * fixed assert_instr for _mm_clmulepi64_si128 * fixed pclmul assert_instr tests --- coresimd/x86/i686/mod.rs | 3 ++ coresimd/x86/i686/pclmulqdq.rs | 70 ++++++++++++++++++++++++++++++++++ stdsimd/arch/detect/x86.rs | 8 ++++ 3 files changed, 81 insertions(+) create mode 100644 coresimd/x86/i686/pclmulqdq.rs diff --git a/coresimd/x86/i686/mod.rs b/coresimd/x86/i686/mod.rs index ad40c73c6d9a4..7dd55802e3eaf 100644 --- a/coresimd/x86/i686/mod.rs +++ b/coresimd/x86/i686/mod.rs @@ -6,6 +6,9 @@ pub use self::aes::*; mod mmx; pub use self::mmx::*; +mod pclmulqdq; +pub use self::pclmulqdq::*; + mod sse; pub use self::sse::*; diff --git a/coresimd/x86/i686/pclmulqdq.rs b/coresimd/x86/i686/pclmulqdq.rs new file mode 100644 index 0000000000000..8c4f4b85742ab --- /dev/null +++ b/coresimd/x86/i686/pclmulqdq.rs @@ -0,0 +1,70 @@ +//! Carry-less Multiplication (CLMUL) +//! +//! The reference is [Intel 64 and IA-32 Architectures Software Developer's +//! Manual Volume 2: Instruction Set Reference, A-Z][intel64_ref] (p. 4-241). +//! +//! [intel64_ref]: http://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf + +use coresimd::x86::__m128i; + +#[cfg(test)] +use stdsimd_test::assert_instr; + +#[allow(improper_ctypes)] +extern "C" { + #[link_name = "llvm.x86.pclmulqdq"] + fn pclmulqdq(a: __m128i, b: __m128i, imm8: u8) -> __m128i; +} + +/// Perform a carry-less multiplication of two 64-bit polynomials over the +/// finite field GF(2). +/// +/// The immediate byte is used for determining which halves of `a` and `b` +/// should be used.
Immediate bits other than 0 and 4 are ignored. +#[inline] +#[target_feature(enable = "pclmulqdq")] +#[cfg_attr(all(test, not(target_os="linux")), assert_instr(pclmulqdq, imm8 = 0))] +#[cfg_attr(all(test, target_os="linux"), assert_instr(pclmullqlqdq, imm8 = 0))] +#[cfg_attr(all(test, target_os="linux"), assert_instr(pclmulhqlqdq, imm8 = 1))] +#[cfg_attr(all(test, target_os="linux"), assert_instr(pclmullqhqdq, imm8 = 16))] +#[cfg_attr(all(test, target_os="linux"), assert_instr(pclmulhqhqdq, imm8 = 17))] +#[rustc_args_required_const(2)] +pub unsafe fn _mm_clmulepi64_si128(a: __m128i, b: __m128i, imm8: i32) -> __m128i { + macro_rules! call { + ($imm8:expr) => (pclmulqdq(a, b, $imm8)) + } + constify_imm8!(imm8, call) +} + + +#[cfg(test)] +mod tests { + // The constants in the tests below are just bit patterns. They should not + // be interpreted as integers; signedness does not make sense for them, but + // __m128i happens to be defined in terms of signed integers. + #![allow(overflowing_literals)] + + use stdsimd_test::simd_test; + + use coresimd::x86::*; + + #[simd_test = "pclmulqdq"] + unsafe fn test_mm_clmulepi64_si128() { + // Constants taken from https://software.intel.com/sites/default/files/managed/72/cc/clmul-wp-rev-2.02-2014-04-20.pdf + let a = _mm_set_epi64x(0x7b5b546573745665, 0x63746f725d53475d); + let b = _mm_set_epi64x(0x4869285368617929, 0x5b477565726f6e5d); + let r00 = _mm_set_epi64x(0x1d4d84c85c3440c0, 0x929633d5d36f0451); + let r01 = _mm_set_epi64x(0x1bd17c8d556ab5a1, 0x7fa540ac2a281315); + let r10 = _mm_set_epi64x(0x1a2bf6db3a30862f, 0xbabf262df4b7d5c9); + let r11 = _mm_set_epi64x(0x1d1e1f2c592e7c45, 0xd66ee03e410fd4ed); + + assert_eq_m128i(_mm_clmulepi64_si128(a, b, 0x00), r00); + assert_eq_m128i(_mm_clmulepi64_si128(a, b, 0x10), r01); + assert_eq_m128i(_mm_clmulepi64_si128(a, b, 0x01), r10); + assert_eq_m128i(_mm_clmulepi64_si128(a, b, 0x11), r11); + + let a0 = _mm_set_epi64x(0x0000000000000000, 0x8000000000000000); + let r =
_mm_set_epi64x(0x4000000000000000, 0x0000000000000000); + assert_eq_m128i(_mm_clmulepi64_si128(a0, a0, 0x00), r); + } +} diff --git a/stdsimd/arch/detect/x86.rs b/stdsimd/arch/detect/x86.rs index e7b95a93c77f1..83532f11687ff 100644 --- a/stdsimd/arch/detect/x86.rs +++ b/stdsimd/arch/detect/x86.rs @@ -30,6 +30,9 @@ macro_rules! is_target_feature_detected { ("aes") => { $crate::arch::detect::check_for( $crate::arch::detect::Feature::aes) }; + ("pclmulqdq") => { + $crate::arch::detect::check_for( + $crate::arch::detect::Feature::pclmulqdq) }; ("tsc") => { $crate::arch::detect::check_for( $crate::arch::detect::Feature::tsc) }; @@ -174,6 +177,8 @@ macro_rules! is_target_feature_detected { pub enum Feature { /// AES (Advanced Encryption Standard New Instructions AES-NI) aes, + /// CLMUL (Carry-less Multiplication) + pclmulqdq, /// TSC (Time Stamp Counter) tsc, /// MMX @@ -345,6 +350,7 @@ pub fn detect_features() -> cache::Initializer { enable(proc_info_ecx, 20, Feature::sse4_2); enable(proc_info_ecx, 23, Feature::popcnt); enable(proc_info_ecx, 25, Feature::aes); + enable(proc_info_ecx, 1, Feature::pclmulqdq); enable(proc_info_edx, 4, Feature::tsc); enable(proc_info_edx, 23, Feature::mmx); enable(proc_info_edx, 24, Feature::fxsr); @@ -457,6 +463,7 @@ mod tests { #[test] fn dump() { println!("aes: {:?}", is_target_feature_detected!("aes")); + println!("pclmulqdq: {:?}", is_target_feature_detected!("pclmulqdq")); println!("tsc: {:?}", is_target_feature_detected!("tsc")); println!("sse: {:?}", is_target_feature_detected!("sse")); println!("sse2: {:?}", is_target_feature_detected!("sse2")); @@ -498,6 +505,7 @@ mod tests { fn compare_with_cupid() { let information = cupid::master().unwrap(); assert_eq!(is_target_feature_detected!("aes"), information.aesni()); + assert_eq!(is_target_feature_detected!("pclmulqdq"), information.pclmulqdq()); assert_eq!(is_target_feature_detected!("tsc"), information.tsc()); assert_eq!(is_target_feature_detected!("sse"), information.sse());
assert_eq!(is_target_feature_detected!("sse2"), information.sse2());