diff --git a/coresimd/src/x86/i586/avx.rs b/coresimd/src/x86/i586/avx.rs index 67e4ebb626070..b600873f38044 100644 --- a/coresimd/src/x86/i586/avx.rs +++ b/coresimd/src/x86/i586/avx.rs @@ -843,7 +843,7 @@ pub unsafe fn _mm256_extractf128_ps(a: f32x8, imm8: i32) -> __m128 { #[inline(always)] #[target_feature = "+avx"] #[cfg_attr(test, assert_instr(vextractf128))] -pub unsafe fn _mm256_extractf128_pd(a: f64x4, imm8: i32) -> f64x2 { +pub unsafe fn _mm256_extractf128_pd(a: f64x4, imm8: i32) -> __m128d { match imm8 & 1 { 0 => simd_shuffle2(a, _mm256_undefined_pd(), [0, 1]), _ => simd_shuffle2(a, _mm256_undefined_pd(), [2, 3]), @@ -1068,9 +1068,7 @@ pub unsafe fn _mm256_permute_pd(a: f64x4, imm8: i32) -> f64x4 { #[inline(always)] #[target_feature = "+avx,+sse2"] #[cfg_attr(test, assert_instr(vpermilpd, imm8 = 0x1))] -pub unsafe fn _mm_permute_pd(a: f64x2, imm8: i32) -> f64x2 { - use x86::i586::sse2::_mm_undefined_pd; - +pub unsafe fn _mm_permute_pd(a: __m128d, imm8: i32) -> __m128d { let imm8 = (imm8 & 0xFF) as u8; macro_rules! shuffle2 { ($a:expr, $b:expr) => { @@ -1194,7 +1192,7 @@ pub unsafe fn _mm256_insertf128_ps(a: f32x8, b: __m128, imm8: i32) -> f32x8 { #[inline(always)] #[target_feature = "+avx"] #[cfg_attr(test, assert_instr(vinsertf128, imm8 = 1))] -pub unsafe fn _mm256_insertf128_pd(a: f64x4, b: f64x2, imm8: i32) -> f64x4 { +pub unsafe fn _mm256_insertf128_pd(a: f64x4, b: __m128d, imm8: i32) -> f64x4 { match imm8 & 1 { 0 => simd_shuffle4(a, _mm256_castpd128_pd256(b), [4, 5, 2, 3]), _ => simd_shuffle4(a, _mm256_castpd128_pd256(b), [0, 1, 4, 5]), @@ -2139,7 +2137,7 @@ pub unsafe fn _mm256_castps256_ps128(a: f32x8) -> __m128 { #[target_feature = "+avx"] // This intrinsic is only used for compilation and does not generate any // instructions, thus it has zero latency. -pub unsafe fn _mm256_castpd256_pd128(a: f64x4) -> f64x2 { +pub unsafe fn _mm256_castpd256_pd128(a: f64x4) -> __m128d { simd_shuffle2(a, a, [0, 1]) } @@ -2171,7 +2169,7 @@ pub unsafe fn _mm256_castps128_ps256(a: __m128) -> f32x8 { #[target_feature = "+avx"] // This intrinsic is only used for compilation and does not generate any // instructions, thus it has zero latency. -pub unsafe fn _mm256_castpd128_pd256(a: f64x2) -> f64x4 { +pub unsafe fn _mm256_castpd128_pd256(a: __m128d) -> f64x4 { // FIXME simd_shuffle4(a, a, [0, 1, -1, -1]) simd_shuffle4(a, a, [0, 1, 0, 0]) } @@ -2221,8 +2219,7 @@ pub unsafe fn _mm256_zextsi128_si256(a: __m128i) -> __m256i { #[target_feature = "+avx,+sse2"] // This intrinsic is only used for compilation and does not generate any // instructions, thus it has zero latency. 
-pub unsafe fn _mm256_zextpd128_pd256(a: f64x2) -> f64x4 { - use x86::i586::sse2::_mm_setzero_pd; +pub unsafe fn _mm256_zextpd128_pd256(a: __m128d) -> f64x4 { simd_shuffle4(a, _mm_setzero_pd(), [0, 1, 2, 3]) } @@ -2326,7 +2323,6 @@ pub unsafe fn _mm256_loadu2_m128( pub unsafe fn _mm256_loadu2_m128d( hiaddr: *const f64, loaddr: *const f64 ) -> f64x4 { - use x86::i586::sse2::_mm_loadu_pd; let a = _mm256_castpd128_pd256(_mm_loadu_pd(loaddr)); _mm256_insertf128_pd(a, _mm_loadu_pd(hiaddr), 1) } @@ -2371,7 +2367,6 @@ pub unsafe fn _mm256_storeu2_m128( pub unsafe fn _mm256_storeu2_m128d( hiaddr: *mut f64, loaddr: *mut f64, a: f64x4 ) { - use x86::i586::sse2::_mm_storeu_pd; let lo = _mm256_castpd256_pd128(a); _mm_storeu_pd(loaddr, lo); let hi = _mm256_extractf128_pd(a, 1); @@ -3104,9 +3099,9 @@ mod tests { #[simd_test = "avx"] unsafe fn test_mm256_extractf128_pd() { let a = f64x4::new(4., 3., 2., 5.); - let r = avx::_mm256_extractf128_pd(a, 0); - let e = f64x2::new(4., 3.); - assert_eq!(r, e); + let r = _mm256_extractf128_pd(a, 0); + let e = _mm_setr_pd(4., 3.); + assert_eq_m128d(r, e); } #[simd_test = "avx"] @@ -3189,10 +3184,10 @@ mod tests { #[simd_test = "avx"] unsafe fn test_mm_permute_pd() { - let a = f64x2::new(4., 3.); - let r = avx::_mm_permute_pd(a, 1); - let e = f64x2::new(3., 4.); - assert_eq!(r, e); + let a = _mm_setr_pd(4., 3.); + let r = _mm_permute_pd(a, 1); + let e = _mm_setr_pd(3., 4.); + assert_eq_m128d(r, e); } #[simd_test = "avx"] @@ -3271,8 +3266,8 @@ mod tests { #[simd_test = "avx"] unsafe fn test_mm256_insertf128_pd() { let a = f64x4::new(1., 2., 3., 4.); - let b = f64x2::new(5., 6.); - let r = avx::_mm256_insertf128_pd(a, b, 0); + let b = _mm_setr_pd(5., 6.); + let r = _mm256_insertf128_pd(a, b, 0); let e = f64x4::new(5., 6., 3., 4.); assert_eq!(r, e); } @@ -4078,8 +4073,8 @@ mod tests { #[simd_test = "avx"] unsafe fn test_mm256_castpd256_pd128() { let a = f64x4::new(1., 2., 3., 4.); - let r = avx::_mm256_castpd256_pd128(a); - assert_eq!(r, f64x2::new(1., 2.)); + let r = _mm256_castpd256_pd128(a); + assert_eq_m128d(r, _mm_setr_pd(1., 2.)); } #[simd_test = "avx"] @@ -4107,8 +4102,8 @@ mod tests { #[simd_test = "avx"] unsafe fn test_mm256_zextpd128_pd256() { - let a = f64x2::new(1., 2.); - let r = avx::_mm256_zextpd128_pd256(a); + let a = _mm_setr_pd(1., 2.); + let r = _mm256_zextpd128_pd256(a); let e = f64x4::new(1., 2., 0., 0.); assert_eq!(r, e); } @@ -4271,8 +4266,8 @@ mod tests { &mut lo as *mut _ as *mut f64, a, ); - assert_eq!(hi, f64x2::new(3., 4.)); - assert_eq!(lo, f64x2::new(1., 2.)); + assert_eq_m128d(hi, _mm_setr_pd(3., 4.)); + assert_eq_m128d(lo, _mm_setr_pd(1., 2.)); } #[simd_test = "avx"] diff --git a/coresimd/src/x86/i586/sse.rs b/coresimd/src/x86/i586/sse.rs index df92f7ee1b6f6..db361bd3d8d3f 100644 --- a/coresimd/src/x86/i586/sse.rs +++ b/coresimd/src/x86/i586/sse.rs @@ -1711,20 +1711,6 @@ mod tests { use stdsimd_test::simd_test; use test::black_box; // Used to inhibit constant-folding. 
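Note: the net effect of the avx.rs changes above is that every 128-bit half crossing an AVX intrinsic boundary is now an opaque `__m128`/`__m128d` rather than a typed `f32x4`/`f64x2`. A quick round-trip sketch in the style of the updated tests (using the same `assert_eq_m128d` helper those tests already call):

```rust
#[simd_test = "avx"]
unsafe fn extractf128_insertf128_roundtrip() {
    let a = f64x4::new(1., 2., 3., 4.);
    // the high 128-bit half now comes out as an opaque __m128d
    let hi = _mm256_extractf128_pd(a, 1);
    assert_eq_m128d(hi, _mm_setr_pd(3., 4.));
    // splicing it back into the low half duplicates the high lanes
    let r = _mm256_insertf128_pd(a, hi, 0);
    assert_eq!(r, f64x4::new(3., 4., 3., 4.));
}
```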
- #[target_feature = "+sse"] - unsafe fn assert_eq_m128(a: __m128, b: __m128) { - let r = _mm_cmpeq_ps(a, b); - if _mm_movemask_ps(r) != 0b1111 { - panic!("{:?} != {:?}", a, b); - } - } - - #[target_feature = "+sse"] - unsafe fn get_m128(a: __m128, idx: usize) -> f32 { - union A { a: __m128, b: [f32; 4] }; - transmute::<__m128, A>(a).b[idx] - } - #[simd_test = "sse"] unsafe fn test_mm_add_ps() { let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0); diff --git a/coresimd/src/x86/i586/sse2.rs b/coresimd/src/x86/i586/sse2.rs index 44b76d6165e2d..22da044b32205 100644 --- a/coresimd/src/x86/i586/sse2.rs +++ b/coresimd/src/x86/i586/sse2.rs @@ -1,15 +1,16 @@ //! Streaming SIMD Extensions 2 (SSE2) + #[cfg(test)] use stdsimd_test::assert_instr; use core::mem; use core::ptr; -use simd_llvm::{simd_cast, simd_shuffle16, simd_shuffle2, simd_shuffle4, - simd_shuffle8}; +use simd_llvm::*; use v128::*; use v64::*; +use x86::*; /// Provide a hint to the processor that the code sequence is a spin-wait loop. /// @@ -61,80 +62,80 @@ pub unsafe fn _mm_mfence() { #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(paddb))] -pub unsafe fn _mm_add_epi8(a: i8x16, b: i8x16) -> i8x16 { - a + b +pub unsafe fn _mm_add_epi8(a: __m128i, b: __m128i) -> __m128i { + mem::transmute(simd_add(a.as_i8x16(), b.as_i8x16())) } /// Add packed 16-bit integers in `a` and `b`. #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(paddw))] -pub unsafe fn _mm_add_epi16(a: i16x8, b: i16x8) -> i16x8 { - a + b +pub unsafe fn _mm_add_epi16(a: __m128i, b: __m128i) -> __m128i { + mem::transmute(simd_add(a.as_i16x8(), b.as_i16x8())) } /// Add packed 32-bit integers in `a` and `b`. #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(paddd))] -pub unsafe fn _mm_add_epi32(a: i32x4, b: i32x4) -> i32x4 { - a + b +pub unsafe fn _mm_add_epi32(a: __m128i, b: __m128i) -> __m128i { + mem::transmute(simd_add(a.as_i32x4(), b.as_i32x4())) } /// Add packed 64-bit integers in `a` and `b`. #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(paddq))] -pub unsafe fn _mm_add_epi64(a: i64x2, b: i64x2) -> i64x2 { - a + b +pub unsafe fn _mm_add_epi64(a: __m128i, b: __m128i) -> __m128i { + mem::transmute(simd_add(a.as_i64x2(), b.as_i64x2())) } /// Add packed 8-bit integers in `a` and `b` using saturation. #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(paddsb))] -pub unsafe fn _mm_adds_epi8(a: i8x16, b: i8x16) -> i8x16 { - paddsb(a, b) +pub unsafe fn _mm_adds_epi8(a: __m128i, b: __m128i) -> __m128i { + mem::transmute(paddsb(a.as_i8x16(), b.as_i8x16())) } /// Add packed 16-bit integers in `a` and `b` using saturation. #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(paddsw))] -pub unsafe fn _mm_adds_epi16(a: i16x8, b: i16x8) -> i16x8 { - paddsw(a, b) +pub unsafe fn _mm_adds_epi16(a: __m128i, b: __m128i) -> __m128i { + mem::transmute(paddsw(a.as_i16x8(), b.as_i16x8())) } /// Add packed unsigned 8-bit integers in `a` and `b` using saturation. #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(paddusb))] -pub unsafe fn _mm_adds_epu8(a: u8x16, b: u8x16) -> u8x16 { - paddsub(a, b) +pub unsafe fn _mm_adds_epu8(a: __m128i, b: __m128i) -> __m128i { + mem::transmute(paddsub(a.as_u8x16(), b.as_u8x16())) } /// Add packed unsigned 16-bit integers in `a` and `b` using saturation.
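Note: the pattern introduced above repeats across the whole integer family: `__m128i` is just 128 untyped bits, each intrinsic reinterprets it at the element width it needs (`as_i8x16()`, `as_u16x8()`, …), does the lane-wise op, and transmutes the typed result back. Behavior is unchanged; a saturation sketch in the style of the crate's tests (assuming the sse2 test module's usual imports, with lane checks done via `transmute` to a plain array):

```rust
#[simd_test = "sse2"]
unsafe fn adds_saturates() {
    let a = _mm_set1_epi8(125);
    let b = _mm_set1_epi8(100);
    // a wrapping add would overflow; the saturating form pins at i8::MAX
    let r: [i8; 16] = mem::transmute(_mm_adds_epi8(a, b));
    assert_eq!(r, [127; 16]);
}
```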
#[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(paddusw))] -pub unsafe fn _mm_adds_epu16(a: u16x8, b: u16x8) -> u16x8 { - paddsuw(a, b) +pub unsafe fn _mm_adds_epu16(a: __m128i, b: __m128i) -> __m128i { + mem::transmute(paddsuw(a.as_u16x8(), b.as_u16x8())) } /// Average packed unsigned 8-bit integers in `a` and `b`. #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(pavgb))] -pub unsafe fn _mm_avg_epu8(a: u8x16, b: u8x16) -> u8x16 { - pavgb(a, b) +pub unsafe fn _mm_avg_epu8(a: __m128i, b: __m128i) -> __m128i { + mem::transmute(pavgb(a.as_u8x16(), b.as_u8x16())) } /// Average packed unsigned 16-bit integers in `a` and `b`. #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(pavgw))] -pub unsafe fn _mm_avg_epu16(a: u16x8, b: u16x8) -> u16x8 { - pavgw(a, b) +pub unsafe fn _mm_avg_epu16(a: __m128i, b: __m128i) -> __m128i { + mem::transmute(pavgw(a.as_u16x8(), b.as_u16x8())) } /// Multiply and then horizontally add signed 16 bit integers in `a` and `b`. @@ -145,8 +146,8 @@ pub unsafe fn _mm_avg_epu16(a: u16x8, b: u16x8) -> u16x8 { #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(pmaddwd))] -pub unsafe fn _mm_madd_epi16(a: i16x8, b: i16x8) -> i32x4 { - pmaddwd(a, b) +pub unsafe fn _mm_madd_epi16(a: __m128i, b: __m128i) -> __m128i { + mem::transmute(pmaddwd(a.as_i16x8(), b.as_i16x8())) } /// Compare packed 16-bit integers in `a` and `b`, and return the packed @@ -154,8 +155,8 @@ pub unsafe fn _mm_madd_epi16(a: i16x8, b: i16x8) -> i32x4 { #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(pmaxsw))] -pub unsafe fn _mm_max_epi16(a: i16x8, b: i16x8) -> i16x8 { - pmaxsw(a, b) +pub unsafe fn _mm_max_epi16(a: __m128i, b: __m128i) -> __m128i { + mem::transmute(pmaxsw(a.as_i16x8(), b.as_i16x8())) } /// Compare packed unsigned 8-bit integers in `a` and `b`, and return the @@ -163,8 +164,8 @@ pub unsafe fn _mm_max_epi16(a: i16x8, b: i16x8) -> i16x8 { #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(pmaxub))] -pub unsafe fn _mm_max_epu8(a: u8x16, b: u8x16) -> u8x16 { - pmaxub(a, b) +pub unsafe fn _mm_max_epu8(a: __m128i, b: __m128i) -> __m128i { + mem::transmute(pmaxub(a.as_u8x16(), b.as_u8x16())) } /// Compare packed 16-bit integers in `a` and `b`, and return the packed @@ -172,8 +173,8 @@ pub unsafe fn _mm_max_epu8(a: u8x16, b: u8x16) -> u8x16 { #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(pminsw))] -pub unsafe fn _mm_min_epi16(a: i16x8, b: i16x8) -> i16x8 { - pminsw(a, b) +pub unsafe fn _mm_min_epi16(a: __m128i, b: __m128i) -> __m128i { + mem::transmute(pminsw(a.as_i16x8(), b.as_i16x8())) } /// Compare packed unsigned 8-bit integers in `a` and `b`, and return the @@ -181,8 +182,8 @@ pub unsafe fn _mm_min_epi16(a: i16x8, b: i16x8) -> i16x8 { #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(pminub))] -pub unsafe fn _mm_min_epu8(a: u8x16, b: u8x16) -> u8x16 { - pminub(a, b) +pub unsafe fn _mm_min_epu8(a: __m128i, b: __m128i) -> __m128i { + mem::transmute(pminub(a.as_u8x16(), b.as_u8x16())) } /// Multiply the packed 16-bit integers in `a` and `b`. 
@@ -192,8 +193,8 @@ pub unsafe fn _mm_min_epu8(a: u8x16, b: u8x16) -> u8x16 { #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(pmulhw))] -pub unsafe fn _mm_mulhi_epi16(a: i16x8, b: i16x8) -> i16x8 { - pmulhw(a, b) +pub unsafe fn _mm_mulhi_epi16(a: __m128i, b: __m128i) -> __m128i { + mem::transmute(pmulhw(a.as_i16x8(), b.as_i16x8())) } /// Multiply the packed unsigned 16-bit integers in `a` and `b`. @@ -203,8 +204,8 @@ pub unsafe fn _mm_mulhi_epi16(a: i16x8, b: i16x8) -> i16x8 { #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(pmulhuw))] -pub unsafe fn _mm_mulhi_epu16(a: u16x8, b: u16x8) -> u16x8 { - pmulhuw(a, b) +pub unsafe fn _mm_mulhi_epu16(a: __m128i, b: __m128i) -> __m128i { + mem::transmute(pmulhuw(a.as_u16x8(), b.as_u16x8())) } /// Multiply the packed 16-bit integers in `a` and `b`. @@ -214,8 +215,8 @@ pub unsafe fn _mm_mulhi_epu16(a: u16x8, b: u16x8) -> u16x8 { #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(pmullw))] -pub unsafe fn _mm_mullo_epi16(a: i16x8, b: i16x8) -> i16x8 { - a * b +pub unsafe fn _mm_mullo_epi16(a: __m128i, b: __m128i) -> __m128i { + mem::transmute(simd_mul(a.as_i16x8(), b.as_i16x8())) } /// Multiply the low unsigned 32-bit integers from each packed 64-bit element @@ -225,8 +226,8 @@ pub unsafe fn _mm_mullo_epi16(a: i16x8, b: i16x8) -> i16x8 { #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(pmuludq))] -pub unsafe fn _mm_mul_epu32(a: u32x4, b: u32x4) -> u64x2 { - pmuludq(a, b) +pub unsafe fn _mm_mul_epu32(a: __m128i, b: __m128i) -> __m128i { + mem::transmute(pmuludq(a.as_u32x4(), b.as_u32x4())) } /// Sum the absolute differences of packed unsigned 8-bit integers. @@ -238,40 +239,40 @@ pub unsafe fn _mm_mul_epu32(a: u32x4, b: u32x4) -> u64x2 { #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(psadbw))] -pub unsafe fn _mm_sad_epu8(a: u8x16, b: u8x16) -> u64x2 { - psadbw(a, b) +pub unsafe fn _mm_sad_epu8(a: __m128i, b: __m128i) -> __m128i { + mem::transmute(psadbw(a.as_u8x16(), b.as_u8x16())) } /// Subtract packed 8-bit integers in `b` from packed 8-bit integers in `a`. #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(psubb))] -pub unsafe fn _mm_sub_epi8(a: i8x16, b: i8x16) -> i8x16 { - a - b +pub unsafe fn _mm_sub_epi8(a: __m128i, b: __m128i) -> __m128i { + mem::transmute(simd_sub(a.as_i8x16(), b.as_i8x16())) } /// Subtract packed 16-bit integers in `b` from packed 16-bit integers in `a`. #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(psubw))] -pub unsafe fn _mm_sub_epi16(a: i16x8, b: i16x8) -> i16x8 { - a - b +pub unsafe fn _mm_sub_epi16(a: __m128i, b: __m128i) -> __m128i { + mem::transmute(simd_sub(a.as_i16x8(), b.as_i16x8())) } /// Subtract packed 32-bit integers in `b` from packed 32-bit integers in `a`. #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(psubd))] -pub unsafe fn _mm_sub_epi32(a: i32x4, b: i32x4) -> i32x4 { - a - b +pub unsafe fn _mm_sub_epi32(a: __m128i, b: __m128i) -> __m128i { + mem::transmute(simd_sub(a.as_i32x4(), b.as_i32x4())) } /// Subtract packed 64-bit integers in `b` from packed 64-bit integers in `a`. 
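Note: two of the ops above are easy to misread now that everything is `__m128i`: `_mm_mul_epu32` multiplies only the low 32-bit lane of each 64-bit element (producing two 64-bit products), and `_mm_sad_epu8` likewise widens to two 64-bit sums of absolute differences. A sketch of the former:

```rust
#[simd_test = "sse2"]
unsafe fn mul_epu32_widens() {
    let a = _mm_setr_epi32(3, 999, 2, 999); // the odd lanes are ignored
    let b = _mm_setr_epi32(4, 999, 5, 999);
    // lanes 0 and 2 multiply as u32 -> u64: [3 * 4, 2 * 5]
    let r: [u64; 2] = mem::transmute(_mm_mul_epu32(a, b));
    assert_eq!(r, [12, 10]);
}
```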
#[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(psubq))] -pub unsafe fn _mm_sub_epi64(a: i64x2, b: i64x2) -> i64x2 { - a - b +pub unsafe fn _mm_sub_epi64(a: __m128i, b: __m128i) -> __m128i { + mem::transmute(simd_sub(a.as_i64x2(), b.as_i64x2())) } /// Subtract packed 8-bit integers in `b` from packed 8-bit integers in `a` @@ -279,8 +280,8 @@ pub unsafe fn _mm_sub_epi64(a: i64x2, b: i64x2) -> i64x2 { #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(psubsb))] -pub unsafe fn _mm_subs_epi8(a: i8x16, b: i8x16) -> i8x16 { - psubsb(a, b) +pub unsafe fn _mm_subs_epi8(a: __m128i, b: __m128i) -> __m128i { + mem::transmute(psubsb(a.as_i8x16(), b.as_i8x16())) } /// Subtract packed 16-bit integers in `b` from packed 16-bit integers in `a` @@ -288,8 +289,8 @@ pub unsafe fn _mm_subs_epi8(a: i8x16, b: i8x16) -> i8x16 { #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(psubsw))] -pub unsafe fn _mm_subs_epi16(a: i16x8, b: i16x8) -> i16x8 { - psubsw(a, b) +pub unsafe fn _mm_subs_epi16(a: __m128i, b: __m128i) -> __m128i { + mem::transmute(psubsw(a.as_i16x8(), b.as_i16x8())) } /// Subtract packed unsigned 8-bit integers in `b` from packed unsigned 8-bit @@ -297,8 +298,8 @@ pub unsafe fn _mm_subs_epi16(a: i16x8, b: i16x8) -> i16x8 { #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(psubusb))] -pub unsafe fn _mm_subs_epu8(a: u8x16, b: u8x16) -> u8x16 { - psubusb(a, b) +pub unsafe fn _mm_subs_epu8(a: __m128i, b: __m128i) -> __m128i { + mem::transmute(psubusb(a.as_u8x16(), b.as_u8x16())) } /// Subtract packed unsigned 16-bit integers in `b` from packed unsigned 16-bit @@ -306,16 +307,17 @@ pub unsafe fn _mm_subs_epu8(a: u8x16, b: u8x16) -> u8x16 { #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(psubusw))] -pub unsafe fn _mm_subs_epu16(a: u16x8, b: u16x8) -> u16x8 { - psubusw(a, b) +pub unsafe fn _mm_subs_epu16(a: __m128i, b: __m128i) -> __m128i { + mem::transmute(psubusw(a.as_u16x8(), b.as_u16x8())) } /// Shift `a` left by `imm8` bytes while shifting in zeros. #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(pslldq, imm8 = 1))] -pub unsafe fn _mm_slli_si128(a: i8x16, imm8: i32) -> i8x16 { - let (zero, imm8) = (i8x16::splat(0), imm8 as u32); +pub unsafe fn _mm_slli_si128(a: __m128i, imm8: i32) -> __m128i { + let (zero, imm8) = (_mm_set1_epi8(0).as_i8x16(), imm8 as u32); + let a = a.as_i8x16(); macro_rules! shuffle { ($shift:expr) => { simd_shuffle16::<i8x16, i8x16>(zero, a, [ @@ -330,7 +332,7 @@ pub unsafe fn _mm_slli_si128(a: i8x16, imm8: i32) -> i8x16 { ]) } } - match imm8 { + let x = match imm8 { 0 => shuffle!(0), 1 => shuffle!(1), 2 => shuffle!(2), @@ -348,14 +350,15 @@ pub unsafe fn _mm_slli_si128(a: i8x16, imm8: i32) -> i8x16 { 14 => shuffle!(14), 15 => shuffle!(15), _ => shuffle!(16), - } + }; + mem::transmute(x) } /// Shift `a` left by `imm8` bytes while shifting in zeros.
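Note: the `let x = match …; mem::transmute(x)` rewrite above is purely mechanical; `simd_shuffle16` still needs constant indices, so the `match` over `imm8` stays, and only the final typed value is transmuted back to `__m128i`. Behaviorally `_mm_slli_si128` remains a whole-register byte shift:

```rust
#[simd_test = "sse2"]
unsafe fn slli_si128_shifts_bytes() {
    let a = _mm_setr_epi8(1, 2, 3, 4, 5, 6, 7, 8,
                          9, 10, 11, 12, 13, 14, 15, 16);
    // shift the whole register left by 4 bytes, zero-filling from the bottom
    let r: [i8; 16] = mem::transmute(_mm_slli_si128(a, 4));
    assert_eq!(r, [0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]);
}
```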
#[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(pslldq, imm8 = 1))] -pub unsafe fn _mm_bslli_si128(a: i8x16, imm8: i32) -> i8x16 { +pub unsafe fn _mm_bslli_si128(a: __m128i, imm8: i32) -> __m128i { _mm_slli_si128(a, imm8) } @@ -363,7 +366,7 @@ pub unsafe fn _mm_bslli_si128(a: i8x16, imm8: i32) -> i8x16 { #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(psrldq, imm8 = 1))] -pub unsafe fn _mm_bsrli_si128(a: i8x16, imm8: i32) -> i8x16 { +pub unsafe fn _mm_bsrli_si128(a: __m128i, imm8: i32) -> __m128i { _mm_srli_si128(a, imm8) } @@ -371,8 +374,8 @@ pub unsafe fn _mm_bsrli_si128(a: i8x16, imm8: i32) -> i8x16 { #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(psllw))] -pub unsafe fn _mm_slli_epi16(a: i16x8, imm8: i32) -> i16x8 { - pslliw(a, imm8) +pub unsafe fn _mm_slli_epi16(a: __m128i, imm8: i32) -> __m128i { + mem::transmute(pslliw(a.as_i16x8(), imm8)) } /// Shift packed 16-bit integers in `a` left by `count` while shifting in @@ -380,16 +383,16 @@ pub unsafe fn _mm_slli_epi16(a: i16x8, imm8: i32) -> i16x8 { #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(psllw))] -pub unsafe fn _mm_sll_epi16(a: i16x8, count: i16x8) -> i16x8 { - psllw(a, count) +pub unsafe fn _mm_sll_epi16(a: __m128i, count: __m128i) -> __m128i { + mem::transmute(psllw(a.as_i16x8(), count.as_i16x8())) } /// Shift packed 32-bit integers in `a` left by `imm8` while shifting in zeros. #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(pslld))] -pub unsafe fn _mm_slli_epi32(a: i32x4, imm8: i32) -> i32x4 { - psllid(a, imm8) +pub unsafe fn _mm_slli_epi32(a: __m128i, imm8: i32) -> __m128i { + mem::transmute(psllid(a.as_i32x4(), imm8)) } /// Shift packed 32-bit integers in `a` left by `count` while shifting in @@ -397,16 +400,16 @@ pub unsafe fn _mm_slli_epi32(a: i32x4, imm8: i32) -> i32x4 { #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(pslld))] -pub unsafe fn _mm_sll_epi32(a: i32x4, count: i32x4) -> i32x4 { - pslld(a, count) +pub unsafe fn _mm_sll_epi32(a: __m128i, count: __m128i) -> __m128i { + mem::transmute(pslld(a.as_i32x4(), count.as_i32x4())) } /// Shift packed 64-bit integers in `a` left by `imm8` while shifting in zeros. 
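Note: in the element shifts above, the `_mm_slli_*` forms take the count as an immediate while the `_mm_sll_*` forms read it from the low 64 bits of a vector; both shift in zeros. Sketch:

```rust
#[simd_test = "sse2"]
unsafe fn shift_counts_agree() {
    let a = _mm_set1_epi16(0b1010);
    let imm = _mm_slli_epi16(a, 2);
    let vec = _mm_sll_epi16(a, _mm_set_epi64x(0, 2)); // count in the low u64
    let (x, y): ([i16; 8], [i16; 8]) = (mem::transmute(imm), mem::transmute(vec));
    assert_eq!(x, [0b101000; 8]);
    assert_eq!(x, y);
}
```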
#[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(psllq))] -pub unsafe fn _mm_slli_epi64(a: i64x2, imm8: i32) -> i64x2 { - pslliq(a, imm8) +pub unsafe fn _mm_slli_epi64(a: __m128i, imm8: i32) -> __m128i { + mem::transmute(pslliq(a.as_i64x2(), imm8)) } /// Shift packed 64-bit integers in `a` left by `count` while shifting in @@ -414,8 +417,8 @@ pub unsafe fn _mm_slli_epi64(a: i64x2, imm8: i32) -> i64x2 { #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(psllq))] -pub unsafe fn _mm_sll_epi64(a: i64x2, count: i64x2) -> i64x2 { - psllq(a, count) +pub unsafe fn _mm_sll_epi64(a: __m128i, count: __m128i) -> __m128i { + mem::transmute(psllq(a.as_i64x2(), count.as_i64x2())) } /// Shift packed 16-bit integers in `a` right by `imm8` while shifting in sign @@ -423,8 +426,8 @@ pub unsafe fn _mm_sll_epi64(a: i64x2, count: i64x2) -> i64x2 { #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(psraw))] -pub unsafe fn _mm_srai_epi16(a: i16x8, imm8: i32) -> i16x8 { - psraiw(a, imm8) +pub unsafe fn _mm_srai_epi16(a: __m128i, imm8: i32) -> __m128i { + mem::transmute(psraiw(a.as_i16x8(), imm8)) } /// Shift packed 16-bit integers in `a` right by `count` while shifting in sign @@ -432,8 +435,8 @@ pub unsafe fn _mm_srai_epi16(a: i16x8, imm8: i32) -> i16x8 { #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(psraw))] -pub unsafe fn _mm_sra_epi16(a: i16x8, count: i16x8) -> i16x8 { - psraw(a, count) +pub unsafe fn _mm_sra_epi16(a: __m128i, count: __m128i) -> __m128i { + mem::transmute(psraw(a.as_i16x8(), count.as_i16x8())) } /// Shift packed 32-bit integers in `a` right by `imm8` while shifting in sign @@ -441,8 +444,8 @@ pub unsafe fn _mm_sra_epi16(a: i16x8, count: i16x8) -> i16x8 { #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(psrad))] -pub unsafe fn _mm_srai_epi32(a: i32x4, imm8: i32) -> i32x4 { - psraid(a, imm8) +pub unsafe fn _mm_srai_epi32(a: __m128i, imm8: i32) -> __m128i { + mem::transmute(psraid(a.as_i32x4(), imm8)) } /// Shift packed 32-bit integers in `a` right by `count` while shifting in sign @@ -450,19 +453,20 @@ pub unsafe fn _mm_srai_epi32(a: i32x4, imm8: i32) -> i32x4 { #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(psrad))] -pub unsafe fn _mm_sra_epi32(a: i32x4, count: i32x4) -> i32x4 { - psrad(a, count) +pub unsafe fn _mm_sra_epi32(a: __m128i, count: __m128i) -> __m128i { + mem::transmute(psrad(a.as_i32x4(), count.as_i32x4())) } /// Shift `a` right by `imm8` bytes while shifting in zeros. #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(psrldq, imm8 = 1))] -pub unsafe fn _mm_srli_si128(a: i8x16, imm8: i32) -> i8x16 { - let (zero, imm8) = (i8x16::splat(0), imm8 as u32); +pub unsafe fn _mm_srli_si128(a: __m128i, imm8: i32) -> __m128i { + let (zero, imm8) = (_mm_set1_epi8(0).as_i8x16(), imm8 as u32); + let a = a.as_i8x16(); macro_rules! 
shuffle { ($shift:expr) => { - simd_shuffle16::<i8x16, i8x16>(a, zero, [ + simd_shuffle16(a, zero, [ 0 + $shift, 1 + $shift, 2 + $shift, 3 + $shift, 4 + $shift, 5 + $shift, @@ -474,7 +478,7 @@ pub unsafe fn _mm_srli_si128(a: i8x16, imm8: i32) -> i8x16 { ]) } } - match imm8 { + let x: i8x16 = match imm8 { 0 => shuffle!(0), 1 => shuffle!(1), 2 => shuffle!(2), @@ -492,7 +496,8 @@ pub unsafe fn _mm_srli_si128(a: i8x16, imm8: i32) -> i8x16 { 14 => shuffle!(14), 15 => shuffle!(15), _ => shuffle!(16), - } + }; + mem::transmute(x) } /// Shift packed 16-bit integers in `a` right by `imm8` while shifting in @@ -500,8 +505,8 @@ pub unsafe fn _mm_srli_si128(a: i8x16, imm8: i32) -> i8x16 { #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(psrlw))] -pub unsafe fn _mm_srli_epi16(a: i16x8, imm8: i32) -> i16x8 { - psrliw(a, imm8) +pub unsafe fn _mm_srli_epi16(a: __m128i, imm8: i32) -> __m128i { + mem::transmute(psrliw(a.as_i16x8(), imm8)) } /// Shift packed 16-bit integers in `a` right by `count` while shifting in @@ -509,8 +514,8 @@ pub unsafe fn _mm_srli_epi16(a: i16x8, imm8: i32) -> i16x8 { #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(psrlw))] -pub unsafe fn _mm_srl_epi16(a: i16x8, count: i16x8) -> i16x8 { - psrlw(a, count) +pub unsafe fn _mm_srl_epi16(a: __m128i, count: __m128i) -> __m128i { + mem::transmute(psrlw(a.as_i16x8(), count.as_i16x8())) } /// Shift packed 32-bit integers in `a` right by `imm8` while shifting in @@ -518,8 +523,8 @@ pub unsafe fn _mm_srl_epi16(a: i16x8, count: i16x8) -> i16x8 { #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(psrld))] -pub unsafe fn _mm_srli_epi32(a: i32x4, imm8: i32) -> i32x4 { - psrlid(a, imm8) +pub unsafe fn _mm_srli_epi32(a: __m128i, imm8: i32) -> __m128i { + mem::transmute(psrlid(a.as_i32x4(), imm8)) } /// Shift packed 32-bit integers in `a` right by `count` while shifting in @@ -527,8 +532,8 @@ pub unsafe fn _mm_srli_epi32(a: i32x4, imm8: i32) -> i32x4 { #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(psrld))] -pub unsafe fn _mm_srl_epi32(a: i32x4, count: i32x4) -> i32x4 { - psrld(a, count) +pub unsafe fn _mm_srl_epi32(a: __m128i, count: __m128i) -> __m128i { + mem::transmute(psrld(a.as_i32x4(), count.as_i32x4())) } /// Shift packed 64-bit integers in `a` right by `imm8` while shifting in @@ -536,8 +541,8 @@ pub unsafe fn _mm_srl_epi32(a: i32x4, count: i32x4) -> i32x4 { #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(psrlq))] -pub unsafe fn _mm_srli_epi64(a: i64x2, imm8: i32) -> i64x2 { - psrliq(a, imm8) +pub unsafe fn _mm_srli_epi64(a: __m128i, imm8: i32) -> __m128i { + mem::transmute(psrliq(a.as_i64x2(), imm8)) } /// Shift packed 64-bit integers in `a` right by `count` while shifting in @@ -545,8 +550,8 @@ pub unsafe fn _mm_srli_epi64(a: i64x2, imm8: i32) -> i64x2 { #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(psrlq))] -pub unsafe fn _mm_srl_epi64(a: i64x2, count: i64x2) -> i64x2 { - psrlq(a, count) +pub unsafe fn _mm_srl_epi64(a: __m128i, count: __m128i) -> __m128i { + mem::transmute(psrlq(a.as_i64x2(), count.as_i64x2())) } /// Compute the bitwise AND of 128 bits (representing integer data) in `a` and @@ -555,7 +560,7 @@ pub unsafe fn _mm_srl_epi64(a: i64x2, count: i64x2) -> i64x2 { #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(andps))] pub unsafe fn _mm_and_si128(a: __m128i, b: __m128i) -> __m128i { - __m128i::from(i8x16::from(a) & i8x16::from(b)) + simd_and(a, b) } /// Compute the bitwise NOT of 128 bits (representing integer data) in `a` and @@ -564,7 +569,7 @@ pub unsafe fn _mm_and_si128(a: __m128i, b: __m128i) -> __m128i { #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(andnps))] pub unsafe fn _mm_andnot_si128(a: __m128i, b: __m128i) -> __m128i { - __m128i::from((!i8x16::from(a)) & i8x16::from(b)) + simd_and(simd_xor(_mm_set1_epi8(-1), a), b) } /// Compute the bitwise OR of 128 bits (representing integer data) in `a` and @@ -573,7 +578,7 @@ pub unsafe fn _mm_andnot_si128(a: __m128i, b: __m128i) -> __m128i { #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(orps))] pub unsafe fn _mm_or_si128(a: __m128i, b: __m128i) -> __m128i { - __m128i::from(i8x16::from(a) | i8x16::from(b)) + simd_or(a, b) } /// Compute the bitwise XOR of 128 bits (representing integer data) in `a` and @@ -582,79 +587,79 @@ pub unsafe fn _mm_or_si128(a: __m128i, b: __m128i) -> __m128i { #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(xorps))] pub unsafe fn _mm_xor_si128(a: __m128i, b: __m128i) -> __m128i { - __m128i::from(i8x16::from(a) ^ i8x16::from(b)) + simd_xor(a, b) } /// Compare packed 8-bit integers in `a` and `b` for equality. #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(pcmpeqb))] -pub unsafe fn _mm_cmpeq_epi8(a: i8x16, b: i8x16) -> i8x16 { - a.eq(b) +pub unsafe fn _mm_cmpeq_epi8(a: __m128i, b: __m128i) -> __m128i { + mem::transmute::<i8x16, _>(simd_eq(a.as_i8x16(), b.as_i8x16())) } /// Compare packed 16-bit integers in `a` and `b` for equality. #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(pcmpeqw))] -pub unsafe fn _mm_cmpeq_epi16(a: i16x8, b: i16x8) -> i16x8 { - a.eq(b) +pub unsafe fn _mm_cmpeq_epi16(a: __m128i, b: __m128i) -> __m128i { + mem::transmute::<i16x8, _>(simd_eq(a.as_i16x8(), b.as_i16x8())) } /// Compare packed 32-bit integers in `a` and `b` for equality. #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(pcmpeqd))] -pub unsafe fn _mm_cmpeq_epi32(a: i32x4, b: i32x4) -> i32x4 { - a.eq(b) +pub unsafe fn _mm_cmpeq_epi32(a: __m128i, b: __m128i) -> __m128i { + mem::transmute::<i32x4, _>(simd_eq(a.as_i32x4(), b.as_i32x4())) } /// Compare packed 8-bit integers in `a` and `b` for greater-than. #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(pcmpgtb))] -pub unsafe fn _mm_cmpgt_epi8(a: i8x16, b: i8x16) -> i8x16 { - a.gt(b) +pub unsafe fn _mm_cmpgt_epi8(a: __m128i, b: __m128i) -> __m128i { + mem::transmute::<i8x16, _>(simd_gt(a.as_i8x16(), b.as_i8x16())) } /// Compare packed 16-bit integers in `a` and `b` for greater-than. #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(pcmpgtw))] -pub unsafe fn _mm_cmpgt_epi16(a: i16x8, b: i16x8) -> i16x8 { - a.gt(b) +pub unsafe fn _mm_cmpgt_epi16(a: __m128i, b: __m128i) -> __m128i { + mem::transmute::<i16x8, _>(simd_gt(a.as_i16x8(), b.as_i16x8())) } /// Compare packed 32-bit integers in `a` and `b` for greater-than. #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(pcmpgtd))] -pub unsafe fn _mm_cmpgt_epi32(a: i32x4, b: i32x4) -> i32x4 { - a.gt(b) +pub unsafe fn _mm_cmpgt_epi32(a: __m128i, b: __m128i) -> __m128i { + mem::transmute::<i32x4, _>(simd_gt(a.as_i32x4(), b.as_i32x4())) } /// Compare packed 8-bit integers in `a` and `b` for less-than. #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(pcmpgtb))] -pub unsafe fn _mm_cmplt_epi8(a: i8x16, b: i8x16) -> i8x16 { - a.lt(b) +pub unsafe fn _mm_cmplt_epi8(a: __m128i, b: __m128i) -> __m128i { + mem::transmute::<i8x16, _>(simd_lt(a.as_i8x16(), b.as_i8x16())) } /// Compare packed 16-bit integers in `a` and `b` for less-than. #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(pcmpgtw))] -pub unsafe fn _mm_cmplt_epi16(a: i16x8, b: i16x8) -> i16x8 { - a.lt(b) +pub unsafe fn _mm_cmplt_epi16(a: __m128i, b: __m128i) -> __m128i { + mem::transmute::<i16x8, _>(simd_lt(a.as_i16x8(), b.as_i16x8())) } /// Compare packed 32-bit integers in `a` and `b` for less-than. #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(pcmpgtd))] -pub unsafe fn _mm_cmplt_epi32(a: i32x4, b: i32x4) -> i32x4 { - a.lt(b) +pub unsafe fn _mm_cmplt_epi32(a: __m128i, b: __m128i) -> __m128i { + mem::transmute::<i32x4, _>(simd_lt(a.as_i32x4(), b.as_i32x4())) } /// Convert the lower two packed 32-bit integers in `a` to packed @@ -662,8 +667,9 @@ pub unsafe fn _mm_cmplt_epi32(a: i32x4, b: i32x4) -> i32x4 { #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(cvtdq2pd))] -pub unsafe fn _mm_cvtepi32_pd(a: i32x4) -> f64x2 { - simd_cast::<i32x2, f64x2>(simd_shuffle2(a, a, [0, 1])) +pub unsafe fn _mm_cvtepi32_pd(a: __m128i) -> __m128d { + let a = a.as_i32x4(); + simd_cast::<i32x2, f64x2>(simd_shuffle2(a, a, [0, 1])) } /// Return `a` with its lower element replaced by `b` after converting it to @@ -671,8 +677,8 @@ pub unsafe fn _mm_cvtepi32_pd(a: i32x4) -> f64x2 { #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(cvtsi2sd))] -pub unsafe fn _mm_cvtsi32_sd(a: f64x2, b: i32) -> f64x2 { - a.replace(0, b as f64) +pub unsafe fn _mm_cvtsi32_sd(a: __m128d, b: i32) -> __m128d { + simd_insert(a, 0, b as f64) } /// Convert packed 32-bit integers in `a` to packed single-precision (32-bit) @@ -680,8 +686,8 @@ pub unsafe fn _mm_cvtsi32_sd(a: f64x2, b: i32) -> f64x2 { #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(cvtdq2ps))] -pub unsafe fn _mm_cvtepi32_ps(a: i32x4) -> f32x4 { - cvtdq2ps(a) +pub unsafe fn _mm_cvtepi32_ps(a: __m128i) -> __m128 { + cvtdq2ps(a.as_i32x4()) } /// Convert packed single-precision (32-bit) floating-point elements in `a` @@ -689,8 +695,8 @@ pub unsafe fn _mm_cvtepi32_ps(a: i32x4) -> f32x4 { #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(cvtps2dq))] -pub unsafe fn _mm_cvtps_epi32(a: f32x4) -> i32x4 { - cvtps2dq(a) +pub unsafe fn _mm_cvtps_epi32(a: __m128) -> __m128i { + mem::transmute(cvtps2dq(a)) } /// Return a vector whose lowest element is `a` and all higher elements are @@ -698,16 +704,16 @@ pub unsafe fn _mm_cvtps_epi32(a: f32x4) -> i32x4 { #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(all(test, target_arch = "x86_64"), assert_instr(movd))] -pub unsafe fn _mm_cvtsi32_si128(a: i32) -> i32x4 { - i32x4::new(a, 0, 0, 0) +pub unsafe fn _mm_cvtsi32_si128(a: i32) -> __m128i { + mem::transmute(i32x4::new(a, 0, 0, 0)) } /// Return the lowest element of `a`.
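Note: the comparisons above return lane masks rather than booleans: all-ones where the predicate holds, zero elsewhere. Combined with the `simd_and`/`simd_or`-based bitwise intrinsics, that gives the usual branchless select; a lanewise-max sketch:

```rust
#[simd_test = "sse2"]
unsafe fn mask_select_max() {
    let a = _mm_setr_epi32(1, 5, 3, 7);
    let b = _mm_setr_epi32(4, 2, 6, 0);
    let mask = _mm_cmpgt_epi32(a, b); // -1 where a > b, 0 elsewhere
    // (a & mask) | (b & !mask) selects the larger lane
    let max = _mm_or_si128(_mm_and_si128(mask, a), _mm_andnot_si128(mask, b));
    let r: [i32; 4] = mem::transmute(max);
    assert_eq!(r, [4, 5, 6, 7]);
}
```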
#[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(all(test, not(windows)), assert_instr(movd))] // FIXME mov on windows -pub unsafe fn _mm_cvtsi128_si32(a: i32x4) -> i32 { - a.extract(0) +pub unsafe fn _mm_cvtsi128_si32(a: __m128i) -> i32 { + simd_extract(a.as_i32x4(), 0) } /// Set packed 64-bit integers with the supplied values, from highest to @@ -715,16 +721,16 @@ pub unsafe fn _mm_cvtsi128_si32(a: i32x4) -> i32 { #[inline(always)] #[target_feature = "+sse2"] // no particular instruction to test -pub unsafe fn _mm_set_epi64x(e1: i64, e0: i64) -> i64x2 { - i64x2::new(e0, e1) +pub unsafe fn _mm_set_epi64x(e1: i64, e0: i64) -> __m128i { + mem::transmute(i64x2::new(e0, e1)) } /// Set packed 32-bit integers with the supplied values. #[inline(always)] #[target_feature = "+sse2"] // no particular instruction to test -pub unsafe fn _mm_set_epi32(e3: i32, e2: i32, e1: i32, e0: i32) -> i32x4 { - i32x4::new(e0, e1, e2, e3) +pub unsafe fn _mm_set_epi32(e3: i32, e2: i32, e1: i32, e0: i32) -> __m128i { + mem::transmute(i32x4::new(e0, e1, e2, e3)) } /// Set packed 16-bit integers with the supplied values. @@ -733,8 +739,8 @@ pub unsafe fn _mm_set_epi32(e3: i32, e2: i32, e1: i32, e0: i32) -> i32x4 { // no particular instruction to test pub unsafe fn _mm_set_epi16( e7: i16, e6: i16, e5: i16, e4: i16, e3: i16, e2: i16, e1: i16, e0: i16 -) -> i16x8 { - i16x8::new(e0, e1, e2, e3, e4, e5, e6, e7) +) -> __m128i { + mem::transmute(i16x8::new(e0, e1, e2, e3, e4, e5, e6, e7)) } /// Set packed 8-bit integers with the supplied values. @@ -744,51 +750,51 @@ pub unsafe fn _mm_set_epi16( pub unsafe fn _mm_set_epi8( e15: i8, e14: i8, e13: i8, e12: i8, e11: i8, e10: i8, e9: i8, e8: i8, e7: i8, e6: i8, e5: i8, e4: i8, e3: i8, e2: i8, e1: i8, e0: i8, -) -> i8x16 { +) -> __m128i { #[cfg_attr(rustfmt, rustfmt_skip)] - i8x16::new( + mem::transmute(i8x16::new( e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15, - ) + )) } /// Broadcast 64-bit integer `a` to all elements. #[inline(always)] #[target_feature = "+sse2"] // no particular instruction to test -pub unsafe fn _mm_set1_epi64x(a: i64) -> i64x2 { - i64x2::splat(a) +pub unsafe fn _mm_set1_epi64x(a: i64) -> __m128i { + _mm_set_epi64x(a, a) } /// Broadcast 32-bit integer `a` to all elements. #[inline(always)] #[target_feature = "+sse2"] // no particular instruction to test -pub unsafe fn _mm_set1_epi32(a: i32) -> i32x4 { - i32x4::splat(a) +pub unsafe fn _mm_set1_epi32(a: i32) -> __m128i { + _mm_set_epi32(a, a, a, a) } /// Broadcast 16-bit integer `a` to all elements. #[inline(always)] #[target_feature = "+sse2"] // no particular instruction to test -pub unsafe fn _mm_set1_epi16(a: i16) -> i16x8 { - i16x8::splat(a) +pub unsafe fn _mm_set1_epi16(a: i16) -> __m128i { + _mm_set_epi16(a, a, a, a, a, a, a, a) } /// Broadcast 8-bit integer `a` to all elements. #[inline(always)] #[target_feature = "+sse2"] // no particular instruction to test -pub unsafe fn _mm_set1_epi8(a: i8) -> i8x16 { - i8x16::splat(a) +pub unsafe fn _mm_set1_epi8(a: i8) -> __m128i { + _mm_set_epi8(a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a) } /// Set packed 32-bit integers with the supplied values in reverse order. 
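Note: argument order is the classic trap in the `set` family above: `_mm_set_*` lists elements from most significant to least, `_mm_setr_*` the reverse, which is exactly why `_mm_setr_epi32` can now be written as `_mm_set_epi32` with the arguments flipped. Sketch:

```rust
#[simd_test = "sse2"]
unsafe fn set_vs_setr() {
    let a: [i32; 4] = mem::transmute(_mm_set_epi32(3, 2, 1, 0));
    let b: [i32; 4] = mem::transmute(_mm_setr_epi32(0, 1, 2, 3));
    assert_eq!(a, [0, 1, 2, 3]); // lane 0 is the *last* `set` argument
    assert_eq!(a, b);
}
```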
#[inline(always)] #[target_feature = "+sse2"] // no particular instruction to test -pub unsafe fn _mm_setr_epi32(e3: i32, e2: i32, e1: i32, e0: i32) -> i32x4 { - i32x4::new(e3, e2, e1, e0) +pub unsafe fn _mm_setr_epi32(e3: i32, e2: i32, e1: i32, e0: i32) -> __m128i { + _mm_set_epi32(e0, e1, e2, e3) } /// Set packed 16-bit integers with the supplied values in reverse order. @@ -797,8 +803,8 @@ pub unsafe fn _mm_setr_epi32(e3: i32, e2: i32, e1: i32, e0: i32) -> i32x4 { // no particular instruction to test pub unsafe fn _mm_setr_epi16( e7: i16, e6: i16, e5: i16, e4: i16, e3: i16, e2: i16, e1: i16, e0: i16 -) -> i16x8 { - i16x8::new(e7, e6, e5, e4, e3, e2, e1, e0) +) -> __m128i { + _mm_set_epi16(e0, e1, e2, e3, e4, e5, e6, e7) } /// Set packed 8-bit integers with the supplied values in reverse order. @@ -808,10 +814,10 @@ pub unsafe fn _mm_setr_epi16( pub unsafe fn _mm_setr_epi8( e15: i8, e14: i8, e13: i8, e12: i8, e11: i8, e10: i8, e9: i8, e8: i8, e7: i8, e6: i8, e5: i8, e4: i8, e3: i8, e2: i8, e1: i8, e0: i8, -) -> i8x16 { +) -> __m128i { #[cfg_attr(rustfmt, rustfmt_skip)] - i8x16::new( - e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0, + _mm_set_epi8( + e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15, ) } @@ -820,7 +826,7 @@ pub unsafe fn _mm_setr_epi8( #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(xorps))] pub unsafe fn _mm_setzero_si128() -> __m128i { - mem::transmute((0_i64, 0_i64)) + _mm_set1_epi64x(0) } /// Load 64-bit integer from memory into first element of returned vector. @@ -831,8 +837,8 @@ pub unsafe fn _mm_setzero_si128() -> __m128i { not(all(target_os = "linux", target_arch = "x86_64")), target_arch = "x86_64"), assert_instr(movq))] -pub unsafe fn _mm_loadl_epi64(mem_addr: *const i64x2) -> i64x2 { - i64x2::new((*mem_addr).extract(0), 0) +pub unsafe fn _mm_loadl_epi64(mem_addr: *const __m128i) -> __m128i { + _mm_set_epi64x(0, simd_extract((*mem_addr).as_i64x2(), 0)) } /// Load 128-bits of integer data from memory into a new vector. @@ -872,8 +878,8 @@ pub unsafe fn _mm_loadu_si128(mem_addr: *const __m128i) -> __m128i { #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(maskmovdqu))] -pub unsafe fn _mm_maskmoveu_si128(a: i8x16, mask: i8x16, mem_addr: *mut i8) { - maskmovdqu(a, mask, mem_addr) +pub unsafe fn _mm_maskmoveu_si128(a: __m128i, mask: __m128i, mem_addr: *mut i8) { + maskmovdqu(a.as_i8x16(), mask.as_i8x16(), mem_addr) } /// Store 128-bits of integer data from `a` into memory. 
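Note: `_mm_loadl_epi64` above now takes `*const __m128i`, but it still reads only the low 64 bits and zeroes the upper half, as the `_mm_set_epi64x(0, …)` rewrite makes explicit. Sketch:

```rust
#[simd_test = "sse2"]
unsafe fn loadl_zeroes_high() {
    let src = _mm_set_epi64x(0x1111, 0x2222); // high, low
    let r: [i64; 2] = mem::transmute(_mm_loadl_epi64(&src));
    assert_eq!(r, [0x2222, 0]); // low 64 bits kept, high half zeroed
}
```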
@@ -941,8 +947,8 @@ pub unsafe fn _mm_stream_si32(mem_addr: *mut i32, a: i32) { // FIXME movd on windows, movd on i686 #[cfg_attr(all(test, not(windows), target_arch = "x86_64"), assert_instr(movq))] -pub unsafe fn _mm_move_epi64(a: i64x2) -> i64x2 { - simd_shuffle2(a, i64x2::splat(0), [0, 2]) +pub unsafe fn _mm_move_epi64(a: __m128i) -> __m128i { + simd_shuffle2(a, _mm_setzero_si128(), [0, 2]) } /// Convert packed 16-bit integers from `a` and `b` to packed 8-bit integers @@ -950,8 +956,8 @@ pub unsafe fn _mm_move_epi64(a: i64x2) -> i64x2 { #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(packsswb))] -pub unsafe fn _mm_packs_epi16(a: i16x8, b: i16x8) -> i8x16 { - packsswb(a, b) +pub unsafe fn _mm_packs_epi16(a: __m128i, b: __m128i) -> __m128i { + mem::transmute(packsswb(a.as_i16x8(), b.as_i16x8())) } /// Convert packed 32-bit integers from `a` and `b` to packed 16-bit integers @@ -959,8 +965,8 @@ pub unsafe fn _mm_packs_epi16(a: i16x8, b: i16x8) -> i8x16 { #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(packssdw))] -pub unsafe fn _mm_packs_epi32(a: i32x4, b: i32x4) -> i16x8 { - packssdw(a, b) +pub unsafe fn _mm_packs_epi32(a: __m128i, b: __m128i) -> __m128i { + mem::transmute(packssdw(a.as_i32x4(), b.as_i32x4())) } /// Convert packed 16-bit integers from `a` and `b` to packed 8-bit integers @@ -968,40 +974,39 @@ pub unsafe fn _mm_packs_epi32(a: i32x4, b: i32x4) -> i16x8 { #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(packuswb))] -pub unsafe fn _mm_packus_epi16(a: i16x8, b: i16x8) -> u8x16 { - packuswb(a, b) +pub unsafe fn _mm_packus_epi16(a: __m128i, b: __m128i) -> __m128i { + mem::transmute(packuswb(a.as_i16x8(), b.as_i16x8())) } /// Return the `imm8` element of `a`. #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(pextrw, imm8 = 9))] -pub unsafe fn _mm_extract_epi16(a: i16x8, imm8: i32) -> i32 { - let imm8 = (imm8 & 7) as u32; - (a.extract_unchecked(imm8) as i32) & 0xFFFF +pub unsafe fn _mm_extract_epi16(a: __m128i, imm8: i32) -> i32 { + simd_extract::<_, i16>(a.as_i16x8(), (imm8 & 7) as u32) as i32 } /// Return a new vector where the `imm8` element of `a` is replaced with `i`. #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(pinsrw, imm8 = 9))] -pub unsafe fn _mm_insert_epi16(a: i16x8, i: i32, imm8: i32) -> i16x8 { - a.replace(imm8 as u32 & 0b111, i as i16) +pub unsafe fn _mm_insert_epi16(a: __m128i, i: i32, imm8: i32) -> __m128i { + mem::transmute(simd_insert(a.as_i16x8(), (imm8 & 7) as u32, i as i16)) } /// Return a mask of the most significant bit of each element in `a`. #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(pmovmskb))] -pub unsafe fn _mm_movemask_epi8(a: i8x16) -> i32 { - pmovmskb(a) +pub unsafe fn _mm_movemask_epi8(a: __m128i) -> i32 { + pmovmskb(a.as_i8x16()) } /// Shuffle 32-bit integers in `a` using the control in `imm8`. #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(pshufd, imm8 = 9))] -pub unsafe fn _mm_shuffle_epi32(a: i32x4, imm8: i32) -> i32x4 { +pub unsafe fn _mm_shuffle_epi32(a: __m128i, imm8: i32) -> __m128i { // simd_shuffleX requires that its selector parameter be made up of // constant values, but we can't enforce that here. In spirit, we need // to write a `match` on all possible values of a byte, and for each value, @@ -1010,6 +1015,7 @@ pub unsafe fn _mm_shuffle_epi32(a: i32x4, imm8: i32) -> i32x4 { // // Of course, that's... awful. 
So we try to use macros to do it for us. let imm8 = (imm8 & 0xFF) as u8; + let a = a.as_i32x4(); macro_rules! shuffle_done { ($x01:expr, $x23:expr, $x45:expr, $x67:expr) => { @@ -1046,12 +1052,13 @@ pub unsafe fn _mm_shuffle_epi32(a: i32x4, imm8: i32) -> i32x4 { } } } - match imm8 & 0b11 { + let x: i32x4 = match imm8 & 0b11 { 0b00 => shuffle_x23!(0), 0b01 => shuffle_x23!(1), 0b10 => shuffle_x23!(2), _ => shuffle_x23!(3), - } + }; + mem::transmute(x) } /// Shuffle 16-bit integers in the high 64 bits of `a` using the control in @@ -1062,9 +1069,10 @@ pub unsafe fn _mm_shuffle_epi32(a: i32x4, imm8: i32) -> i32x4 { #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(pshufhw, imm8 = 9))] -pub unsafe fn _mm_shufflehi_epi16(a: i16x8, imm8: i32) -> i16x8 { +pub unsafe fn _mm_shufflehi_epi16(a: __m128i, imm8: i32) -> __m128i { // See _mm_shuffle_epi32. let imm8 = (imm8 & 0xFF) as u8; + let a = a.as_i16x8(); macro_rules! shuffle_done { ($x01:expr, $x23:expr, $x45:expr, $x67:expr) => { simd_shuffle8(a, a, [ @@ -1102,12 +1110,13 @@ pub unsafe fn _mm_shufflehi_epi16(a: i16x8, imm8: i32) -> i16x8 { } } } - match imm8 & 0b11 { + let x: i16x8 = match imm8 & 0b11 { 0b00 => shuffle_x23!(0), 0b01 => shuffle_x23!(1), 0b10 => shuffle_x23!(2), _ => shuffle_x23!(3), - } + }; + mem::transmute(x) } /// Shuffle 16-bit integers in the low 64 bits of `a` using the control in @@ -1118,9 +1127,10 @@ pub unsafe fn _mm_shufflehi_epi16(a: i16x8, imm8: i32) -> i16x8 { #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(pshuflw, imm8 = 9))] -pub unsafe fn _mm_shufflelo_epi16(a: i16x8, imm8: i32) -> i16x8 { +pub unsafe fn _mm_shufflelo_epi16(a: __m128i, imm8: i32) -> __m128i { // See _mm_shuffle_epi32. let imm8 = (imm8 & 0xFF) as u8; + let a = a.as_i16x8(); macro_rules! shuffle_done { ($x01:expr, $x23:expr, $x45:expr, $x67:expr) => { @@ -1157,84 +1167,87 @@ pub unsafe fn _mm_shufflelo_epi16(a: i16x8, imm8: i32) -> i16x8 { } } } - match imm8 & 0b11 { + let x: i16x8 = match imm8 & 0b11 { 0b00 => shuffle_x23!(0), 0b01 => shuffle_x23!(1), 0b10 => shuffle_x23!(2), _ => shuffle_x23!(3), - } + }; + mem::transmute(x) } /// Unpack and interleave 8-bit integers from the high half of `a` and `b`. #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(punpckhbw))] -pub unsafe fn _mm_unpackhi_epi8(a: i8x16, b: i8x16) -> i8x16 { - simd_shuffle16( - a, - b, +pub unsafe fn _mm_unpackhi_epi8(a: __m128i, b: __m128i) -> __m128i { + mem::transmute::<i8x16, _>(simd_shuffle16( + a.as_i8x16(), + b.as_i8x16(), [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31], - ) + )) } /// Unpack and interleave 16-bit integers from the high half of `a` and `b`. #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(punpckhwd))] -pub unsafe fn _mm_unpackhi_epi16(a: i16x8, b: i16x8) -> i16x8 { - simd_shuffle8(a, b, [4, 12, 5, 13, 6, 14, 7, 15]) +pub unsafe fn _mm_unpackhi_epi16(a: __m128i, b: __m128i) -> __m128i { + let x = simd_shuffle8(a.as_i16x8(), b.as_i16x8(), [4, 12, 5, 13, 6, 14, 7, 15]); + mem::transmute::<i16x8, _>(x) } /// Unpack and interleave 32-bit integers from the high half of `a` and `b`. #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(punpckhdq))] -pub unsafe fn _mm_unpackhi_epi32(a: i32x4, b: i32x4) -> i32x4 { - simd_shuffle4(a, b, [2, 6, 3, 7]) +pub unsafe fn _mm_unpackhi_epi32(a: __m128i, b: __m128i) -> __m128i { + mem::transmute::<i32x4, _>(simd_shuffle4(a.as_i32x4(), b.as_i32x4(), [2, 6, 3, 7])) } /// Unpack and interleave 64-bit integers from the high half of `a` and `b`. #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(punpckhqdq))] -pub unsafe fn _mm_unpackhi_epi64(a: i64x2, b: i64x2) -> i64x2 { - simd_shuffle2(a, b, [1, 3]) +pub unsafe fn _mm_unpackhi_epi64(a: __m128i, b: __m128i) -> __m128i { + mem::transmute::<i64x2, _>(simd_shuffle2(a.as_i64x2(), b.as_i64x2(), [1, 3])) } /// Unpack and interleave 8-bit integers from the low half of `a` and `b`. #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(punpcklbw))] -pub unsafe fn _mm_unpacklo_epi8(a: i8x16, b: i8x16) -> i8x16 { - simd_shuffle16( - a, - b, +pub unsafe fn _mm_unpacklo_epi8(a: __m128i, b: __m128i) -> __m128i { + mem::transmute::<i8x16, _>(simd_shuffle16( + a.as_i8x16(), + b.as_i8x16(), [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23], - ) + )) } /// Unpack and interleave 16-bit integers from the low half of `a` and `b`. #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(punpcklwd))] -pub unsafe fn _mm_unpacklo_epi16(a: i16x8, b: i16x8) -> i16x8 { - simd_shuffle8(a, b, [0, 8, 1, 9, 2, 10, 3, 11]) +pub unsafe fn _mm_unpacklo_epi16(a: __m128i, b: __m128i) -> __m128i { + let x = simd_shuffle8(a.as_i16x8(), b.as_i16x8(), [0, 8, 1, 9, 2, 10, 3, 11]); + mem::transmute::<i16x8, _>(x) } /// Unpack and interleave 32-bit integers from the low half of `a` and `b`. #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(punpckldq))] -pub unsafe fn _mm_unpacklo_epi32(a: i32x4, b: i32x4) -> i32x4 { - simd_shuffle4(a, b, [0, 4, 1, 5]) +pub unsafe fn _mm_unpacklo_epi32(a: __m128i, b: __m128i) -> __m128i { + mem::transmute::<i32x4, _>(simd_shuffle4(a.as_i32x4(), b.as_i32x4(), [0, 4, 1, 5])) } /// Unpack and interleave 64-bit integers from the low half of `a` and `b`. #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(punpcklqdq))] -pub unsafe fn _mm_unpacklo_epi64(a: i64x2, b: i64x2) -> i64x2 { - simd_shuffle2(a, b, [0, 2]) +pub unsafe fn _mm_unpacklo_epi64(a: __m128i, b: __m128i) -> __m128i { + mem::transmute::<i64x2, _>(simd_shuffle2(a.as_i64x2(), b.as_i64x2(), [0, 2])) } /// Return a new vector with the low element of `a` replaced by the sum of the @@ -1242,8 +1255,8 @@ pub unsafe fn _mm_unpacklo_epi64(a: i64x2, b: i64x2) -> i64x2 { #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(addsd))] -pub unsafe fn _mm_add_sd(a: f64x2, b: f64x2) -> f64x2 { - a.replace(0, a.extract(0) + b.extract(0)) +pub unsafe fn _mm_add_sd(a: __m128d, b: __m128d) -> __m128d { + simd_insert(a, 0, _mm_cvtsd_f64(a) + _mm_cvtsd_f64(b)) } /// Add packed double-precision (64-bit) floating-point elements in `a` and @@ -1251,8 +1264,8 @@ pub unsafe fn _mm_add_sd(a: f64x2, b: f64x2) -> f64x2 { #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(addpd))] -pub unsafe fn _mm_add_pd(a: f64x2, b: f64x2) -> f64x2 { - a + b +pub unsafe fn _mm_add_pd(a: __m128d, b: __m128d) -> __m128d { + simd_add(a, b) } /// Return a new vector with the low element of `a` replaced by the result of @@ -1260,8 +1273,8 @@ pub unsafe fn _mm_add_pd(a: f64x2, b: f64x2) -> f64x2 { #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(divsd))] -pub unsafe fn _mm_div_sd(a: f64x2, b: f64x2) -> f64x2 { - a.replace(0, a.extract(0) / b.extract(0)) +pub unsafe fn _mm_div_sd(a: __m128d, b: __m128d) -> __m128d { + simd_insert(a, 0, _mm_cvtsd_f64(a) / _mm_cvtsd_f64(b)) } /// Divide packed double-precision (64-bit) floating-point elements in `a` by @@ -1269,8 +1282,8 @@ pub unsafe fn _mm_div_sd(a: f64x2, b: f64x2) -> f64x2 { #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(divpd))] -pub unsafe fn _mm_div_pd(a: f64x2, b: f64x2) -> f64x2 { - a / b +pub unsafe fn _mm_div_pd(a: __m128d, b: __m128d) -> __m128d { + simd_div(a, b) } /// Return a new vector with the low element of `a` replaced by the maximum @@ -1278,7 +1291,7 @@ pub unsafe fn _mm_div_pd(a: f64x2, b: f64x2) -> f64x2 { #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(maxsd))] -pub unsafe fn _mm_max_sd(a: f64x2, b: f64x2) -> f64x2 { +pub unsafe fn _mm_max_sd(a: __m128d, b: __m128d) -> __m128d { maxsd(a, b) } @@ -1287,7 +1300,7 @@ pub unsafe fn _mm_max_sd(a: f64x2, b: f64x2) -> f64x2 { #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(maxpd))] -pub unsafe fn _mm_max_pd(a: f64x2, b: f64x2) -> f64x2 { +pub unsafe fn _mm_max_pd(a: __m128d, b: __m128d) -> __m128d { maxpd(a, b) } @@ -1296,7 +1309,7 @@ pub unsafe fn _mm_max_pd(a: f64x2, b: f64x2) -> f64x2 { #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(minsd))] -pub unsafe fn _mm_min_sd(a: f64x2, b: f64x2) -> f64x2 { +pub unsafe fn _mm_min_sd(a: __m128d, b: __m128d) -> __m128d { minsd(a, b) } @@ -1305,7 +1318,7 @@ pub unsafe fn _mm_min_sd(a: f64x2, b: f64x2) -> f64x2 { #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(minpd))] -pub unsafe fn _mm_min_pd(a: f64x2, b: f64x2) -> f64x2 { +pub unsafe fn _mm_min_pd(a: __m128d, b: __m128d) -> __m128d { minpd(a, b) } @@ -1314,8 +1327,8 @@ pub unsafe fn _mm_min_pd(a: f64x2, b: f64x2) -> f64x2 { #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(mulsd))] -pub unsafe fn _mm_mul_sd(a: f64x2, b: f64x2) -> f64x2 { - a.replace(0, a.extract(0) *
b.extract(0)) +pub unsafe fn _mm_mul_sd(a: __m128d, b: __m128d) -> __m128d { + simd_insert(a, 0, _mm_cvtsd_f64(a) * _mm_cvtsd_f64(b)) } /// Multiply packed double-precision (64-bit) floating-point elements in `a` @@ -1323,8 +1336,8 @@ pub unsafe fn _mm_mul_sd(a: f64x2, b: f64x2) -> f64x2 { #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(mulpd))] -pub unsafe fn _mm_mul_pd(a: f64x2, b: f64x2) -> f64x2 { - a * b +pub unsafe fn _mm_mul_pd(a: __m128d, b: __m128d) -> __m128d { + simd_mul(a, b) } /// Return a new vector with the low element of `a` replaced by the square @@ -1332,15 +1345,15 @@ pub unsafe fn _mm_mul_pd(a: f64x2, b: f64x2) -> f64x2 { #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(sqrtsd))] -pub unsafe fn _mm_sqrt_sd(a: f64x2, b: f64x2) -> f64x2 { - a.replace(0, sqrtsd(b).extract(0)) +pub unsafe fn _mm_sqrt_sd(a: __m128d, b: __m128d) -> __m128d { + simd_insert(a, 0, _mm_cvtsd_f64(sqrtsd(b))) } /// Return a new vector with the square root of each of the values in `a`. #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(sqrtpd))] -pub unsafe fn _mm_sqrt_pd(a: f64x2) -> f64x2 { +pub unsafe fn _mm_sqrt_pd(a: __m128d) -> __m128d { sqrtpd(a) } @@ -1349,8 +1362,8 @@ pub unsafe fn _mm_sqrt_pd(a: f64x2) -> f64x2 { #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(subsd))] -pub unsafe fn _mm_sub_sd(a: f64x2, b: f64x2) -> f64x2 { - a.replace(0, a.extract(0) - b.extract(0)) +pub unsafe fn _mm_sub_sd(a: __m128d, b: __m128d) -> __m128d { + simd_insert(a, 0, _mm_cvtsd_f64(a) - _mm_cvtsd_f64(b)) } /// Subtract packed double-precision (64-bit) floating-point elements in `b` @@ -1358,8 +1371,8 @@ pub unsafe fn _mm_sub_sd(a: f64x2, b: f64x2) -> f64x2 { #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(subpd))] -pub unsafe fn _mm_sub_pd(a: f64x2, b: f64x2) -> f64x2 { - a - b +pub unsafe fn _mm_sub_pd(a: __m128d, b: __m128d) -> __m128d { + simd_sub(a, b) } /// Compute the bitwise AND of packed double-precision (64-bit) floating-point @@ -1367,40 +1380,40 @@ pub unsafe fn _mm_sub_pd(a: f64x2, b: f64x2) -> f64x2 { #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(andps))] -pub unsafe fn _mm_and_pd(a: f64x2, b: f64x2) -> f64x2 { - let a: u64x2 = mem::transmute(a); - let b: u64x2 = mem::transmute(b); - mem::transmute(a & b) +pub unsafe fn _mm_and_pd(a: __m128d, b: __m128d) -> __m128d { + let a: __m128i = mem::transmute(a); + let b: __m128i = mem::transmute(b); + mem::transmute(_mm_and_si128(a, b)) } /// Compute the bitwise NOT of `a` and then AND with `b`. #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(andnps))] -pub unsafe fn _mm_andnot_pd(a: f64x2, b: f64x2) -> f64x2 { - let a: u64x2 = mem::transmute(a); - let b: u64x2 = mem::transmute(b); - mem::transmute((!a) & b) +pub unsafe fn _mm_andnot_pd(a: __m128d, b: __m128d) -> __m128d { + let a: __m128i = mem::transmute(a); + let b: __m128i = mem::transmute(b); + mem::transmute(_mm_andnot_si128(a, b)) } /// Compute the bitwise OR of `a` and `b`. 
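Note: in the double-precision family above, the `_sd` forms touch only lane 0 and pass lane 1 of `a` through untouched, which is why they are now spelled with `simd_insert`/`_mm_cvtsd_f64` while the `_pd` forms map to full-width `simd_add`/`simd_sub`/… Sketch:

```rust
#[simd_test = "sse2"]
unsafe fn sd_vs_pd() {
    let a = _mm_setr_pd(1.0, 10.0);
    let b = _mm_setr_pd(2.0, 20.0);
    let sd: [f64; 2] = mem::transmute(_mm_add_sd(a, b));
    let pd: [f64; 2] = mem::transmute(_mm_add_pd(a, b));
    assert_eq!(sd, [3.0, 10.0]); // lane 1 comes straight from `a`
    assert_eq!(pd, [3.0, 30.0]); // both lanes added
}
```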
#[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(orps))] -pub unsafe fn _mm_or_pd(a: f64x2, b: f64x2) -> f64x2 { - let a: u64x2 = mem::transmute(a); - let b: u64x2 = mem::transmute(b); - mem::transmute(a | b) +pub unsafe fn _mm_or_pd(a: __m128d, b: __m128d) -> __m128d { + let a: __m128i = mem::transmute(a); + let b: __m128i = mem::transmute(b); + mem::transmute(_mm_or_si128(a, b)) } /// Compute the bitwise OR of `a` and `b`. #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(xorps))] -pub unsafe fn _mm_xor_pd(a: f64x2, b: f64x2) -> f64x2 { - let a: u64x2 = mem::transmute(a); - let b: u64x2 = mem::transmute(b); - mem::transmute(a ^ b) +pub unsafe fn _mm_xor_pd(a: __m128d, b: __m128d) -> __m128d { + let a: __m128i = mem::transmute(a); + let b: __m128i = mem::transmute(b); + mem::transmute(_mm_xor_si128(a, b)) } /// Return a new vector with the low element of `a` replaced by the equality @@ -1408,7 +1421,7 @@ pub unsafe fn _mm_xor_pd(a: f64x2, b: f64x2) -> f64x2 { #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(cmpeqsd))] -pub unsafe fn _mm_cmpeq_sd(a: f64x2, b: f64x2) -> f64x2 { +pub unsafe fn _mm_cmpeq_sd(a: __m128d, b: __m128d) -> __m128d { cmpsd(a, b, 0) } @@ -1417,7 +1430,7 @@ pub unsafe fn _mm_cmpeq_sd(a: f64x2, b: f64x2) -> f64x2 { #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(cmpltsd))] -pub unsafe fn _mm_cmplt_sd(a: f64x2, b: f64x2) -> f64x2 { +pub unsafe fn _mm_cmplt_sd(a: __m128d, b: __m128d) -> __m128d { cmpsd(a, b, 1) } @@ -1426,7 +1439,7 @@ pub unsafe fn _mm_cmplt_sd(a: f64x2, b: f64x2) -> f64x2 { #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(cmplesd))] -pub unsafe fn _mm_cmple_sd(a: f64x2, b: f64x2) -> f64x2 { +pub unsafe fn _mm_cmple_sd(a: __m128d, b: __m128d) -> __m128d { cmpsd(a, b, 2) } @@ -1435,8 +1448,8 @@ pub unsafe fn _mm_cmple_sd(a: f64x2, b: f64x2) -> f64x2 { #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(cmpltsd))] -pub unsafe fn _mm_cmpgt_sd(a: f64x2, b: f64x2) -> f64x2 { - _mm_cmplt_sd(b, a).replace(1, a.extract(1)) +pub unsafe fn _mm_cmpgt_sd(a: __m128d, b: __m128d) -> __m128d { + simd_insert(_mm_cmplt_sd(b, a), 1, simd_extract::<_, f64>(a, 1)) } /// Return a new vector with the low element of `a` replaced by the @@ -1444,8 +1457,8 @@ pub unsafe fn _mm_cmpgt_sd(a: f64x2, b: f64x2) -> f64x2 { #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(cmplesd))] -pub unsafe fn _mm_cmpge_sd(a: f64x2, b: f64x2) -> f64x2 { - _mm_cmple_sd(b, a).replace(1, a.extract(1)) +pub unsafe fn _mm_cmpge_sd(a: __m128d, b: __m128d) -> __m128d { + simd_insert(_mm_cmple_sd(b, a), 1, simd_extract::<_, f64>(a, 1)) } /// Return a new vector with the low element of `a` replaced by the result @@ -1455,7 +1468,7 @@ pub unsafe fn _mm_cmpge_sd(a: f64x2, b: f64x2) -> f64x2 { #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(cmpordsd))] -pub unsafe fn _mm_cmpord_sd(a: f64x2, b: f64x2) -> f64x2 { +pub unsafe fn _mm_cmpord_sd(a: __m128d, b: __m128d) -> __m128d { cmpsd(a, b, 7) } @@ -1465,7 +1478,7 @@ pub unsafe fn _mm_cmpord_sd(a: f64x2, b: f64x2) -> f64x2 { #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(cmpunordsd))] -pub unsafe fn _mm_cmpunord_sd(a: f64x2, b: f64x2) -> f64x2 { +pub unsafe fn _mm_cmpunord_sd(a: __m128d, b: __m128d) -> __m128d { cmpsd(a, b, 3) } @@ -1474,7 +1487,7 @@ pub unsafe fn _mm_cmpunord_sd(a: f64x2, b: f64x2) -> f64x2 { 
#[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(cmpneqsd))] -pub unsafe fn _mm_cmpneq_sd(a: f64x2, b: f64x2) -> f64x2 { +pub unsafe fn _mm_cmpneq_sd(a: __m128d, b: __m128d) -> __m128d { cmpsd(a, b, 4) } @@ -1483,7 +1496,7 @@ pub unsafe fn _mm_cmpneq_sd(a: f64x2, b: f64x2) -> f64x2 { #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(cmpnltsd))] -pub unsafe fn _mm_cmpnlt_sd(a: f64x2, b: f64x2) -> f64x2 { +pub unsafe fn _mm_cmpnlt_sd(a: __m128d, b: __m128d) -> __m128d { cmpsd(a, b, 5) } @@ -1492,7 +1505,7 @@ pub unsafe fn _mm_cmpnlt_sd(a: f64x2, b: f64x2) -> f64x2 { #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(cmpnlesd))] -pub unsafe fn _mm_cmpnle_sd(a: f64x2, b: f64x2) -> f64x2 { +pub unsafe fn _mm_cmpnle_sd(a: __m128d, b: __m128d) -> __m128d { cmpsd(a, b, 6) } @@ -1501,8 +1514,8 @@ pub unsafe fn _mm_cmpnle_sd(a: f64x2, b: f64x2) -> f64x2 { #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(cmpnltsd))] -pub unsafe fn _mm_cmpngt_sd(a: f64x2, b: f64x2) -> f64x2 { - _mm_cmpnlt_sd(b, a).replace(1, a.extract(1)) +pub unsafe fn _mm_cmpngt_sd(a: __m128d, b: __m128d) -> __m128d { + simd_insert(_mm_cmpnlt_sd(b, a), 1, simd_extract::<_, f64>(a, 1)) } /// Return a new vector with the low element of `a` replaced by the @@ -1510,15 +1523,15 @@ pub unsafe fn _mm_cmpngt_sd(a: f64x2, b: f64x2) -> f64x2 { #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(cmpnlesd))] -pub unsafe fn _mm_cmpnge_sd(a: f64x2, b: f64x2) -> f64x2 { - _mm_cmpnle_sd(b, a).replace(1, a.extract(1)) +pub unsafe fn _mm_cmpnge_sd(a: __m128d, b: __m128d) -> __m128d { + simd_insert(_mm_cmpnle_sd(b, a), 1, simd_extract::<_, f64>(a, 1)) } /// Compare corresponding elements in `a` and `b` for equality. 
#[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(cmpeqpd))] -pub unsafe fn _mm_cmpeq_pd(a: f64x2, b: f64x2) -> f64x2 { +pub unsafe fn _mm_cmpeq_pd(a: __m128d, b: __m128d) -> __m128d { cmppd(a, b, 0) } @@ -1526,7 +1539,7 @@ pub unsafe fn _mm_cmpeq_pd(a: f64x2, b: f64x2) -> f64x2 { #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(cmpltpd))] -pub unsafe fn _mm_cmplt_pd(a: f64x2, b: f64x2) -> f64x2 { +pub unsafe fn _mm_cmplt_pd(a: __m128d, b: __m128d) -> __m128d { cmppd(a, b, 1) } @@ -1534,7 +1547,7 @@ pub unsafe fn _mm_cmplt_pd(a: f64x2, b: f64x2) -> f64x2 { #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(cmplepd))] -pub unsafe fn _mm_cmple_pd(a: f64x2, b: f64x2) -> f64x2 { +pub unsafe fn _mm_cmple_pd(a: __m128d, b: __m128d) -> __m128d { cmppd(a, b, 2) } @@ -1542,7 +1555,7 @@ pub unsafe fn _mm_cmple_pd(a: f64x2, b: f64x2) -> f64x2 { #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(cmpltpd))] -pub unsafe fn _mm_cmpgt_pd(a: f64x2, b: f64x2) -> f64x2 { +pub unsafe fn _mm_cmpgt_pd(a: __m128d, b: __m128d) -> __m128d { _mm_cmplt_pd(b, a) } @@ -1550,7 +1563,7 @@ pub unsafe fn _mm_cmpgt_pd(a: f64x2, b: f64x2) -> f64x2 { #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(cmplepd))] -pub unsafe fn _mm_cmpge_pd(a: f64x2, b: f64x2) -> f64x2 { +pub unsafe fn _mm_cmpge_pd(a: __m128d, b: __m128d) -> __m128d { _mm_cmple_pd(b, a) } @@ -1558,7 +1571,7 @@ pub unsafe fn _mm_cmpge_pd(a: f64x2, b: f64x2) -> f64x2 { #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(cmpordpd))] -pub unsafe fn _mm_cmpord_pd(a: f64x2, b: f64x2) -> f64x2 { +pub unsafe fn _mm_cmpord_pd(a: __m128d, b: __m128d) -> __m128d { cmppd(a, b, 7) } @@ -1566,7 +1579,7 @@ pub unsafe fn _mm_cmpord_pd(a: f64x2, b: f64x2) -> f64x2 { #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(cmpunordpd))] -pub unsafe fn _mm_cmpunord_pd(a: f64x2, b: f64x2) -> f64x2 { +pub unsafe fn _mm_cmpunord_pd(a: __m128d, b: __m128d) -> __m128d { cmppd(a, b, 3) } @@ -1574,7 +1587,7 @@ pub unsafe fn _mm_cmpunord_pd(a: f64x2, b: f64x2) -> f64x2 { #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(cmpneqpd))] -pub unsafe fn _mm_cmpneq_pd(a: f64x2, b: f64x2) -> f64x2 { +pub unsafe fn _mm_cmpneq_pd(a: __m128d, b: __m128d) -> __m128d { cmppd(a, b, 4) } @@ -1582,7 +1595,7 @@ pub unsafe fn _mm_cmpneq_pd(a: f64x2, b: f64x2) -> f64x2 { #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(cmpnltpd))] -pub unsafe fn _mm_cmpnlt_pd(a: f64x2, b: f64x2) -> f64x2 { +pub unsafe fn _mm_cmpnlt_pd(a: __m128d, b: __m128d) -> __m128d { cmppd(a, b, 5) } @@ -1590,7 +1603,7 @@ pub unsafe fn _mm_cmpnlt_pd(a: f64x2, b: f64x2) -> f64x2 { #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(cmpnlepd))] -pub unsafe fn _mm_cmpnle_pd(a: f64x2, b: f64x2) -> f64x2 { +pub unsafe fn _mm_cmpnle_pd(a: __m128d, b: __m128d) -> __m128d { cmppd(a, b, 6) } @@ -1598,7 +1611,7 @@ pub unsafe fn _mm_cmpnle_pd(a: f64x2, b: f64x2) -> f64x2 { #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(cmpnltpd))] -pub unsafe fn _mm_cmpngt_pd(a: f64x2, b: f64x2) -> f64x2 { +pub unsafe fn _mm_cmpngt_pd(a: __m128d, b: __m128d) -> __m128d { _mm_cmpnlt_pd(b, a) } @@ -1607,7 +1620,7 @@ pub unsafe fn _mm_cmpngt_pd(a: f64x2, b: f64x2) -> f64x2 { #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(cmpnlepd))] -pub unsafe fn _mm_cmpnge_pd(a: 
f64x2, b: f64x2) -> f64x2 { +pub unsafe fn _mm_cmpnge_pd(a: __m128d, b: __m128d) -> __m128d { _mm_cmpnle_pd(b, a) } @@ -1615,7 +1628,7 @@ pub unsafe fn _mm_cmpnge_pd(a: f64x2, b: f64x2) -> f64x2 { #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(comisd))] -pub unsafe fn _mm_comieq_sd(a: f64x2, b: f64x2) -> bool { +pub unsafe fn _mm_comieq_sd(a: __m128d, b: __m128d) -> bool { comieqsd(a, b) as u8 != 0 } @@ -1623,7 +1636,7 @@ pub unsafe fn _mm_comieq_sd(a: f64x2, b: f64x2) -> bool { #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(comisd))] -pub unsafe fn _mm_comilt_sd(a: f64x2, b: f64x2) -> bool { +pub unsafe fn _mm_comilt_sd(a: __m128d, b: __m128d) -> bool { comiltsd(a, b) as u8 != 0 } @@ -1631,7 +1644,7 @@ pub unsafe fn _mm_comilt_sd(a: f64x2, b: f64x2) -> bool { #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(comisd))] -pub unsafe fn _mm_comile_sd(a: f64x2, b: f64x2) -> bool { +pub unsafe fn _mm_comile_sd(a: __m128d, b: __m128d) -> bool { comilesd(a, b) as u8 != 0 } @@ -1639,7 +1652,7 @@ pub unsafe fn _mm_comile_sd(a: f64x2, b: f64x2) -> bool { #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(comisd))] -pub unsafe fn _mm_comigt_sd(a: f64x2, b: f64x2) -> bool { +pub unsafe fn _mm_comigt_sd(a: __m128d, b: __m128d) -> bool { comigtsd(a, b) as u8 != 0 } @@ -1647,7 +1660,7 @@ pub unsafe fn _mm_comigt_sd(a: f64x2, b: f64x2) -> bool { #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(comisd))] -pub unsafe fn _mm_comige_sd(a: f64x2, b: f64x2) -> bool { +pub unsafe fn _mm_comige_sd(a: __m128d, b: __m128d) -> bool { comigesd(a, b) as u8 != 0 } @@ -1655,7 +1668,7 @@ pub unsafe fn _mm_comige_sd(a: f64x2, b: f64x2) -> bool { #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(comisd))] -pub unsafe fn _mm_comineq_sd(a: f64x2, b: f64x2) -> bool { +pub unsafe fn _mm_comineq_sd(a: __m128d, b: __m128d) -> bool { comineqsd(a, b) as u8 != 0 } @@ -1663,7 +1676,7 @@ pub unsafe fn _mm_comineq_sd(a: f64x2, b: f64x2) -> bool { #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(ucomisd))] -pub unsafe fn _mm_ucomieq_sd(a: f64x2, b: f64x2) -> bool { +pub unsafe fn _mm_ucomieq_sd(a: __m128d, b: __m128d) -> bool { ucomieqsd(a, b) as u8 != 0 } @@ -1671,7 +1684,7 @@ pub unsafe fn _mm_ucomieq_sd(a: f64x2, b: f64x2) -> bool { #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(ucomisd))] -pub unsafe fn _mm_ucomilt_sd(a: f64x2, b: f64x2) -> bool { +pub unsafe fn _mm_ucomilt_sd(a: __m128d, b: __m128d) -> bool { ucomiltsd(a, b) as u8 != 0 } @@ -1679,7 +1692,7 @@ pub unsafe fn _mm_ucomilt_sd(a: f64x2, b: f64x2) -> bool { #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(ucomisd))] -pub unsafe fn _mm_ucomile_sd(a: f64x2, b: f64x2) -> bool { +pub unsafe fn _mm_ucomile_sd(a: __m128d, b: __m128d) -> bool { ucomilesd(a, b) as u8 != 0 } @@ -1687,7 +1700,7 @@ pub unsafe fn _mm_ucomile_sd(a: f64x2, b: f64x2) -> bool { #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(ucomisd))] -pub unsafe fn _mm_ucomigt_sd(a: f64x2, b: f64x2) -> bool { +pub unsafe fn _mm_ucomigt_sd(a: __m128d, b: __m128d) -> bool { ucomigtsd(a, b) as u8 != 0 } @@ -1695,7 +1708,7 @@ pub unsafe fn _mm_ucomigt_sd(a: f64x2, b: f64x2) -> bool { #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(ucomisd))] -pub unsafe fn _mm_ucomige_sd(a: f64x2, b: f64x2) -> bool { +pub unsafe fn 
_mm_ucomige_sd(a: __m128d, b: __m128d) -> bool { ucomigesd(a, b) as u8 != 0 } @@ -1703,7 +1716,7 @@ pub unsafe fn _mm_ucomige_sd(a: f64x2, b: f64x2) -> bool { #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(ucomisd))] -pub unsafe fn _mm_ucomineq_sd(a: f64x2, b: f64x2) -> bool { +pub unsafe fn _mm_ucomineq_sd(a: __m128d, b: __m128d) -> bool { ucomineqsd(a, b) as u8 != 0 } @@ -1712,7 +1725,7 @@ pub unsafe fn _mm_ucomineq_sd(a: f64x2, b: f64x2) -> bool { #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(cvtpd2ps))] -pub unsafe fn _mm_cvtpd_ps(a: f64x2) -> f32x4 { +pub unsafe fn _mm_cvtpd_ps(a: __m128d) -> __m128 { cvtpd2ps(a) } @@ -1722,7 +1735,7 @@ pub unsafe fn _mm_cvtpd_ps(a: f64x2) -> f32x4 { #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(cvtps2pd))] -pub unsafe fn _mm_cvtps_pd(a: f32x4) -> f64x2 { +pub unsafe fn _mm_cvtps_pd(a: __m128) -> __m128d { cvtps2pd(a) } @@ -1731,8 +1744,8 @@ pub unsafe fn _mm_cvtps_pd(a: f32x4) -> f64x2 { #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(cvtpd2dq))] -pub unsafe fn _mm_cvtpd_epi32(a: f64x2) -> i32x4 { - cvtpd2dq(a) +pub unsafe fn _mm_cvtpd_epi32(a: __m128d) -> __m128i { + mem::transmute(cvtpd2dq(a)) } /// Convert the lower double-precision (64-bit) floating-point element in a to @@ -1740,7 +1753,7 @@ pub unsafe fn _mm_cvtpd_epi32(a: f64x2) -> i32x4 { #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(cvtsd2si))] -pub unsafe fn _mm_cvtsd_si32(a: f64x2) -> i32 { +pub unsafe fn _mm_cvtsd_si32(a: __m128d) -> i32 { cvtsd2si(a) } @@ -1751,7 +1764,7 @@ pub unsafe fn _mm_cvtsd_si32(a: f64x2) -> i32 { #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(cvtsd2ss))] -pub unsafe fn _mm_cvtsd_ss(a: f32x4, b: f64x2) -> f32x4 { +pub unsafe fn _mm_cvtsd_ss(a: __m128, b: __m128d) -> __m128 { cvtsd2ss(a, b) } @@ -1759,8 +1772,8 @@ pub unsafe fn _mm_cvtsd_ss(a: f32x4, b: f64x2) -> f32x4 { #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(all(test, windows), assert_instr(movsd))] // FIXME movq/movlps/mov on other platform -pub unsafe fn _mm_cvtsd_f64(a: f64x2) -> f64 { - a.extract(0) +pub unsafe fn _mm_cvtsd_f64(a: __m128d) -> f64 { + simd_extract(a, 0) } /// Convert the lower single-precision (32-bit) floating-point element in `b` @@ -1770,7 +1783,7 @@ pub unsafe fn _mm_cvtsd_f64(a: f64x2) -> f64 { #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(cvtss2sd))] -pub unsafe fn _mm_cvtss_sd(a: f64x2, b: f32x4) -> f64x2 { +pub unsafe fn _mm_cvtss_sd(a: __m128d, b: __m128) -> __m128d { cvtss2sd(a, b) } @@ -1779,8 +1792,8 @@ pub unsafe fn _mm_cvtss_sd(a: f64x2, b: f32x4) -> f64x2 { #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(cvttpd2dq))] -pub unsafe fn _mm_cvttpd_epi32(a: f64x2) -> i32x4 { - cvttpd2dq(a) +pub unsafe fn _mm_cvttpd_epi32(a: __m128d) -> __m128i { + mem::transmute(cvttpd2dq(a)) } /// Convert the lower double-precision (64-bit) floating-point element in `a` @@ -1788,7 +1801,7 @@ pub unsafe fn _mm_cvttpd_epi32(a: f64x2) -> i32x4 { #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(cvttsd2si))] -pub unsafe fn _mm_cvttsd_si32(a: f64x2) -> i32 { +pub unsafe fn _mm_cvttsd_si32(a: __m128d) -> i32 { cvttsd2si(a) } @@ -1797,48 +1810,48 @@ pub unsafe fn _mm_cvttsd_si32(a: f64x2) -> i32 { #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(cvttps2dq))] -pub unsafe fn _mm_cvttps_epi32(a: f32x4) -> i32x4 
{ - cvttps2dq(a) +pub unsafe fn _mm_cvttps_epi32(a: __m128) -> __m128i { + mem::transmute(cvttps2dq(a)) } -/// Copy double-precision (64-bit) floating-point element `a` to the lower -/// element of the packed 64-bit return value. +/// Copy double-precision (64-bit) floating-point element `a` to the lower +/// element of the return value, and zero the upper element. #[inline(always)] #[target_feature = "+sse2"] -pub unsafe fn _mm_set_sd(a: f64) -> f64x2 { - f64x2::new(a, 0_f64) +pub unsafe fn _mm_set_sd(a: f64) -> __m128d { + _mm_set_pd(0.0, a) } /// Broadcast double-precision (64-bit) floating-point value a to all elements /// of the return value. #[inline(always)] #[target_feature = "+sse2"] -pub unsafe fn _mm_set1_pd(a: f64) -> f64x2 { - f64x2::new(a, a) +pub unsafe fn _mm_set1_pd(a: f64) -> __m128d { + _mm_set_pd(a, a) } /// Broadcast double-precision (64-bit) floating-point value a to all elements /// of the return value. #[inline(always)] #[target_feature = "+sse2"] -pub unsafe fn _mm_set_pd1(a: f64) -> f64x2 { - f64x2::new(a, a) +pub unsafe fn _mm_set_pd1(a: f64) -> __m128d { + _mm_set_pd(a, a) } /// Set packed double-precision (64-bit) floating-point elements in the return /// value with the supplied values. #[inline(always)] #[target_feature = "+sse2"] -pub unsafe fn _mm_set_pd(a: f64, b: f64) -> f64x2 { - f64x2::new(b, a) +pub unsafe fn _mm_set_pd(a: f64, b: f64) -> __m128d { + __m128d(b, a) } /// Set packed double-precision (64-bit) floating-point elements in the return /// value with the supplied values in reverse order. #[inline(always)] #[target_feature = "+sse2"] -pub unsafe fn _mm_setr_pd(a: f64, b: f64) -> f64x2 { - f64x2::new(a, b) +pub unsafe fn _mm_setr_pd(a: f64, b: f64) -> __m128d { + _mm_set_pd(b, a) } /// Returns packed double-precision (64-bit) floating-point elements with all @@ -1846,8 +1859,8 @@ pub unsafe fn _mm_setr_pd(a: f64, b: f64) -> f64x2 { #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(xorps))] // FIXME xorpd expected -pub unsafe fn _mm_setzero_pd() -> f64x2 { - f64x2::splat(0_f64) +pub unsafe fn _mm_setzero_pd() -> __m128d { + _mm_set_pd(0.0, 0.0) } /// Return a mask of the most significant bit of each element in `a`. @@ -1857,7 +1870,7 @@ pub unsafe fn _mm_setzero_pd() -> f64x2 { #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(movmskpd))] -pub unsafe fn _mm_movemask_pd(a: f64x2) -> i32 { +pub unsafe fn _mm_movemask_pd(a: __m128d) -> i32 { movmskpd(a) } @@ -1868,8 +1881,8 @@ pub unsafe fn _mm_movemask_pd(a: f64x2) -> i32 { #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(movaps))] -pub unsafe fn _mm_load_pd(mem_addr: *const f64) -> f64x2 { - *(mem_addr as *const f64x2) +pub unsafe fn _mm_load_pd(mem_addr: *const f64) -> __m128d { + *(mem_addr as *const __m128d) } /// Loads a 64-bit double-precision value to the low element of a @@ -1877,8 +1890,8 @@ pub unsafe fn _mm_load_pd(mem_addr: *const f64) -> f64x2 { #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(movsd))] -pub unsafe fn _mm_load_sd(mem_addr: *const f64) -> f64x2 { - f64x2::new(*mem_addr, 0.) +pub unsafe fn _mm_load_sd(mem_addr: *const f64) -> __m128d { + _mm_setr_pd(*mem_addr, 0.) 
} /// Loads a double-precision value into the high-order bits of a 128-bit @@ -1887,8 +1900,8 @@ pub unsafe fn _mm_load_sd(mem_addr: *const f64) -> f64x2 { #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(movhpd))] -pub unsafe fn _mm_loadh_pd(a: f64x2, mem_addr: *const f64) -> f64x2 { - f64x2::new(a.extract(0), *mem_addr) +pub unsafe fn _mm_loadh_pd(a: __m128d, mem_addr: *const f64) -> __m128d { + _mm_setr_pd(simd_extract(a, 0), *mem_addr) } /// Loads a double-precision value into the low-order bits of a 128-bit @@ -1897,8 +1910,8 @@ pub unsafe fn _mm_loadh_pd(a: f64x2, mem_addr: *const f64) -> f64x2 { #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(movlpd))] -pub unsafe fn _mm_loadl_pd(a: f64x2, mem_addr: *const f64) -> f64x2 { - f64x2::new(*mem_addr, a.extract(1)) +pub unsafe fn _mm_loadl_pd(a: __m128d, mem_addr: *const f64) -> __m128d { + _mm_setr_pd(*mem_addr, simd_extract(a, 1)) } /// Stores a 128-bit floating point vector of [2 x double] to a 128-bit @@ -1908,7 +1921,7 @@ pub unsafe fn _mm_loadl_pd(a: f64x2, mem_addr: *const f64) -> f64x2 { #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(movntps))] // FIXME movntpd -pub unsafe fn _mm_stream_pd(mem_addr: *mut f64, a: f64x2) { +pub unsafe fn _mm_stream_pd(mem_addr: *mut f64, a: __m128d) { ::core::intrinsics::nontemporal_store(mem::transmute(mem_addr), a); } @@ -1917,8 +1930,8 @@ pub unsafe fn _mm_stream_pd(mem_addr: *mut f64, a: f64x2) { #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(all(test, not(windows)), assert_instr(movlps))] // FIXME movsd only on windows -pub unsafe fn _mm_store_sd(mem_addr: *mut f64, a: f64x2) { - *mem_addr = a.extract(0) +pub unsafe fn _mm_store_sd(mem_addr: *mut f64, a: __m128d) { + *mem_addr = simd_extract(a, 0) } /// Store 128-bits (composed of 2 packed double-precision (64-bit) @@ -1927,8 +1940,8 @@ pub unsafe fn _mm_store_sd(mem_addr: *mut f64, a: f64x2) { #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(movaps))] -pub unsafe fn _mm_store_pd(mem_addr: *mut f64, a: f64x2) { - *(mem_addr as *mut f64x2) = a; +pub unsafe fn _mm_store_pd(mem_addr: *mut f64, a: __m128d) { + *(mem_addr as *mut __m128d) = a; } /// Store 128-bits (composed of 2 packed double-precision (64-bit) @@ -1937,7 +1950,7 @@ pub unsafe fn _mm_store_pd(mem_addr: *mut f64, a: f64x2) { #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(movups))] // FIXME movupd expected -pub unsafe fn _mm_storeu_pd(mem_addr: *mut f64, a: f64x2) { +pub unsafe fn _mm_storeu_pd(mem_addr: *mut f64, a: __m128d) { storeupd(mem_addr as *mut i8, a); } @@ -1946,9 +1959,9 @@ pub unsafe fn _mm_storeu_pd(mem_addr: *mut f64, a: f64x2) { /// 16-byte boundary or a general-protection exception may be generated. #[inline(always)] #[target_feature = "+sse2"] -pub unsafe fn _mm_store1_pd(mem_addr: *mut f64, a: f64x2) { - let b: f64x2 = simd_shuffle2(a, a, [0, 0]); - *(mem_addr as *mut f64x2) = b; +pub unsafe fn _mm_store1_pd(mem_addr: *mut f64, a: __m128d) { + let b: __m128d = simd_shuffle2(a, a, [0, 0]); + *(mem_addr as *mut __m128d) = b; } /// Store the lower double-precision (64-bit) floating-point element from `a` @@ -1956,9 +1969,9 @@ pub unsafe fn _mm_store1_pd(mem_addr: *mut f64, a: f64x2) { /// 16-byte boundary or a general-protection exception may be generated. 
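// Editor's note: `_mm_store1_pd` above (and `_mm_store_pd1` below) broadcast
// the low lane into both memory slots via `simd_shuffle2(a, a, [0, 0])`.
// A sketch of that aligned-store behaviour, in the style of the test module;
// the `Aligned16` wrapper and the example name are ours:
#[repr(align(16))]
struct Aligned16([f64; 2]);

#[simd_test = "sse2"]
unsafe fn example_mm_store1_pd_broadcast() {
    let mut mem = Aligned16([0.0; 2]);
    _mm_store1_pd(mem.0.as_mut_ptr(), _mm_setr_pd(7.0, 8.0));
    // the low lane (7.0) lands in both slots
    assert_eq!(mem.0, [7.0, 7.0]);
}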
#[inline(always)] #[target_feature = "+sse2"] -pub unsafe fn _mm_store_pd1(mem_addr: *mut f64, a: f64x2) { - let b: f64x2 = simd_shuffle2(a, a, [0, 0]); - *(mem_addr as *mut f64x2) = b; +pub unsafe fn _mm_store_pd1(mem_addr: *mut f64, a: __m128d) { + let b: __m128d = simd_shuffle2(a, a, [0, 0]); + *(mem_addr as *mut __m128d) = b; } /// Store 2 double-precision (64-bit) floating-point elements from `a` into @@ -1967,9 +1980,9 @@ pub unsafe fn _mm_store_pd1(mem_addr: *mut f64, a: f64x2) { /// exception may be generated. #[inline(always)] #[target_feature = "+sse2"] -pub unsafe fn _mm_storer_pd(mem_addr: *mut f64, a: f64x2) { - let b: f64x2 = simd_shuffle2(a, a, [1, 0]); - *(mem_addr as *mut f64x2) = b; +pub unsafe fn _mm_storer_pd(mem_addr: *mut f64, a: __m128d) { + let b: __m128d = simd_shuffle2(a, a, [1, 0]); + *(mem_addr as *mut __m128d) = b; } /// Stores the upper 64 bits of a 128-bit vector of [2 x double] to a @@ -1977,8 +1990,8 @@ pub unsafe fn _mm_storer_pd(mem_addr: *mut f64, a: f64x2) { #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(movhpd))] -pub unsafe fn _mm_storeh_pd(mem_addr: *mut f64, a: f64x2) { - *mem_addr = a.extract(1) +pub unsafe fn _mm_storeh_pd(mem_addr: *mut f64, a: __m128d) { + *mem_addr = simd_extract(a, 1); } /// Stores the lower 64 bits of a 128-bit vector of [2 x double] to a @@ -1986,8 +1999,8 @@ pub unsafe fn _mm_storeh_pd(mem_addr: *mut f64, a: f64x2) { #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(all(test, not(windows)), assert_instr(movlps))] // FIXME movlpd (movsd on windows) -pub unsafe fn _mm_storel_pd(mem_addr: *mut f64, a: f64x2) { - *mem_addr = a.extract(0) +pub unsafe fn _mm_storel_pd(mem_addr: *mut f64, a: __m128d) { + *mem_addr = simd_extract(a, 0); } /// Load a double-precision (64-bit) floating-point element from memory @@ -1995,9 +2008,9 @@ pub unsafe fn _mm_storel_pd(mem_addr: *mut f64, a: f64x2) { #[inline(always)] #[target_feature = "+sse2"] //#[cfg_attr(test, assert_instr(movapd))] FIXME movapd expected -pub unsafe fn _mm_load1_pd(mem_addr: *const f64) -> f64x2 { +pub unsafe fn _mm_load1_pd(mem_addr: *const f64) -> __m128d { let d = *mem_addr; - f64x2::new(d, d) + _mm_setr_pd(d, d) } /// Load a double-precision (64-bit) floating-point element from memory @@ -2005,9 +2018,9 @@ pub unsafe fn _mm_load1_pd(mem_addr: *const f64) -> f64x2 { #[inline(always)] #[target_feature = "+sse2"] //#[cfg_attr(test, assert_instr(movapd))] FIXME movapd expected -pub unsafe fn _mm_load_pd1(mem_addr: *const f64) -> f64x2 { +pub unsafe fn _mm_load_pd1(mem_addr: *const f64) -> __m128d { let d = *mem_addr; - f64x2::new(d, d) + _mm_setr_pd(d, d) } /// Load 2 double-precision (64-bit) floating-point elements from memory into @@ -2016,7 +2029,7 @@ pub unsafe fn _mm_load_pd1(mem_addr: *const f64) -> f64x2 { #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(movapd))] -pub unsafe fn _mm_loadr_pd(mem_addr: *const f64) -> f64x2 { +pub unsafe fn _mm_loadr_pd(mem_addr: *const f64) -> __m128d { let a = _mm_load_pd(mem_addr); simd_shuffle2(a, a, [1, 0]) } @@ -2027,12 +2040,12 @@ pub unsafe fn _mm_loadr_pd(mem_addr: *const f64) -> f64x2 { #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(movups))] -pub unsafe fn _mm_loadu_pd(mem_addr: *const f64) -> f64x2 { +pub unsafe fn _mm_loadu_pd(mem_addr: *const f64) -> __m128d { let mut dst = _mm_undefined_pd(); ptr::copy_nonoverlapping( mem_addr as *const u8, - &mut dst as *mut f64x2 as *mut u8, - mem::size_of::(), + &mut dst as *mut __m128d as 
*mut u8, + mem::size_of::<__m128d>(), ); dst } @@ -2043,7 +2056,7 @@ pub unsafe fn _mm_loadu_pd(mem_addr: *const f64) -> f64x2 { #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(shufpd, imm8 = 1))] -pub unsafe fn _mm_shuffle_pd(a: f64x2, b: f64x2, imm8: i32) -> f64x2 { +pub unsafe fn _mm_shuffle_pd(a: __m128d, b: __m128d, imm8: i32) -> __m128d { match imm8 & 0b11 { 0b00 => simd_shuffle2(a, b, [0, 2]), 0b01 => simd_shuffle2(a, b, [1, 2]), @@ -2058,15 +2071,15 @@ pub unsafe fn _mm_shuffle_pd(a: f64x2, b: f64x2, imm8: i32) -> f64x2 { #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(movsd))] -pub unsafe fn _mm_move_sd(a: f64x2, b: f64x2) -> f64x2 { - f64x2::new(b.extract(0), a.extract(1)) +pub unsafe fn _mm_move_sd(a: __m128d, b: __m128d) -> __m128d { + _mm_setr_pd(simd_extract(b, 0), simd_extract(a, 1)) } /// Casts a 128-bit floating-point vector of [2 x double] into a 128-bit /// floating-point vector of [4 x float]. #[inline(always)] #[target_feature = "+sse2"] -pub unsafe fn _mm_castpd_ps(a: f64x2) -> f32x4 { +pub unsafe fn _mm_castpd_ps(a: __m128d) -> __m128 { mem::transmute(a) } @@ -2074,7 +2087,7 @@ pub unsafe fn _mm_castpd_ps(a: f64x2) -> f32x4 { /// integer vector. #[inline(always)] #[target_feature = "+sse2"] -pub unsafe fn _mm_castpd_si128(a: f64x2) -> __m128i { - simd_cast(a) +pub unsafe fn _mm_castpd_si128(a: __m128d) -> __m128i { + mem::transmute(a) } @@ -2082,7 +2095,7 @@ pub unsafe fn _mm_castpd_si128(a: f64x2) -> __m128i { /// floating-point vector of [2 x double]. #[inline(always)] #[target_feature = "+sse2"] -pub unsafe fn _mm_castps_pd(a: f32x4) -> f64x2 { +pub unsafe fn _mm_castps_pd(a: __m128) -> __m128d { mem::transmute(a) } @@ -2090,7 +2103,7 @@ pub unsafe fn _mm_castps_pd(a: f32x4) -> f64x2 { /// integer vector. #[inline(always)] #[target_feature = "+sse2"] -pub unsafe fn _mm_castps_si128(a: f32x4) -> __m128i { +pub unsafe fn _mm_castps_si128(a: __m128) -> __m128i { mem::transmute(a) } @@ -2098,7 +2111,7 @@ pub unsafe fn _mm_castps_si128(a: f32x4) -> __m128i { /// of [2 x double]. #[inline(always)] #[target_feature = "+sse2"] -pub unsafe fn _mm_castsi128_pd(a: __m128i) -> f64x2 { - simd_cast(a) +pub unsafe fn _mm_castsi128_pd(a: __m128i) -> __m128d { + mem::transmute(a) } @@ -2106,45 +2119,45 @@ pub unsafe fn _mm_castsi128_pd(a: __m128i) -> f64x2 { /// of [4 x float]. #[inline(always)] #[target_feature = "+sse2"] -pub unsafe fn _mm_castsi128_ps(a: __m128i) -> f32x4 { +pub unsafe fn _mm_castsi128_ps(a: __m128i) -> __m128 { mem::transmute(a) } /// Return vector of type __m128d with undefined elements. #[inline(always)] #[target_feature = "+sse2"] -pub unsafe fn _mm_undefined_pd() -> f64x2 { - f64x2::splat(mem::uninitialized()) +pub unsafe fn _mm_undefined_pd() -> __m128d { + _mm_set1_pd(mem::uninitialized()) } /// Return vector of type __m128i with undefined elements. 
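// Editor's note: in `_mm_shuffle_pd` above, bit 0 of `imm8` selects which
// lane of `a` becomes the low lane, and bit 1 selects which lane of `b`
// becomes the high lane, matching the match arms. A small illustration in
// the style of the test module (example name is ours):
#[simd_test = "sse2"]
unsafe fn example_mm_shuffle_pd_imm() {
    let a = _mm_setr_pd(1.0, 2.0);
    let b = _mm_setr_pd(3.0, 4.0);
    // imm8 = 0b01: low lane = a[1], high lane = b[0]
    let r = _mm_shuffle_pd(a, b, 0b01);
    assert_eq_m128d(r, _mm_setr_pd(2.0, 3.0));
}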
#[inline(always)] #[target_feature = "+sse2"] pub unsafe fn _mm_undefined_si128() -> __m128i { - mem::transmute(i32x4::splat(mem::uninitialized())) + _mm_set1_epi8(mem::uninitialized()) } -/// The resulting `f64x2` element is composed by the low-order values of -/// the two `f64x2` interleaved input elements, i.e.: +/// The resulting `__m128d` element is composed by the low-order values of +/// the two `__m128d` interleaved input elements, i.e.: /// /// * The [127:64] bits are copied from the [127:64] bits of the second input /// * The [63:0] bits are copied from the [127:64] bits of the first input #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(unpckhpd))] -pub unsafe fn _mm_unpackhi_pd(a: f64x2, b: f64x2) -> f64x2 { +pub unsafe fn _mm_unpackhi_pd(a: __m128d, b: __m128d) -> __m128d { simd_shuffle2(a, b, [1, 3]) } -/// The resulting `f64x2` element is composed by the high-order values of -/// the two `f64x2` interleaved input elements, i.e.: +/// The resulting `__m128d` element is composed by the high-order values of +/// the two `__m128d` interleaved input elements, i.e.: /// /// * The [127:64] bits are copied from the [63:0] bits of the second input /// * The [63:0] bits are copied from the [63:0] bits of the first input #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(unpcklpd))] -pub unsafe fn _mm_unpacklo_pd(a: f64x2, b: f64x2) -> f64x2 { +pub unsafe fn _mm_unpacklo_pd(a: __m128d, b: __m128d) -> __m128d { simd_shuffle2(a, b, [0, 2]) } @@ -2229,9 +2242,9 @@ extern "C" { #[link_name = "llvm.x86.sse2.psrl.q"] fn psrlq(a: i64x2, count: i64x2) -> i64x2; #[link_name = "llvm.x86.sse2.cvtdq2ps"] - fn cvtdq2ps(a: i32x4) -> f32x4; + fn cvtdq2ps(a: i32x4) -> __m128; #[link_name = "llvm.x86.sse2.cvtps2dq"] - fn cvtps2dq(a: f32x4) -> i32x4; + fn cvtps2dq(a: __m128) -> i32x4; #[link_name = "llvm.x86.sse2.maskmov.dqu"] fn maskmovdqu(a: i8x16, mask: i8x16, mem_addr: *mut i8); #[link_name = "llvm.x86.sse2.packsswb.128"] @@ -2243,501 +2256,514 @@ extern "C" { #[link_name = "llvm.x86.sse2.pmovmskb.128"] fn pmovmskb(a: i8x16) -> i32; #[link_name = "llvm.x86.sse2.max.sd"] - fn maxsd(a: f64x2, b: f64x2) -> f64x2; + fn maxsd(a: __m128d, b: __m128d) -> __m128d; #[link_name = "llvm.x86.sse2.max.pd"] - fn maxpd(a: f64x2, b: f64x2) -> f64x2; + fn maxpd(a: __m128d, b: __m128d) -> __m128d; #[link_name = "llvm.x86.sse2.min.sd"] - fn minsd(a: f64x2, b: f64x2) -> f64x2; + fn minsd(a: __m128d, b: __m128d) -> __m128d; #[link_name = "llvm.x86.sse2.min.pd"] - fn minpd(a: f64x2, b: f64x2) -> f64x2; + fn minpd(a: __m128d, b: __m128d) -> __m128d; #[link_name = "llvm.x86.sse2.sqrt.sd"] - fn sqrtsd(a: f64x2) -> f64x2; + fn sqrtsd(a: __m128d) -> __m128d; #[link_name = "llvm.x86.sse2.sqrt.pd"] - fn sqrtpd(a: f64x2) -> f64x2; + fn sqrtpd(a: __m128d) -> __m128d; #[link_name = "llvm.x86.sse2.cmp.sd"] - fn cmpsd(a: f64x2, b: f64x2, imm8: i8) -> f64x2; + fn cmpsd(a: __m128d, b: __m128d, imm8: i8) -> __m128d; #[link_name = "llvm.x86.sse2.cmp.pd"] - fn cmppd(a: f64x2, b: f64x2, imm8: i8) -> f64x2; + fn cmppd(a: __m128d, b: __m128d, imm8: i8) -> __m128d; #[link_name = "llvm.x86.sse2.comieq.sd"] - fn comieqsd(a: f64x2, b: f64x2) -> i32; + fn comieqsd(a: __m128d, b: __m128d) -> i32; #[link_name = "llvm.x86.sse2.comilt.sd"] - fn comiltsd(a: f64x2, b: f64x2) -> i32; + fn comiltsd(a: __m128d, b: __m128d) -> i32; #[link_name = "llvm.x86.sse2.comile.sd"] - fn comilesd(a: f64x2, b: f64x2) -> i32; + fn comilesd(a: __m128d, b: __m128d) -> i32; #[link_name = "llvm.x86.sse2.comigt.sd"] - fn 
comigtsd(a: f64x2, b: f64x2) -> i32; + fn comigtsd(a: __m128d, b: __m128d) -> i32; #[link_name = "llvm.x86.sse2.comige.sd"] - fn comigesd(a: f64x2, b: f64x2) -> i32; + fn comigesd(a: __m128d, b: __m128d) -> i32; #[link_name = "llvm.x86.sse2.comineq.sd"] - fn comineqsd(a: f64x2, b: f64x2) -> i32; + fn comineqsd(a: __m128d, b: __m128d) -> i32; #[link_name = "llvm.x86.sse2.ucomieq.sd"] - fn ucomieqsd(a: f64x2, b: f64x2) -> i32; + fn ucomieqsd(a: __m128d, b: __m128d) -> i32; #[link_name = "llvm.x86.sse2.ucomilt.sd"] - fn ucomiltsd(a: f64x2, b: f64x2) -> i32; + fn ucomiltsd(a: __m128d, b: __m128d) -> i32; #[link_name = "llvm.x86.sse2.ucomile.sd"] - fn ucomilesd(a: f64x2, b: f64x2) -> i32; + fn ucomilesd(a: __m128d, b: __m128d) -> i32; #[link_name = "llvm.x86.sse2.ucomigt.sd"] - fn ucomigtsd(a: f64x2, b: f64x2) -> i32; + fn ucomigtsd(a: __m128d, b: __m128d) -> i32; #[link_name = "llvm.x86.sse2.ucomige.sd"] - fn ucomigesd(a: f64x2, b: f64x2) -> i32; + fn ucomigesd(a: __m128d, b: __m128d) -> i32; #[link_name = "llvm.x86.sse2.ucomineq.sd"] - fn ucomineqsd(a: f64x2, b: f64x2) -> i32; + fn ucomineqsd(a: __m128d, b: __m128d) -> i32; #[link_name = "llvm.x86.sse2.movmsk.pd"] - fn movmskpd(a: f64x2) -> i32; + fn movmskpd(a: __m128d) -> i32; #[link_name = "llvm.x86.sse2.cvtpd2ps"] - fn cvtpd2ps(a: f64x2) -> f32x4; + fn cvtpd2ps(a: __m128d) -> __m128; #[link_name = "llvm.x86.sse2.cvtps2pd"] - fn cvtps2pd(a: f32x4) -> f64x2; + fn cvtps2pd(a: __m128) -> __m128d; #[link_name = "llvm.x86.sse2.cvtpd2dq"] - fn cvtpd2dq(a: f64x2) -> i32x4; + fn cvtpd2dq(a: __m128d) -> i32x4; #[link_name = "llvm.x86.sse2.cvtsd2si"] - fn cvtsd2si(a: f64x2) -> i32; + fn cvtsd2si(a: __m128d) -> i32; #[link_name = "llvm.x86.sse2.cvtsd2ss"] - fn cvtsd2ss(a: f32x4, b: f64x2) -> f32x4; + fn cvtsd2ss(a: __m128, b: __m128d) -> __m128; #[link_name = "llvm.x86.sse2.cvtss2sd"] - fn cvtss2sd(a: f64x2, b: f32x4) -> f64x2; + fn cvtss2sd(a: __m128d, b: __m128) -> __m128d; #[link_name = "llvm.x86.sse2.cvttpd2dq"] - fn cvttpd2dq(a: f64x2) -> i32x4; + fn cvttpd2dq(a: __m128d) -> i32x4; #[link_name = "llvm.x86.sse2.cvttsd2si"] - fn cvttsd2si(a: f64x2) -> i32; + fn cvttsd2si(a: __m128d) -> i32; #[link_name = "llvm.x86.sse2.cvttps2dq"] - fn cvttps2dq(a: f32x4) -> i32x4; + fn cvttps2dq(a: __m128) -> i32x4; #[link_name = "llvm.x86.sse2.storeu.dq"] fn storeudq(mem_addr: *mut i8, a: __m128i); #[link_name = "llvm.x86.sse2.storeu.pd"] - fn storeupd(mem_addr: *mut i8, a: f64x2); + fn storeupd(mem_addr: *mut i8, a: __m128d); } #[cfg(test)] mod tests { + use std::mem::transmute; + use std::f64::{self, NAN}; + use std::f32; + use std::i32; + use stdsimd_test::simd_test; use test::black_box; // Used to inhibit constant-folding. 
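// Editor's note: with the `__m128i`-based tests below, unsigned lane values
// have to be expressed through the signed `_mm_set*` constructors; the patch
// writes `!0` (all bits set) where the old tests used `u8x16::splat(0xFF)`
// or `u16x8::splat(0xFFFF)`, since `0xFF` does not fit in `i8`. Equivalent
// spellings, as a sketch (example name is ours):
#[simd_test = "sse2"]
unsafe fn example_unsigned_max_lanes() {
    // `!0` is -1 in i8/i16, i.e. the same bit pattern as u8 0xFF / u16 0xFFFF
    assert_eq!(_mm_set1_epi8(!0), _mm_set1_epi8(0xFFu8 as i8));
    assert_eq!(_mm_set1_epi16(!0), _mm_set1_epi16(0xFFFFu16 as i16));
}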
+ use x86::*; + use v128::*; - use v128::*; - use x86::i586::sse2; + // not actually an intrinsic in SSE2, but useful in the tests below + #[target_feature = "+sse2"] + unsafe fn _mm_setr_epi64x(a: i64, b: i64) -> __m128i { + _mm_set_epi64x(b, a) + } #[simd_test = "sse2"] - unsafe fn _mm_pause() { - sse2::_mm_pause(); + unsafe fn test_mm_pause() { + _mm_pause(); } #[simd_test = "sse2"] - unsafe fn _mm_clflush() { + unsafe fn test_mm_clflush() { let x = 0; - sse2::_mm_clflush(&x as *const _ as *mut u8); + _mm_clflush(&x as *const _ as *mut u8); } #[simd_test = "sse2"] - unsafe fn _mm_lfence() { - sse2::_mm_lfence(); + unsafe fn test_mm_lfence() { + _mm_lfence(); } #[simd_test = "sse2"] - unsafe fn _mm_mfence() { - sse2::_mm_mfence(); + unsafe fn test_mm_mfence() { + _mm_mfence(); } #[simd_test = "sse2"] - unsafe fn _mm_add_epi8() { + unsafe fn test_mm_add_epi8() { let a = - i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); #[cfg_attr(rustfmt, rustfmt_skip)] - let b = i8x16::new( + let b = _mm_setr_epi8( 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, ); - let r = sse2::_mm_add_epi8(a, b); + let r = _mm_add_epi8(a, b); #[cfg_attr(rustfmt, rustfmt_skip)] - let e = i8x16::new( + let e = _mm_setr_epi8( 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, ); assert_eq!(r, e); } #[simd_test = "sse2"] - unsafe fn _mm_add_epi8_overflow() { - let a = i8x16::splat(0x7F); - let b = i8x16::splat(1); - let r = sse2::_mm_add_epi8(a, b); - assert_eq!(r, i8x16::splat(-128)); + unsafe fn test_mm_add_epi8_overflow() { + let a = _mm_set1_epi8(0x7F); + let b = _mm_set1_epi8(1); + let r = _mm_add_epi8(a, b); + assert_eq!(r, _mm_set1_epi8(-128)); } #[simd_test = "sse2"] - unsafe fn _mm_add_epi16() { - let a = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let b = i16x8::new(8, 9, 10, 11, 12, 13, 14, 15); - let r = sse2::_mm_add_epi16(a, b); - let e = i16x8::new(8, 10, 12, 14, 16, 18, 20, 22); + unsafe fn test_mm_add_epi16() { + let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7); + let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm_add_epi16(a, b); + let e = _mm_setr_epi16(8, 10, 12, 14, 16, 18, 20, 22); assert_eq!(r, e); } #[simd_test = "sse2"] - unsafe fn _mm_add_epi32() { - let a = i32x4::new(0, 1, 2, 3); - let b = i32x4::new(4, 5, 6, 7); - let r = sse2::_mm_add_epi32(a, b); - let e = i32x4::new(4, 6, 8, 10); + unsafe fn test_mm_add_epi32() { + let a = _mm_setr_epi32(0, 1, 2, 3); + let b = _mm_setr_epi32(4, 5, 6, 7); + let r = _mm_add_epi32(a, b); + let e = _mm_setr_epi32(4, 6, 8, 10); assert_eq!(r, e); } #[simd_test = "sse2"] - unsafe fn _mm_add_epi64() { - let a = i64x2::new(0, 1); - let b = i64x2::new(2, 3); - let r = sse2::_mm_add_epi64(a, b); - let e = i64x2::new(2, 4); + unsafe fn test_mm_add_epi64() { + let a = _mm_setr_epi64x(0, 1); + let b = _mm_setr_epi64x(2, 3); + let r = _mm_add_epi64(a, b); + let e = _mm_setr_epi64x(2, 4); assert_eq!(r, e); } #[simd_test = "sse2"] - unsafe fn _mm_adds_epi8() { + unsafe fn test_mm_adds_epi8() { let a = - i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); #[cfg_attr(rustfmt, rustfmt_skip)] - let b = i8x16::new( + let b = _mm_setr_epi8( 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, ); - let r = sse2::_mm_adds_epi8(a, b); + let r = _mm_adds_epi8(a, b); #[cfg_attr(rustfmt, rustfmt_skip)] - let e = i8x16::new( + let e = _mm_setr_epi8( 16, 18, 20, 22, 
24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, ); assert_eq!(r, e); } #[simd_test = "sse2"] - unsafe fn _mm_adds_epi8_saturate_positive() { - let a = i8x16::splat(0x7F); - let b = i8x16::splat(1); - let r = sse2::_mm_adds_epi8(a, b); + unsafe fn test_mm_adds_epi8_saturate_positive() { + let a = _mm_set1_epi8(0x7F); + let b = _mm_set1_epi8(1); + let r = _mm_adds_epi8(a, b); assert_eq!(r, a); } #[simd_test = "sse2"] - unsafe fn _mm_adds_epi8_saturate_negative() { - let a = i8x16::splat(-0x80); - let b = i8x16::splat(-1); - let r = sse2::_mm_adds_epi8(a, b); + unsafe fn test_mm_adds_epi8_saturate_negative() { + let a = _mm_set1_epi8(-0x80); + let b = _mm_set1_epi8(-1); + let r = _mm_adds_epi8(a, b); assert_eq!(r, a); } #[simd_test = "sse2"] - unsafe fn _mm_adds_epi16() { - let a = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let b = i16x8::new(8, 9, 10, 11, 12, 13, 14, 15); - let r = sse2::_mm_adds_epi16(a, b); - let e = i16x8::new(8, 10, 12, 14, 16, 18, 20, 22); + unsafe fn test_mm_adds_epi16() { + let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7); + let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm_adds_epi16(a, b); + let e = _mm_setr_epi16(8, 10, 12, 14, 16, 18, 20, 22); assert_eq!(r, e); } #[simd_test = "sse2"] - unsafe fn _mm_adds_epi16_saturate_positive() { - let a = i16x8::splat(0x7FFF); - let b = i16x8::splat(1); - let r = sse2::_mm_adds_epi16(a, b); + unsafe fn test_mm_adds_epi16_saturate_positive() { + let a = _mm_set1_epi16(0x7FFF); + let b = _mm_set1_epi16(1); + let r = _mm_adds_epi16(a, b); assert_eq!(r, a); } #[simd_test = "sse2"] - unsafe fn _mm_adds_epi16_saturate_negative() { - let a = i16x8::splat(-0x8000); - let b = i16x8::splat(-1); - let r = sse2::_mm_adds_epi16(a, b); + unsafe fn test_mm_adds_epi16_saturate_negative() { + let a = _mm_set1_epi16(-0x8000); + let b = _mm_set1_epi16(-1); + let r = _mm_adds_epi16(a, b); assert_eq!(r, a); } #[simd_test = "sse2"] - unsafe fn _mm_adds_epu8() { + unsafe fn test_mm_adds_epu8() { let a = - u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); #[cfg_attr(rustfmt, rustfmt_skip)] - let b = u8x16::new( + let b = _mm_setr_epi8( 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, ); - let r = sse2::_mm_adds_epu8(a, b); + let r = _mm_adds_epu8(a, b); #[cfg_attr(rustfmt, rustfmt_skip)] - let e = u8x16::new( + let e = _mm_setr_epi8( 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, ); assert_eq!(r, e); } #[simd_test = "sse2"] - unsafe fn _mm_adds_epu8_saturate() { - let a = u8x16::splat(0xFF); - let b = u8x16::splat(1); - let r = sse2::_mm_adds_epu8(a, b); + unsafe fn test_mm_adds_epu8_saturate() { + let a = _mm_set1_epi8(!0); + let b = _mm_set1_epi8(1); + let r = _mm_adds_epu8(a, b); assert_eq!(r, a); } #[simd_test = "sse2"] - unsafe fn _mm_adds_epu16() { - let a = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let b = u16x8::new(8, 9, 10, 11, 12, 13, 14, 15); - let r = sse2::_mm_adds_epu16(a, b); - let e = u16x8::new(8, 10, 12, 14, 16, 18, 20, 22); + unsafe fn test_mm_adds_epu16() { + let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7); + let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm_adds_epu16(a, b); + let e = _mm_setr_epi16(8, 10, 12, 14, 16, 18, 20, 22); assert_eq!(r, e); } #[simd_test = "sse2"] - unsafe fn _mm_adds_epu16_saturate() { - let a = u16x8::splat(0xFFFF); - let b = u16x8::splat(1); - let r = sse2::_mm_adds_epu16(a, b); + unsafe fn test_mm_adds_epu16_saturate() { + let a = _mm_set1_epi16(!0); + let b = 
_mm_set1_epi16(1); + let r = _mm_adds_epu16(a, b); assert_eq!(r, a); } #[simd_test = "sse2"] - unsafe fn _mm_avg_epu8() { - let (a, b) = (u8x16::splat(3), u8x16::splat(9)); - let r = sse2::_mm_avg_epu8(a, b); - assert_eq!(r, u8x16::splat(6)); + unsafe fn test_mm_avg_epu8() { + let (a, b) = (_mm_set1_epi8(3), _mm_set1_epi8(9)); + let r = _mm_avg_epu8(a, b); + assert_eq!(r, _mm_set1_epi8(6)); } #[simd_test = "sse2"] - unsafe fn _mm_avg_epu16() { - let (a, b) = (u16x8::splat(3), u16x8::splat(9)); - let r = sse2::_mm_avg_epu16(a, b); - assert_eq!(r, u16x8::splat(6)); + unsafe fn test_mm_avg_epu16() { + let (a, b) = (_mm_set1_epi16(3), _mm_set1_epi16(9)); + let r = _mm_avg_epu16(a, b); + assert_eq!(r, _mm_set1_epi16(6)); } #[simd_test = "sse2"] - unsafe fn _mm_madd_epi16() { - let a = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let b = i16x8::new(9, 10, 11, 12, 13, 14, 15, 16); - let r = sse2::_mm_madd_epi16(a, b); - let e = i32x4::new(29, 81, 149, 233); + unsafe fn test_mm_madd_epi16() { + let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8); + let b = _mm_setr_epi16(9, 10, 11, 12, 13, 14, 15, 16); + let r = _mm_madd_epi16(a, b); + let e = _mm_setr_epi32(29, 81, 149, 233); assert_eq!(r, e); } #[simd_test = "sse2"] - unsafe fn _mm_max_epi16() { - let a = i16x8::splat(1); - let b = i16x8::splat(-1); - let r = sse2::_mm_max_epi16(a, b); + unsafe fn test_mm_max_epi16() { + let a = _mm_set1_epi16(1); + let b = _mm_set1_epi16(-1); + let r = _mm_max_epi16(a, b); assert_eq!(r, a); } #[simd_test = "sse2"] - unsafe fn _mm_max_epu8() { - let a = u8x16::splat(1); - let b = u8x16::splat(255); - let r = sse2::_mm_max_epu8(a, b); + unsafe fn test_mm_max_epu8() { + let a = _mm_set1_epi8(1); + let b = _mm_set1_epi8(!0); + let r = _mm_max_epu8(a, b); assert_eq!(r, b); } #[simd_test = "sse2"] - unsafe fn _mm_min_epi16() { - let a = i16x8::splat(1); - let b = i16x8::splat(-1); - let r = sse2::_mm_min_epi16(a, b); + unsafe fn test_mm_min_epi16() { + let a = _mm_set1_epi16(1); + let b = _mm_set1_epi16(-1); + let r = _mm_min_epi16(a, b); assert_eq!(r, b); } #[simd_test = "sse2"] - unsafe fn _mm_min_epu8() { - let a = u8x16::splat(1); - let b = u8x16::splat(255); - let r = sse2::_mm_min_epu8(a, b); + unsafe fn test_mm_min_epu8() { + let a = _mm_set1_epi8(1); + let b = _mm_set1_epi8(!0); + let r = _mm_min_epu8(a, b); assert_eq!(r, a); } #[simd_test = "sse2"] - unsafe fn _mm_mulhi_epi16() { - let (a, b) = (i16x8::splat(1000), i16x8::splat(-1001)); - let r = sse2::_mm_mulhi_epi16(a, b); - assert_eq!(r, i16x8::splat(-16)); + unsafe fn test_mm_mulhi_epi16() { + let (a, b) = (_mm_set1_epi16(1000), _mm_set1_epi16(-1001)); + let r = _mm_mulhi_epi16(a, b); + assert_eq!(r, _mm_set1_epi16(-16)); } #[simd_test = "sse2"] - unsafe fn _mm_mulhi_epu16() { - let (a, b) = (u16x8::splat(1000), u16x8::splat(1001)); - let r = sse2::_mm_mulhi_epu16(a, b); - assert_eq!(r, u16x8::splat(15)); + unsafe fn test_mm_mulhi_epu16() { + let (a, b) = (_mm_set1_epi16(1000), _mm_set1_epi16(1001)); + let r = _mm_mulhi_epu16(a, b); + assert_eq!(r, _mm_set1_epi16(15)); } #[simd_test = "sse2"] - unsafe fn _mm_mullo_epi16() { - let (a, b) = (i16x8::splat(1000), i16x8::splat(-1001)); - let r = sse2::_mm_mullo_epi16(a, b); - assert_eq!(r, i16x8::splat(-17960)); + unsafe fn test_mm_mullo_epi16() { + let (a, b) = (_mm_set1_epi16(1000), _mm_set1_epi16(-1001)); + let r = _mm_mullo_epi16(a, b); + assert_eq!(r, _mm_set1_epi16(-17960)); } #[simd_test = "sse2"] - unsafe fn _mm_mul_epu32() { - let a = u32x4::from(u64x2::new(1_000_000_000, 1 << 34)); - let b = 
u32x4::from(u64x2::new(1_000_000_000, 1 << 35)); - let r = sse2::_mm_mul_epu32(a, b); - let e = u64x2::new(1_000_000_000 * 1_000_000_000, 0); + unsafe fn test_mm_mul_epu32() { + let a = _mm_setr_epi64x(1_000_000_000, 1 << 34); + let b = _mm_setr_epi64x(1_000_000_000, 1 << 35); + let r = _mm_mul_epu32(a, b); + let e = _mm_setr_epi64x(1_000_000_000 * 1_000_000_000, 0); assert_eq!(r, e); } #[simd_test = "sse2"] - unsafe fn _mm_sad_epu8() { + unsafe fn test_mm_sad_epu8() { #[cfg_attr(rustfmt, rustfmt_skip)] - let a = u8x16::new( - 255, 254, 253, 252, 1, 2, 3, 4, 155, 154, 153, 152, 1, 2, 3, 4, + let a = _mm_setr_epi8( + 255u8 as i8, 254u8 as i8, 253u8 as i8, 252u8 as i8, + 1, 2, 3, 4, + 155u8 as i8, 154u8 as i8, 153u8 as i8, 152u8 as i8, + 1, 2, 3, 4, ); - let b = u8x16::new(0, 0, 0, 0, 2, 1, 2, 1, 1, 1, 1, 1, 1, 2, 1, 2); - let r = sse2::_mm_sad_epu8(a, b); - let e = u64x2::new(1020, 614); + let b = _mm_setr_epi8(0, 0, 0, 0, 2, 1, 2, 1, 1, 1, 1, 1, 1, 2, 1, 2); + let r = _mm_sad_epu8(a, b); + let e = _mm_setr_epi64x(1020, 614); assert_eq!(r, e); } #[simd_test = "sse2"] - unsafe fn _mm_sub_epi8() { - let (a, b) = (i8x16::splat(5), i8x16::splat(6)); - let r = sse2::_mm_sub_epi8(a, b); - assert_eq!(r, i8x16::splat(-1)); + unsafe fn test_mm_sub_epi8() { + let (a, b) = (_mm_set1_epi8(5), _mm_set1_epi8(6)); + let r = _mm_sub_epi8(a, b); + assert_eq!(r, _mm_set1_epi8(-1)); } #[simd_test = "sse2"] - unsafe fn _mm_sub_epi16() { - let (a, b) = (i16x8::splat(5), i16x8::splat(6)); - let r = sse2::_mm_sub_epi16(a, b); - assert_eq!(r, i16x8::splat(-1)); + unsafe fn test_mm_sub_epi16() { + let (a, b) = (_mm_set1_epi16(5), _mm_set1_epi16(6)); + let r = _mm_sub_epi16(a, b); + assert_eq!(r, _mm_set1_epi16(-1)); } #[simd_test = "sse2"] - unsafe fn _mm_sub_epi32() { - let (a, b) = (i32x4::splat(5), i32x4::splat(6)); - let r = sse2::_mm_sub_epi32(a, b); - assert_eq!(r, i32x4::splat(-1)); + unsafe fn test_mm_sub_epi32() { + let (a, b) = (_mm_set1_epi32(5), _mm_set1_epi32(6)); + let r = _mm_sub_epi32(a, b); + assert_eq!(r, _mm_set1_epi32(-1)); } #[simd_test = "sse2"] - unsafe fn _mm_sub_epi64() { - let (a, b) = (i64x2::splat(5), i64x2::splat(6)); - let r = sse2::_mm_sub_epi64(a, b); - assert_eq!(r, i64x2::splat(-1)); + unsafe fn test_mm_sub_epi64() { + let (a, b) = (_mm_set1_epi64x(5), _mm_set1_epi64x(6)); + let r = _mm_sub_epi64(a, b); + assert_eq!(r, _mm_set1_epi64x(-1)); } #[simd_test = "sse2"] - unsafe fn _mm_subs_epi8() { - let (a, b) = (i8x16::splat(5), i8x16::splat(2)); - let r = sse2::_mm_subs_epi8(a, b); - assert_eq!(r, i8x16::splat(3)); + unsafe fn test_mm_subs_epi8() { + let (a, b) = (_mm_set1_epi8(5), _mm_set1_epi8(2)); + let r = _mm_subs_epi8(a, b); + assert_eq!(r, _mm_set1_epi8(3)); } #[simd_test = "sse2"] - unsafe fn _mm_subs_epi8_saturate_positive() { - let a = i8x16::splat(0x7F); - let b = i8x16::splat(-1); - let r = sse2::_mm_subs_epi8(a, b); + unsafe fn test_mm_subs_epi8_saturate_positive() { + let a = _mm_set1_epi8(0x7F); + let b = _mm_set1_epi8(-1); + let r = _mm_subs_epi8(a, b); assert_eq!(r, a); } #[simd_test = "sse2"] - unsafe fn _mm_subs_epi8_saturate_negative() { - let a = i8x16::splat(-0x80); - let b = i8x16::splat(1); - let r = sse2::_mm_subs_epi8(a, b); + unsafe fn test_mm_subs_epi8_saturate_negative() { + let a = _mm_set1_epi8(-0x80); + let b = _mm_set1_epi8(1); + let r = _mm_subs_epi8(a, b); assert_eq!(r, a); } #[simd_test = "sse2"] - unsafe fn _mm_subs_epi16() { - let (a, b) = (i16x8::splat(5), i16x8::splat(2)); - let r = sse2::_mm_subs_epi16(a, b); - assert_eq!(r, i16x8::splat(3)); 
+ unsafe fn test_mm_subs_epi16() { + let (a, b) = (_mm_set1_epi16(5), _mm_set1_epi16(2)); + let r = _mm_subs_epi16(a, b); + assert_eq!(r, _mm_set1_epi16(3)); } #[simd_test = "sse2"] - unsafe fn _mm_subs_epi16_saturate_positive() { - let a = i16x8::splat(0x7FFF); - let b = i16x8::splat(-1); - let r = sse2::_mm_subs_epi16(a, b); + unsafe fn test_mm_subs_epi16_saturate_positive() { + let a = _mm_set1_epi16(0x7FFF); + let b = _mm_set1_epi16(-1); + let r = _mm_subs_epi16(a, b); assert_eq!(r, a); } #[simd_test = "sse2"] - unsafe fn _mm_subs_epi16_saturate_negative() { - let a = i16x8::splat(-0x8000); - let b = i16x8::splat(1); - let r = sse2::_mm_subs_epi16(a, b); + unsafe fn test_mm_subs_epi16_saturate_negative() { + let a = _mm_set1_epi16(-0x8000); + let b = _mm_set1_epi16(1); + let r = _mm_subs_epi16(a, b); assert_eq!(r, a); } #[simd_test = "sse2"] - unsafe fn _mm_subs_epu8() { - let (a, b) = (u8x16::splat(5), u8x16::splat(2)); - let r = sse2::_mm_subs_epu8(a, b); - assert_eq!(r, u8x16::splat(3)); + unsafe fn test_mm_subs_epu8() { + let (a, b) = (_mm_set1_epi8(5), _mm_set1_epi8(2)); + let r = _mm_subs_epu8(a, b); + assert_eq!(r, _mm_set1_epi8(3)); } #[simd_test = "sse2"] - unsafe fn _mm_subs_epu8_saturate() { - let a = u8x16::splat(0); - let b = u8x16::splat(1); - let r = sse2::_mm_subs_epu8(a, b); + unsafe fn test_mm_subs_epu8_saturate() { + let a = _mm_set1_epi8(0); + let b = _mm_set1_epi8(1); + let r = _mm_subs_epu8(a, b); assert_eq!(r, a); } #[simd_test = "sse2"] - unsafe fn _mm_subs_epu16() { - let (a, b) = (u16x8::splat(5), u16x8::splat(2)); - let r = sse2::_mm_subs_epu16(a, b); - assert_eq!(r, u16x8::splat(3)); + unsafe fn test_mm_subs_epu16() { + let (a, b) = (_mm_set1_epi16(5), _mm_set1_epi16(2)); + let r = _mm_subs_epu16(a, b); + assert_eq!(r, _mm_set1_epi16(3)); } #[simd_test = "sse2"] - unsafe fn _mm_subs_epu16_saturate() { - let a = u16x8::splat(0); - let b = u16x8::splat(1); - let r = sse2::_mm_subs_epu16(a, b); + unsafe fn test_mm_subs_epu16_saturate() { + let a = _mm_set1_epi16(0); + let b = _mm_set1_epi16(1); + let r = _mm_subs_epu16(a, b); assert_eq!(r, a); } #[simd_test = "sse2"] - unsafe fn _mm_slli_si128() { + unsafe fn test_mm_slli_si128() { #[cfg_attr(rustfmt, rustfmt_skip)] - let a = i8x16::new( + let a = _mm_setr_epi8( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, ); - let r = sse2::_mm_slli_si128(a, 1); + let r = _mm_slli_si128(a, 1); let e = - i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); assert_eq!(r, e); #[cfg_attr(rustfmt, rustfmt_skip)] - let a = i8x16::new( + let a = _mm_setr_epi8( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, ); - let r = sse2::_mm_slli_si128(a, 15); - let e = i8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1); + let r = _mm_slli_si128(a, 15); + let e = _mm_setr_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1); assert_eq!(r, e); #[cfg_attr(rustfmt, rustfmt_skip)] - let a = i8x16::new( + let a = _mm_setr_epi8( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, ); - let r = sse2::_mm_slli_si128(a, 16); - assert_eq!(r, i8x16::splat(0)); + let r = _mm_slli_si128(a, 16); + assert_eq!(r, _mm_set1_epi8(0)); #[cfg_attr(rustfmt, rustfmt_skip)] - let a = i8x16::new( + let a = _mm_setr_epi8( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, ); - let r = sse2::_mm_slli_si128(a, -1); - assert_eq!(i8x16::splat(0), r); + let r = _mm_slli_si128(a, -1); + assert_eq!(_mm_set1_epi8(0), r); #[cfg_attr(rustfmt, rustfmt_skip)] - let a = 
i8x16::new( + let a = _mm_setr_epi8( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, ); - let r = sse2::_mm_slli_si128(a, -0x80000000); - assert_eq!(r, i8x16::splat(0)); + let r = _mm_slli_si128(a, -0x80000000); + assert_eq!(r, _mm_set1_epi8(0)); } #[simd_test = "sse2"] - unsafe fn _mm_slli_epi16() { + unsafe fn test_mm_slli_epi16() { #[cfg_attr(rustfmt, rustfmt_skip)] - let a = i16x8::new( + let a = _mm_setr_epi16( 0xFFFF as u16 as i16, 0x0FFF, 0x00FF, 0x000F, 0, 0, 0, 0, ); - let r = sse2::_mm_slli_epi16(a, 4); + let r = _mm_slli_epi16(a, 4); #[cfg_attr(rustfmt, rustfmt_skip)] - let e = i16x8::new( + let e = _mm_setr_epi16( 0xFFF0 as u16 as i16, 0xFFF0 as u16 as i16, 0x0FF0, 0x00F0, 0, 0, 0, 0, ); @@ -2745,1140 +2771,1085 @@ mod tests { } #[simd_test = "sse2"] - unsafe fn _mm_sll_epi16() { - let a = i16x8::new(0xFF, 0, 0, 0, 0, 0, 0, 0); - let r = sse2::_mm_sll_epi16(a, i16x8::new(4, 0, 0, 0, 0, 0, 0, 0)); - assert_eq!(r, i16x8::new(0xFF0, 0, 0, 0, 0, 0, 0, 0)); - let r = sse2::_mm_sll_epi16(a, i16x8::new(0, 0, 0, 0, 4, 0, 0, 0)); - assert_eq!(r, i16x8::new(0xFF, 0, 0, 0, 0, 0, 0, 0)); + unsafe fn test_mm_sll_epi16() { + let a = _mm_setr_epi16(0xFF, 0, 0, 0, 0, 0, 0, 0); + let r = _mm_sll_epi16(a, _mm_setr_epi16(4, 0, 0, 0, 0, 0, 0, 0)); + assert_eq!(r, _mm_setr_epi16(0xFF0, 0, 0, 0, 0, 0, 0, 0)); + let r = _mm_sll_epi16(a, _mm_setr_epi16(0, 0, 0, 0, 4, 0, 0, 0)); + assert_eq!(r, _mm_setr_epi16(0xFF, 0, 0, 0, 0, 0, 0, 0)); } #[simd_test = "sse2"] - unsafe fn _mm_slli_epi32() { - let r = sse2::_mm_slli_epi32(i32x4::splat(0xFFFF), 4); - assert_eq!(r, i32x4::splat(0xFFFF0)); + unsafe fn test_mm_slli_epi32() { + let r = _mm_slli_epi32(_mm_set1_epi32(0xFFFF), 4); + assert_eq!(r, _mm_set1_epi32(0xFFFF0)); } #[simd_test = "sse2"] - unsafe fn _mm_sll_epi32() { - let a = i32x4::splat(0xFFFF); - let b = i32x4::new(4, 0, 0, 0); - let r = sse2::_mm_sll_epi32(a, b); - assert_eq!(r, i32x4::splat(0xFFFF0)); + unsafe fn test_mm_sll_epi32() { + let a = _mm_set1_epi32(0xFFFF); + let b = _mm_setr_epi32(4, 0, 0, 0); + let r = _mm_sll_epi32(a, b); + assert_eq!(r, _mm_set1_epi32(0xFFFF0)); } #[simd_test = "sse2"] - unsafe fn _mm_slli_epi64() { - let r = sse2::_mm_slli_epi64(i64x2::splat(0xFFFFFFFF), 4); - assert_eq!(r, i64x2::splat(0xFFFFFFFF0)); + unsafe fn test_mm_slli_epi64() { + let r = _mm_slli_epi64(_mm_set1_epi64x(0xFFFFFFFF), 4); + assert_eq!(r, _mm_set1_epi64x(0xFFFFFFFF0)); } #[simd_test = "sse2"] - unsafe fn _mm_sll_epi64() { - let a = i64x2::splat(0xFFFFFFFF); - let b = i64x2::new(4, 0); - let r = sse2::_mm_sll_epi64(a, b); - assert_eq!(r, i64x2::splat(0xFFFFFFFF0)); + unsafe fn test_mm_sll_epi64() { + let a = _mm_set1_epi64x(0xFFFFFFFF); + let b = _mm_setr_epi64x(4, 0); + let r = _mm_sll_epi64(a, b); + assert_eq!(r, _mm_set1_epi64x(0xFFFFFFFF0)); } #[simd_test = "sse2"] - unsafe fn _mm_srai_epi16() { - let r = sse2::_mm_srai_epi16(i16x8::splat(-1), 1); - assert_eq!(r, i16x8::splat(-1)); + unsafe fn test_mm_srai_epi16() { + let r = _mm_srai_epi16(_mm_set1_epi16(-1), 1); + assert_eq!(r, _mm_set1_epi16(-1)); } #[simd_test = "sse2"] - unsafe fn _mm_sra_epi16() { - let a = i16x8::splat(-1); - let b = i16x8::new(1, 0, 0, 0, 0, 0, 0, 0); - let r = sse2::_mm_sra_epi16(a, b); - assert_eq!(r, i16x8::splat(-1)); + unsafe fn test_mm_sra_epi16() { + let a = _mm_set1_epi16(-1); + let b = _mm_setr_epi16(1, 0, 0, 0, 0, 0, 0, 0); + let r = _mm_sra_epi16(a, b); + assert_eq!(r, _mm_set1_epi16(-1)); } #[simd_test = "sse2"] - unsafe fn _mm_srai_epi32() { - let r = sse2::_mm_srai_epi32(i32x4::splat(-1), 1); - 
assert_eq!(r, i32x4::splat(-1)); + unsafe fn test_mm_srai_epi32() { + let r = _mm_srai_epi32(_mm_set1_epi32(-1), 1); + assert_eq!(r, _mm_set1_epi32(-1)); } #[simd_test = "sse2"] - unsafe fn _mm_sra_epi32() { - let a = i32x4::splat(-1); - let b = i32x4::new(1, 0, 0, 0); - let r = sse2::_mm_sra_epi32(a, b); - assert_eq!(r, i32x4::splat(-1)); + unsafe fn test_mm_sra_epi32() { + let a = _mm_set1_epi32(-1); + let b = _mm_setr_epi32(1, 0, 0, 0); + let r = _mm_sra_epi32(a, b); + assert_eq!(r, _mm_set1_epi32(-1)); } #[simd_test = "sse2"] - unsafe fn _mm_srli_si128() { + unsafe fn test_mm_srli_si128() { #[cfg_attr(rustfmt, rustfmt_skip)] - let a = i8x16::new( + let a = _mm_setr_epi8( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, ); - let r = sse2::_mm_srli_si128(a, 1); + let r = _mm_srli_si128(a, 1); #[cfg_attr(rustfmt, rustfmt_skip)] - let e = i8x16::new( + let e = _mm_setr_epi8( 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 0, ); assert_eq!(r, e); #[cfg_attr(rustfmt, rustfmt_skip)] - let a = i8x16::new( + let a = _mm_setr_epi8( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, ); - let r = sse2::_mm_srli_si128(a, 15); - let e = i8x16::new(16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + let r = _mm_srli_si128(a, 15); + let e = _mm_setr_epi8(16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); assert_eq!(r, e); #[cfg_attr(rustfmt, rustfmt_skip)] - let a = i8x16::new( + let a = _mm_setr_epi8( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, ); - let r = sse2::_mm_srli_si128(a, 16); - assert_eq!(r, i8x16::splat(0)); + let r = _mm_srli_si128(a, 16); + assert_eq!(r, _mm_set1_epi8(0)); #[cfg_attr(rustfmt, rustfmt_skip)] - let a = i8x16::new( + let a = _mm_setr_epi8( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, ); - let r = sse2::_mm_srli_si128(a, -1); - assert_eq!(r, i8x16::splat(0)); + let r = _mm_srli_si128(a, -1); + assert_eq!(r, _mm_set1_epi8(0)); #[cfg_attr(rustfmt, rustfmt_skip)] - let a = i8x16::new( + let a = _mm_setr_epi8( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, ); - let r = sse2::_mm_srli_si128(a, -0x80000000); - assert_eq!(r, i8x16::splat(0)); + let r = _mm_srli_si128(a, -0x80000000); + assert_eq!(r, _mm_set1_epi8(0)); } #[simd_test = "sse2"] - unsafe fn _mm_srli_epi16() { + unsafe fn test_mm_srli_epi16() { #[cfg_attr(rustfmt, rustfmt_skip)] - let a = i16x8::new( + let a = _mm_setr_epi16( 0xFFFF as u16 as i16, 0x0FFF, 0x00FF, 0x000F, 0, 0, 0, 0, ); - let r = sse2::_mm_srli_epi16(a, 4); + let r = _mm_srli_epi16(a, 4); #[cfg_attr(rustfmt, rustfmt_skip)] - let e = i16x8::new( + let e = _mm_setr_epi16( 0xFFF as u16 as i16, 0xFF as u16 as i16, 0xF, 0, 0, 0, 0, 0, ); assert_eq!(r, e); } #[simd_test = "sse2"] - unsafe fn _mm_srl_epi16() { - let a = i16x8::new(0xFF, 0, 0, 0, 0, 0, 0, 0); - let r = sse2::_mm_srl_epi16(a, i16x8::new(4, 0, 0, 0, 0, 0, 0, 0)); - assert_eq!(r, i16x8::new(0xF, 0, 0, 0, 0, 0, 0, 0)); - let r = sse2::_mm_srl_epi16(a, i16x8::new(0, 0, 0, 0, 4, 0, 0, 0)); - assert_eq!(r, i16x8::new(0xFF, 0, 0, 0, 0, 0, 0, 0)); + unsafe fn test_mm_srl_epi16() { + let a = _mm_setr_epi16(0xFF, 0, 0, 0, 0, 0, 0, 0); + let r = _mm_srl_epi16(a, _mm_setr_epi16(4, 0, 0, 0, 0, 0, 0, 0)); + assert_eq!(r, _mm_setr_epi16(0xF, 0, 0, 0, 0, 0, 0, 0)); + let r = _mm_srl_epi16(a, _mm_setr_epi16(0, 0, 0, 0, 4, 0, 0, 0)); + assert_eq!(r, _mm_setr_epi16(0xFF, 0, 0, 0, 0, 0, 0, 0)); } #[simd_test = "sse2"] - unsafe fn _mm_srli_epi32() { - let r = sse2::_mm_srli_epi32(i32x4::splat(0xFFFF), 4); - assert_eq!(r, i32x4::splat(0xFFF)); + unsafe fn test_mm_srli_epi32() 
{ + let r = _mm_srli_epi32(_mm_set1_epi32(0xFFFF), 4); + assert_eq!(r, _mm_set1_epi32(0xFFF)); } #[simd_test = "sse2"] - unsafe fn _mm_srl_epi32() { - let a = i32x4::splat(0xFFFF); - let b = i32x4::new(4, 0, 0, 0); - let r = sse2::_mm_srl_epi32(a, b); - assert_eq!(r, i32x4::splat(0xFFF)); + unsafe fn test_mm_srl_epi32() { + let a = _mm_set1_epi32(0xFFFF); + let b = _mm_setr_epi32(4, 0, 0, 0); + let r = _mm_srl_epi32(a, b); + assert_eq!(r, _mm_set1_epi32(0xFFF)); } #[simd_test = "sse2"] - unsafe fn _mm_srli_epi64() { - let r = sse2::_mm_srli_epi64(i64x2::splat(0xFFFFFFFF), 4); - assert_eq!(r, i64x2::splat(0xFFFFFFF)); + unsafe fn test_mm_srli_epi64() { + let r = _mm_srli_epi64(_mm_set1_epi64x(0xFFFFFFFF), 4); + assert_eq!(r, _mm_set1_epi64x(0xFFFFFFF)); } #[simd_test = "sse2"] - unsafe fn _mm_srl_epi64() { - let a = i64x2::splat(0xFFFFFFFF); - let b = i64x2::new(4, 0); - let r = sse2::_mm_srl_epi64(a, b); - assert_eq!(r, i64x2::splat(0xFFFFFFF)); + unsafe fn test_mm_srl_epi64() { + let a = _mm_set1_epi64x(0xFFFFFFFF); + let b = _mm_setr_epi64x(4, 0); + let r = _mm_srl_epi64(a, b); + assert_eq!(r, _mm_set1_epi64x(0xFFFFFFF)); } #[simd_test = "sse2"] - unsafe fn _mm_and_si128() { - let a = __m128i::from(i8x16::splat(5)); - let b = __m128i::from(i8x16::splat(3)); - let r = sse2::_mm_and_si128(a, b); - assert_eq!(r, __m128i::from(i8x16::splat(1))); + unsafe fn test_mm_and_si128() { + let a = _mm_set1_epi8(5); + let b = _mm_set1_epi8(3); + let r = _mm_and_si128(a, b); + assert_eq!(r, _mm_set1_epi8(1)); } #[simd_test = "sse2"] - unsafe fn _mm_andnot_si128() { - let a = __m128i::from(i8x16::splat(5)); - let b = __m128i::from(i8x16::splat(3)); - let r = sse2::_mm_andnot_si128(a, b); - assert_eq!(r, __m128i::from(i8x16::splat(2))); + unsafe fn test_mm_andnot_si128() { + let a = _mm_set1_epi8(5); + let b = _mm_set1_epi8(3); + let r = _mm_andnot_si128(a, b); + assert_eq!(r, _mm_set1_epi8(2)); } #[simd_test = "sse2"] - unsafe fn _mm_or_si128() { - let a = __m128i::from(i8x16::splat(5)); - let b = __m128i::from(i8x16::splat(3)); - let r = sse2::_mm_or_si128(a, b); - assert_eq!(r, __m128i::from(i8x16::splat(7))); + unsafe fn test_mm_or_si128() { + let a = _mm_set1_epi8(5); + let b = _mm_set1_epi8(3); + let r = _mm_or_si128(a, b); + assert_eq!(r, _mm_set1_epi8(7)); } #[simd_test = "sse2"] - unsafe fn _mm_xor_si128() { - let a = __m128i::from(i8x16::splat(5)); - let b = __m128i::from(i8x16::splat(3)); - let r = sse2::_mm_xor_si128(a, b); - assert_eq!(r, __m128i::from(i8x16::splat(6))); + unsafe fn test_mm_xor_si128() { + let a = _mm_set1_epi8(5); + let b = _mm_set1_epi8(3); + let r = _mm_xor_si128(a, b); + assert_eq!(r, _mm_set1_epi8(6)); } #[simd_test = "sse2"] - unsafe fn _mm_cmpeq_epi8() { + unsafe fn test_mm_cmpeq_epi8() { let a = - i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); let b = - i8x16::new(15, 14, 2, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - let r = sse2::_mm_cmpeq_epi8(a, b); + _mm_setr_epi8(15, 14, 2, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + let r = _mm_cmpeq_epi8(a, b); #[cfg_attr(rustfmt, rustfmt_skip)] assert_eq!( r, - i8x16::new( + _mm_setr_epi8( 0, 0, 0xFFu8 as i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ) ); } #[simd_test = "sse2"] - unsafe fn _mm_cmpeq_epi16() { - let a = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let b = i16x8::new(7, 6, 2, 4, 3, 2, 1, 0); - let r = sse2::_mm_cmpeq_epi16(a, b); - assert_eq!(r, i16x8::splat(0).replace(2, 0xFFFFu16 as i16)); + unsafe fn test_mm_cmpeq_epi16() { + 
     #[simd_test = "sse2"]
-    unsafe fn _mm_cmpeq_epi16() {
-        let a = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
-        let b = i16x8::new(7, 6, 2, 4, 3, 2, 1, 0);
-        let r = sse2::_mm_cmpeq_epi16(a, b);
-        assert_eq!(r, i16x8::splat(0).replace(2, 0xFFFFu16 as i16));
+    unsafe fn test_mm_cmpeq_epi16() {
+        let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
+        let b = _mm_setr_epi16(7, 6, 2, 4, 3, 2, 1, 0);
+        let r = _mm_cmpeq_epi16(a, b);
+        assert_eq!(r, _mm_setr_epi16(0, 0, !0, 0, 0, 0, 0, 0));
     }

     #[simd_test = "sse2"]
-    unsafe fn _mm_cmpeq_epi32() {
-        let a = i32x4::new(0, 1, 2, 3);
-        let b = i32x4::new(3, 2, 2, 0);
-        let r = sse2::_mm_cmpeq_epi32(a, b);
-        assert_eq!(r, i32x4::splat(0).replace(2, 0xFFFFFFFFu32 as i32));
+    unsafe fn test_mm_cmpeq_epi32() {
+        let a = _mm_setr_epi32(0, 1, 2, 3);
+        let b = _mm_setr_epi32(3, 2, 2, 0);
+        let r = _mm_cmpeq_epi32(a, b);
+        assert_eq!(r, _mm_setr_epi32(0, 0, !0, 0));
     }

     #[simd_test = "sse2"]
-    unsafe fn _mm_cmpgt_epi8() {
-        let a = i8x16::splat(0).replace(0, 5);
-        let b = i8x16::splat(0);
-        let r = sse2::_mm_cmpgt_epi8(a, b);
-        assert_eq!(r, i8x16::splat(0).replace(0, 0xFFu8 as i8));
+    unsafe fn test_mm_cmpgt_epi8() {
+        let a = _mm_set_epi8(5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+        let b = _mm_set1_epi8(0);
+        let r = _mm_cmpgt_epi8(a, b);
+        let e = _mm_set_epi8(!0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+        assert_eq!(r, e);
     }

     #[simd_test = "sse2"]
-    unsafe fn _mm_cmpgt_epi16() {
-        let a = i16x8::splat(0).replace(0, 5);
-        let b = i16x8::splat(0);
-        let r = sse2::_mm_cmpgt_epi16(a, b);
-        assert_eq!(r, i16x8::splat(0).replace(0, 0xFFFFu16 as i16));
+    unsafe fn test_mm_cmpgt_epi16() {
+        let a = _mm_set_epi16(5, 0, 0, 0, 0, 0, 0, 0);
+        let b = _mm_set1_epi16(0);
+        let r = _mm_cmpgt_epi16(a, b);
+        let e = _mm_set_epi16(!0, 0, 0, 0, 0, 0, 0, 0);
+        assert_eq!(r, e);
     }

     #[simd_test = "sse2"]
-    unsafe fn _mm_cmpgt_epi32() {
-        let a = i32x4::splat(0).replace(0, 5);
-        let b = i32x4::splat(0);
-        let r = sse2::_mm_cmpgt_epi32(a, b);
-        assert_eq!(r, i32x4::splat(0).replace(0, 0xFFFFFFFFu32 as i32));
+    unsafe fn test_mm_cmpgt_epi32() {
+        let a = _mm_set_epi32(5, 0, 0, 0);
+        let b = _mm_set1_epi32(0);
+        let r = _mm_cmpgt_epi32(a, b);
+        assert_eq!(r, _mm_set_epi32(!0, 0, 0, 0));
     }

     #[simd_test = "sse2"]
-    unsafe fn _mm_cmplt_epi8() {
-        let a = i8x16::splat(0);
-        let b = i8x16::splat(0).replace(0, 5);
-        let r = sse2::_mm_cmplt_epi8(a, b);
-        assert_eq!(r, i8x16::splat(0).replace(0, 0xFFu8 as i8));
+    unsafe fn test_mm_cmplt_epi8() {
+        let a = _mm_set1_epi8(0);
+        let b = _mm_set_epi8(5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+        let r = _mm_cmplt_epi8(a, b);
+        let e = _mm_set_epi8(!0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+        assert_eq!(r, e);
     }

     #[simd_test = "sse2"]
-    unsafe fn _mm_cmplt_epi16() {
-        let a = i16x8::splat(0);
-        let b = i16x8::splat(0).replace(0, 5);
-        let r = sse2::_mm_cmplt_epi16(a, b);
-        assert_eq!(r, i16x8::splat(0).replace(0, 0xFFFFu16 as i16));
+    unsafe fn test_mm_cmplt_epi16() {
+        let a = _mm_set1_epi16(0);
+        let b = _mm_set_epi16(5, 0, 0, 0, 0, 0, 0, 0);
+        let r = _mm_cmplt_epi16(a, b);
+        let e = _mm_set_epi16(!0, 0, 0, 0, 0, 0, 0, 0);
+        assert_eq!(r, e);
     }

     #[simd_test = "sse2"]
-    unsafe fn _mm_cmplt_epi32() {
-        let a = i32x4::splat(0);
-        let b = i32x4::splat(0).replace(0, 5);
-        let r = sse2::_mm_cmplt_epi32(a, b);
-        assert_eq!(r, i32x4::splat(0).replace(0, 0xFFFFFFFFu32 as i32));
+    unsafe fn test_mm_cmplt_epi32() {
+        let a = _mm_set1_epi32(0);
+        let b = _mm_set_epi32(5, 0, 0, 0);
+        let r = _mm_cmplt_epi32(a, b);
+        assert_eq!(r, _mm_set_epi32(!0, 0, 0, 0));
     }

     #[simd_test = "sse2"]
-    unsafe fn _mm_cvtepi32_pd() {
-        let a = sse2::_mm_set_epi32(35, 25, 15, 5);
-        let r = sse2::_mm_cvtepi32_pd(a);
-        assert_eq!(r, f64x2::new(5.0, 15.0));
+    unsafe fn test_mm_cvtepi32_pd() {
+        let a = _mm_set_epi32(35, 25, 15, 5);
+        let r = _mm_cvtepi32_pd(a);
+        assert_eq_m128d(r, _mm_setr_pd(5.0, 15.0));
     }
     #[simd_test = "sse2"]
-    unsafe fn _mm_cvtsi32_sd() {
-        let a = f64x2::splat(3.5);
-        let r = sse2::_mm_cvtsi32_sd(a, 5);
-        assert_eq!(r, f64x2::new(5.0, 3.5));
+    unsafe fn test_mm_cvtsi32_sd() {
+        let a = _mm_set1_pd(3.5);
+        let r = _mm_cvtsi32_sd(a, 5);
+        assert_eq_m128d(r, _mm_setr_pd(5.0, 3.5));
     }

     #[simd_test = "sse2"]
-    unsafe fn _mm_cvtepi32_ps() {
-        let a = i32x4::new(1, 2, 3, 4);
-        let r = sse2::_mm_cvtepi32_ps(a);
-        assert_eq!(r, f32x4::new(1.0, 2.0, 3.0, 4.0));
+    unsafe fn test_mm_cvtepi32_ps() {
+        let a = _mm_setr_epi32(1, 2, 3, 4);
+        let r = _mm_cvtepi32_ps(a);
+        assert_eq_m128(r, _mm_setr_ps(1.0, 2.0, 3.0, 4.0));
     }

     #[simd_test = "sse2"]
-    unsafe fn _mm_cvtps_epi32() {
-        let a = f32x4::new(1.0, 2.0, 3.0, 4.0);
-        let r = sse2::_mm_cvtps_epi32(a);
-        assert_eq!(r, i32x4::new(1, 2, 3, 4));
+    unsafe fn test_mm_cvtps_epi32() {
+        let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
+        let r = _mm_cvtps_epi32(a);
+        assert_eq!(r, _mm_setr_epi32(1, 2, 3, 4));
     }

     #[simd_test = "sse2"]
-    unsafe fn _mm_cvtsi32_si128() {
-        let r = sse2::_mm_cvtsi32_si128(5);
-        assert_eq!(r, i32x4::new(5, 0, 0, 0));
+    unsafe fn test_mm_cvtsi32_si128() {
+        let r = _mm_cvtsi32_si128(5);
+        assert_eq!(r, _mm_setr_epi32(5, 0, 0, 0));
     }

     #[simd_test = "sse2"]
-    unsafe fn _mm_cvtsi128_si32() {
-        let r = sse2::_mm_cvtsi128_si32(i32x4::new(5, 0, 0, 0));
+    unsafe fn test_mm_cvtsi128_si32() {
+        let r = _mm_cvtsi128_si32(_mm_setr_epi32(5, 0, 0, 0));
         assert_eq!(r, 5);
     }

     #[simd_test = "sse2"]
-    unsafe fn _mm_set_epi64x() {
-        let r = sse2::_mm_set_epi64x(0, 1);
-        assert_eq!(r, i64x2::new(1, 0));
+    unsafe fn test_mm_set_epi64x() {
+        let r = _mm_set_epi64x(0, 1);
+        assert_eq!(r, _mm_setr_epi64x(1, 0));
     }

     #[simd_test = "sse2"]
-    unsafe fn _mm_set_epi32() {
-        let r = sse2::_mm_set_epi32(0, 1, 2, 3);
-        assert_eq!(r, i32x4::new(3, 2, 1, 0));
+    unsafe fn test_mm_set_epi32() {
+        let r = _mm_set_epi32(0, 1, 2, 3);
+        assert_eq!(r, _mm_setr_epi32(3, 2, 1, 0));
     }

     #[simd_test = "sse2"]
-    unsafe fn _mm_set_epi16() {
-        let r = sse2::_mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
-        assert_eq!(r, i16x8::new(7, 6, 5, 4, 3, 2, 1, 0));
+    unsafe fn test_mm_set_epi16() {
+        let r = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
+        assert_eq!(r, _mm_setr_epi16(7, 6, 5, 4, 3, 2, 1, 0));
     }

     #[simd_test = "sse2"]
-    unsafe fn _mm_set_epi8() {
+    unsafe fn test_mm_set_epi8() {
         #[cfg_attr(rustfmt, rustfmt_skip)]
-        let r = sse2::_mm_set_epi8(
+        let r = _mm_set_epi8(
             0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
         );
         let e =
-            i8x16::new(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+            _mm_setr_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
         assert_eq!(r, e);
     }

     #[simd_test = "sse2"]
-    unsafe fn _mm_set1_epi64x() {
-        let r = sse2::_mm_set1_epi64x(1);
-        assert_eq!(r, i64x2::splat(1));
+    unsafe fn test_mm_set1_epi64x() {
+        let r = _mm_set1_epi64x(1);
+        assert_eq!(r, _mm_set1_epi64x(1));
     }

     #[simd_test = "sse2"]
-    unsafe fn _mm_set1_epi32() {
-        let r = sse2::_mm_set1_epi32(1);
-        assert_eq!(r, i32x4::splat(1));
+    unsafe fn test_mm_set1_epi32() {
+        let r = _mm_set1_epi32(1);
+        assert_eq!(r, _mm_set1_epi32(1));
     }

     #[simd_test = "sse2"]
-    unsafe fn _mm_set1_epi16() {
-        let r = sse2::_mm_set1_epi16(1);
-        assert_eq!(r, i16x8::splat(1));
+    unsafe fn test_mm_set1_epi16() {
+        let r = _mm_set1_epi16(1);
+        assert_eq!(r, _mm_set1_epi16(1));
     }
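The expectations above lean on the argument order of the constructors: `_mm_set_*` takes elements from the highest lane down, while `_mm_setr_*` ("reversed") takes them in memory order, lowest lane first. A scalar model of the two 32-bit variants (the helper names are illustrative, not part of the patch):

    fn set_epi32(e3: i32, e2: i32, e1: i32, e0: i32) -> [i32; 4] {
        [e0, e1, e2, e3] // lane 0 comes from the last argument
    }

    fn setr_epi32(e0: i32, e1: i32, e2: i32, e3: i32) -> [i32; 4] {
        [e0, e1, e2, e3] // lane 0 comes from the first argument
    }

    fn main() {
        // mirrors the `_mm_set_epi32(0, 1, 2, 3)` vs `_mm_setr_epi32(3, 2, 1, 0)` checks
        assert_eq!(set_epi32(0, 1, 2, 3), setr_epi32(3, 2, 1, 0));
    }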
     #[simd_test = "sse2"]
-    unsafe fn _mm_set1_epi8() {
-        let r = sse2::_mm_set1_epi8(1);
-        assert_eq!(r, i8x16::splat(1));
+    unsafe fn test_mm_set1_epi8() {
+        let r = _mm_set1_epi8(1);
+        assert_eq!(r, _mm_set1_epi8(1));
     }

     #[simd_test = "sse2"]
-    unsafe fn _mm_setr_epi32() {
-        let r = sse2::_mm_setr_epi32(0, 1, 2, 3);
-        assert_eq!(r, i32x4::new(0, 1, 2, 3));
+    unsafe fn test_mm_setr_epi32() {
+        let r = _mm_setr_epi32(0, 1, 2, 3);
+        assert_eq!(r, _mm_setr_epi32(0, 1, 2, 3));
     }

     #[simd_test = "sse2"]
-    unsafe fn _mm_setr_epi16() {
-        let r = sse2::_mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
-        assert_eq!(r, i16x8::new(0, 1, 2, 3, 4, 5, 6, 7));
+    unsafe fn test_mm_setr_epi16() {
+        let r = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
+        assert_eq!(r, _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7));
     }

     #[simd_test = "sse2"]
-    unsafe fn _mm_setr_epi8() {
+    unsafe fn test_mm_setr_epi8() {
         #[cfg_attr(rustfmt, rustfmt_skip)]
-        let r = sse2::_mm_setr_epi8(
+        let r = _mm_setr_epi8(
             0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
         );
         let e =
-            i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+            _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         assert_eq!(r, e);
     }

     #[simd_test = "sse2"]
-    unsafe fn _mm_setzero_si128() {
-        let r = sse2::_mm_setzero_si128();
-        assert_eq!(r, __m128i::from(i64x2::splat(0)));
+    unsafe fn test_mm_setzero_si128() {
+        let r = _mm_setzero_si128();
+        assert_eq!(r, _mm_set1_epi64x(0));
     }

     #[simd_test = "sse2"]
-    unsafe fn _mm_loadl_epi64() {
-        let a = i64x2::new(6, 5);
-        let r = sse2::_mm_loadl_epi64(&a as *const _);
-        assert_eq!(r, i64x2::new(6, 0));
+    unsafe fn test_mm_loadl_epi64() {
+        let a = _mm_setr_epi64x(6, 5);
+        let r = _mm_loadl_epi64(&a as *const _);
+        assert_eq!(r, _mm_setr_epi64x(6, 0));
     }

     #[simd_test = "sse2"]
-    unsafe fn _mm_load_si128() {
-        let a = sse2::_mm_set_epi64x(5, 6);
-        let r = sse2::_mm_load_si128(&a as *const _ as *const _);
-        assert_eq!(a, i64x2::from(r));
+    unsafe fn test_mm_load_si128() {
+        let a = _mm_set_epi64x(5, 6);
+        let r = _mm_load_si128(&a as *const _ as *const _);
+        assert_eq!(a, r);
     }

     #[simd_test = "sse2"]
-    unsafe fn _mm_loadu_si128() {
-        let a = sse2::_mm_set_epi64x(5, 6);
-        let r = sse2::_mm_loadu_si128(&a as *const _ as *const _);
-        assert_eq!(a, i64x2::from(r));
+    unsafe fn test_mm_loadu_si128() {
+        let a = _mm_set_epi64x(5, 6);
+        let r = _mm_loadu_si128(&a as *const _ as *const _);
+        assert_eq!(a, r);
     }

     #[simd_test = "sse2"]
-    unsafe fn _mm_maskmoveu_si128() {
-        let a = i8x16::splat(9);
-        let mask = i8x16::splat(0).replace(2, 0x80u8 as i8);
-        let mut r = i8x16::splat(0);
-        sse2::_mm_maskmoveu_si128(a, mask, &mut r as *mut _ as *mut i8);
-        assert_eq!(r, i8x16::splat(0).replace(2, 9));
+    unsafe fn test_mm_maskmoveu_si128() {
+        let a = _mm_set1_epi8(9);
+        let mask = _mm_set_epi8(0, 0, 0x80u8 as i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+        let mut r = _mm_set1_epi8(0);
+        _mm_maskmoveu_si128(a, mask, &mut r as *mut _ as *mut i8);
+        let e = _mm_set_epi8(0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+        assert_eq!(r, e);
     }

     #[simd_test = "sse2"]
-    unsafe fn _mm_store_si128() {
-        let a = i8x16::splat(9);
-        let mut r = i8x16::splat(0);
-        sse2::_mm_store_si128(
+    unsafe fn test_mm_store_si128() {
+        let a = _mm_set1_epi8(9);
+        let mut r = _mm_set1_epi8(0);
+        _mm_store_si128(
             &mut r as *mut _ as *mut __m128i,
-            __m128i::from(a),
+            a,
         );
         assert_eq!(r, a);
     }

     #[simd_test = "sse2"]
-    unsafe fn _mm_storeu_si128() {
-        let a = i8x16::splat(9);
-        let mut r = i8x16::splat(0);
-        sse2::_mm_storeu_si128(
+    unsafe fn test_mm_storeu_si128() {
+        let a = _mm_set1_epi8(9);
+        let mut r = _mm_set1_epi8(0);
+        _mm_storeu_si128(
             &mut r as *mut _ as *mut __m128i,
-            __m128i::from(a),
+            a,
        );
         assert_eq!(r, a);
     }
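A note on why the aligned load/store tests above are sound: `_mm_load_si128`/`_mm_store_si128` require a 16-byte-aligned address, which these tests get for free by round-tripping through another 128-bit vector value, while the `u` (unaligned) variants accept any address. A minimal sketch of the alignment reasoning (the `Aligned16` wrapper is illustrative, not from the patch):

    #[repr(align(16))]
    struct Aligned16([u8; 16]); // same alignment guarantee a 128-bit vector type gives

    fn main() {
        assert_eq!(std::mem::align_of::<Aligned16>(), 16);
        // A plain byte array promises far less, so it would need the `u` variants:
        assert_eq!(std::mem::align_of::<[u8; 16]>(), 1);
    }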
#[simd_test = "sse2"] - unsafe fn _mm_storel_epi64() { - let a = i64x2::new(2, 9); - let mut r = i8x16::splat(0); - sse2::_mm_storel_epi64( + unsafe fn test_mm_storel_epi64() { + let a = _mm_setr_epi64x(2, 9); + let mut r = _mm_set1_epi8(0); + _mm_storel_epi64( &mut r as *mut _ as *mut __m128i, - __m128i::from(a), + a, ); - assert_eq!(r, i8x16::from(i64x2::new(2, 0))); + assert_eq!(r, _mm_setr_epi64x(2, 0)); } #[simd_test = "sse2"] - unsafe fn _mm_stream_si128() { - let a = __m128i::from(sse2::_mm_setr_epi32(1, 2, 3, 4)); - let mut r = sse2::_mm_undefined_si128(); - sse2::_mm_stream_si128(&mut r as *mut _, a); + unsafe fn test_mm_stream_si128() { + let a = _mm_setr_epi32(1, 2, 3, 4); + let mut r = _mm_undefined_si128(); + _mm_stream_si128(&mut r as *mut _, a); assert_eq!(r, a); } #[simd_test = "sse2"] - unsafe fn _mm_stream_si32() { + unsafe fn test_mm_stream_si32() { let a: i32 = 7; let mut mem = ::std::boxed::Box::::new(-1); - sse2::_mm_stream_si32(&mut *mem as *mut i32, a); + _mm_stream_si32(&mut *mem as *mut i32, a); assert_eq!(a, *mem); } #[simd_test = "sse2"] - unsafe fn _mm_move_epi64() { - let a = i64x2::new(5, 6); - let r = sse2::_mm_move_epi64(a); - assert_eq!(r, i64x2::new(5, 0)); + unsafe fn test_mm_move_epi64() { + let a = _mm_setr_epi64x(5, 6); + let r = _mm_move_epi64(a); + assert_eq!(r, _mm_setr_epi64x(5, 0)); } #[simd_test = "sse2"] - unsafe fn _mm_packs_epi16() { - let a = i16x8::new(0x80, -0x81, 0, 0, 0, 0, 0, 0); - let b = i16x8::new(0, 0, 0, 0, 0, 0, -0x81, 0x80); - let r = sse2::_mm_packs_epi16(a, b); + unsafe fn test_mm_packs_epi16() { + let a = _mm_setr_epi16(0x80, -0x81, 0, 0, 0, 0, 0, 0); + let b = _mm_setr_epi16(0, 0, 0, 0, 0, 0, -0x81, 0x80); + let r = _mm_packs_epi16(a, b); #[cfg_attr(rustfmt, rustfmt_skip)] assert_eq!( r, - i8x16::new( + _mm_setr_epi8( 0x7F, -0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -0x80, 0x7F ) ); } #[simd_test = "sse2"] - unsafe fn _mm_packs_epi32() { - let a = i32x4::new(0x8000, -0x8001, 0, 0); - let b = i32x4::new(0, 0, -0x8001, 0x8000); - let r = sse2::_mm_packs_epi32(a, b); + unsafe fn test_mm_packs_epi32() { + let a = _mm_setr_epi32(0x8000, -0x8001, 0, 0); + let b = _mm_setr_epi32(0, 0, -0x8001, 0x8000); + let r = _mm_packs_epi32(a, b); assert_eq!( r, - i16x8::new(0x7FFF, -0x8000, 0, 0, 0, 0, -0x8000, 0x7FFF) + _mm_setr_epi16(0x7FFF, -0x8000, 0, 0, 0, 0, -0x8000, 0x7FFF) ); } #[simd_test = "sse2"] - unsafe fn _mm_packus_epi16() { - let a = i16x8::new(0x100, -1, 0, 0, 0, 0, 0, 0); - let b = i16x8::new(0, 0, 0, 0, 0, 0, -1, 0x100); - let r = sse2::_mm_packus_epi16(a, b); + unsafe fn test_mm_packus_epi16() { + let a = _mm_setr_epi16(0x100, -1, 0, 0, 0, 0, 0, 0); + let b = _mm_setr_epi16(0, 0, 0, 0, 0, 0, -1, 0x100); + let r = _mm_packus_epi16(a, b); assert_eq!( r, - u8x16::new(0xFF, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xFF) + _mm_setr_epi8(!0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, !0) ); } #[simd_test = "sse2"] - unsafe fn _mm_extract_epi16() { - let a = i16x8::new(-1, 1, 2, 3, 4, 5, 6, 7); - let r1 = sse2::_mm_extract_epi16(a, 0); - let r2 = sse2::_mm_extract_epi16(a, 11); - assert_eq!(r1, 0xFFFF); + unsafe fn test_mm_extract_epi16() { + let a = _mm_setr_epi16(-1, 1, 2, 3, 4, 5, 6, 7); + let r1 = _mm_extract_epi16(a, 0); + let r2 = _mm_extract_epi16(a, 11); + assert_eq!(r1, -1); assert_eq!(r2, 3); } #[simd_test = "sse2"] - unsafe fn _mm_insert_epi16() { - let a = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let r = sse2::_mm_insert_epi16(a, 9, 0); - assert_eq!(r, a.replace(0, 9)); + unsafe fn test_mm_insert_epi16() { + let a = 
     #[simd_test = "sse2"]
-    unsafe fn _mm_insert_epi16() {
-        let a = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
-        let r = sse2::_mm_insert_epi16(a, 9, 0);
-        assert_eq!(r, a.replace(0, 9));
+    unsafe fn test_mm_insert_epi16() {
+        let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
+        let r = _mm_insert_epi16(a, 9, 0);
+        let e = _mm_setr_epi16(9, 1, 2, 3, 4, 5, 6, 7);
+        assert_eq!(r, e);
     }

     #[simd_test = "sse2"]
-    unsafe fn _mm_movemask_epi8() {
+    unsafe fn test_mm_movemask_epi8() {
         #[cfg_attr(rustfmt, rustfmt_skip)]
-        let a = i8x16::from(u8x16::new(
-            0b1000_0000, 0b0, 0b1000_0000, 0b01, 0b0101, 0b1111_0000, 0, 0,
-            0, 0, 0b1111_0000, 0b0101, 0b01, 0b1000_0000, 0b0, 0b1000_0000,
-        ));
-        let r = sse2::_mm_movemask_epi8(a);
+        let a = _mm_setr_epi8(
+            0b1000_0000u8 as i8, 0b0, 0b1000_0000u8 as i8, 0b01,
+            0b0101, 0b1111_0000u8 as i8, 0, 0,
+            0, 0, 0b1111_0000u8 as i8, 0b0101,
+            0b01, 0b1000_0000u8 as i8, 0b0, 0b1000_0000u8 as i8,
+        );
+        let r = _mm_movemask_epi8(a);
         assert_eq!(r, 0b10100100_00100101);
     }
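For reference, `_mm_movemask_epi8` gathers the sign bit of each of the 16 bytes into the low 16 bits of the result, with byte 0 mapping to bit 0. A plain-Rust model (not part of the patch) that reproduces the expected mask in the test above:

    fn movemask_epi8(a: [u8; 16]) -> i32 {
        a.iter()
            .enumerate()
            .fold(0, |mask, (i, &byte)| mask | (((byte >> 7) as i32) << i))
    }

    fn main() {
        let a = [
            0b1000_0000, 0b0, 0b1000_0000, 0b01, 0b0101, 0b1111_0000, 0, 0,
            0, 0, 0b1111_0000, 0b0101, 0b01, 0b1000_0000, 0b0, 0b1000_0000,
        ];
        assert_eq!(movemask_epi8(a), 0b10100100_00100101);
    }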
"sse2"] - unsafe fn _mm_unpackhi_epi64() { - let a = i64x2::new(0, 1); - let b = i64x2::new(2, 3); - let r = sse2::_mm_unpackhi_epi64(a, b); - let e = i64x2::new(1, 3); + unsafe fn test_mm_unpackhi_epi64() { + let a = _mm_setr_epi64x(0, 1); + let b = _mm_setr_epi64x(2, 3); + let r = _mm_unpackhi_epi64(a, b); + let e = _mm_setr_epi64x(1, 3); assert_eq!(r, e); } #[simd_test = "sse2"] - unsafe fn _mm_unpacklo_epi8() { + unsafe fn test_mm_unpacklo_epi8() { let a = - i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); #[cfg_attr(rustfmt, rustfmt_skip)] - let b = i8x16::new( + let b = _mm_setr_epi8( 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, ); - let r = sse2::_mm_unpacklo_epi8(a, b); + let r = _mm_unpacklo_epi8(a, b); let e = - i8x16::new(0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23); + _mm_setr_epi8(0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23); assert_eq!(r, e); } #[simd_test = "sse2"] - unsafe fn _mm_unpacklo_epi16() { - let a = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let b = i16x8::new(8, 9, 10, 11, 12, 13, 14, 15); - let r = sse2::_mm_unpacklo_epi16(a, b); - let e = i16x8::new(0, 8, 1, 9, 2, 10, 3, 11); + unsafe fn test_mm_unpacklo_epi16() { + let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7); + let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm_unpacklo_epi16(a, b); + let e = _mm_setr_epi16(0, 8, 1, 9, 2, 10, 3, 11); assert_eq!(r, e); } #[simd_test = "sse2"] - unsafe fn _mm_unpacklo_epi32() { - let a = i32x4::new(0, 1, 2, 3); - let b = i32x4::new(4, 5, 6, 7); - let r = sse2::_mm_unpacklo_epi32(a, b); - let e = i32x4::new(0, 4, 1, 5); + unsafe fn test_mm_unpacklo_epi32() { + let a = _mm_setr_epi32(0, 1, 2, 3); + let b = _mm_setr_epi32(4, 5, 6, 7); + let r = _mm_unpacklo_epi32(a, b); + let e = _mm_setr_epi32(0, 4, 1, 5); assert_eq!(r, e); } #[simd_test = "sse2"] - unsafe fn _mm_unpacklo_epi64() { - let a = i64x2::new(0, 1); - let b = i64x2::new(2, 3); - let r = sse2::_mm_unpacklo_epi64(a, b); - let e = i64x2::new(0, 2); + unsafe fn test_mm_unpacklo_epi64() { + let a = _mm_setr_epi64x(0, 1); + let b = _mm_setr_epi64x(2, 3); + let r = _mm_unpacklo_epi64(a, b); + let e = _mm_setr_epi64x(0, 2); assert_eq!(r, e); } #[simd_test = "sse2"] - unsafe fn _mm_add_sd() { - let a = f64x2::new(1.0, 2.0); - let b = f64x2::new(5.0, 10.0); - let r = sse2::_mm_add_sd(a, b); - assert_eq!(r, f64x2::new(6.0, 2.0)); + unsafe fn test_mm_add_sd() { + let a = _mm_setr_pd(1.0, 2.0); + let b = _mm_setr_pd(5.0, 10.0); + let r = _mm_add_sd(a, b); + assert_eq_m128d(r, _mm_setr_pd(6.0, 2.0)); } #[simd_test = "sse2"] - unsafe fn _mm_add_pd() { - let a = f64x2::new(1.0, 2.0); - let b = f64x2::new(5.0, 10.0); - let r = sse2::_mm_add_pd(a, b); - assert_eq!(r, f64x2::new(6.0, 12.0)); + unsafe fn test_mm_add_pd() { + let a = _mm_setr_pd(1.0, 2.0); + let b = _mm_setr_pd(5.0, 10.0); + let r = _mm_add_pd(a, b); + assert_eq_m128d(r, _mm_setr_pd(6.0, 12.0)); } #[simd_test = "sse2"] - unsafe fn _mm_div_sd() { - let a = f64x2::new(1.0, 2.0); - let b = f64x2::new(5.0, 10.0); - let r = sse2::_mm_div_sd(a, b); - assert_eq!(r, f64x2::new(0.2, 2.0)); + unsafe fn test_mm_div_sd() { + let a = _mm_setr_pd(1.0, 2.0); + let b = _mm_setr_pd(5.0, 10.0); + let r = _mm_div_sd(a, b); + assert_eq_m128d(r, _mm_setr_pd(0.2, 2.0)); } #[simd_test = "sse2"] - unsafe fn _mm_div_pd() { - let a = f64x2::new(1.0, 2.0); - let b = f64x2::new(5.0, 10.0); - let r = sse2::_mm_div_pd(a, b); - assert_eq!(r, f64x2::new(0.2, 0.2)); 
     #[simd_test = "sse2"]
-    unsafe fn _mm_div_pd() {
-        let a = f64x2::new(1.0, 2.0);
-        let b = f64x2::new(5.0, 10.0);
-        let r = sse2::_mm_div_pd(a, b);
-        assert_eq!(r, f64x2::new(0.2, 0.2));
+    unsafe fn test_mm_div_pd() {
+        let a = _mm_setr_pd(1.0, 2.0);
+        let b = _mm_setr_pd(5.0, 10.0);
+        let r = _mm_div_pd(a, b);
+        assert_eq_m128d(r, _mm_setr_pd(0.2, 0.2));
     }

     #[simd_test = "sse2"]
-    unsafe fn _mm_max_sd() {
-        let a = f64x2::new(1.0, 2.0);
-        let b = f64x2::new(5.0, 10.0);
-        let r = sse2::_mm_max_sd(a, b);
-        assert_eq!(r, f64x2::new(5.0, 2.0));
+    unsafe fn test_mm_max_sd() {
+        let a = _mm_setr_pd(1.0, 2.0);
+        let b = _mm_setr_pd(5.0, 10.0);
+        let r = _mm_max_sd(a, b);
+        assert_eq_m128d(r, _mm_setr_pd(5.0, 2.0));
     }

     #[simd_test = "sse2"]
-    unsafe fn _mm_max_pd() {
-        let a = f64x2::new(1.0, 2.0);
-        let b = f64x2::new(5.0, 10.0);
-        let r = sse2::_mm_max_pd(a, b);
-        assert_eq!(r, f64x2::new(5.0, 10.0));
+    unsafe fn test_mm_max_pd() {
+        let a = _mm_setr_pd(1.0, 2.0);
+        let b = _mm_setr_pd(5.0, 10.0);
+        let r = _mm_max_pd(a, b);
+        assert_eq_m128d(r, _mm_setr_pd(5.0, 10.0));
     }

     #[simd_test = "sse2"]
-    unsafe fn _mm_min_sd() {
-        let a = f64x2::new(1.0, 2.0);
-        let b = f64x2::new(5.0, 10.0);
-        let r = sse2::_mm_min_sd(a, b);
-        assert_eq!(r, f64x2::new(1.0, 2.0));
+    unsafe fn test_mm_min_sd() {
+        let a = _mm_setr_pd(1.0, 2.0);
+        let b = _mm_setr_pd(5.0, 10.0);
+        let r = _mm_min_sd(a, b);
+        assert_eq_m128d(r, _mm_setr_pd(1.0, 2.0));
     }

     #[simd_test = "sse2"]
-    unsafe fn _mm_min_pd() {
-        let a = f64x2::new(1.0, 2.0);
-        let b = f64x2::new(5.0, 10.0);
-        let r = sse2::_mm_min_pd(a, b);
-        assert_eq!(r, f64x2::new(1.0, 2.0));
+    unsafe fn test_mm_min_pd() {
+        let a = _mm_setr_pd(1.0, 2.0);
+        let b = _mm_setr_pd(5.0, 10.0);
+        let r = _mm_min_pd(a, b);
+        assert_eq_m128d(r, _mm_setr_pd(1.0, 2.0));
     }

     #[simd_test = "sse2"]
-    unsafe fn _mm_mul_sd() {
-        let a = f64x2::new(1.0, 2.0);
-        let b = f64x2::new(5.0, 10.0);
-        let r = sse2::_mm_mul_sd(a, b);
-        assert_eq!(r, f64x2::new(5.0, 2.0));
+    unsafe fn test_mm_mul_sd() {
+        let a = _mm_setr_pd(1.0, 2.0);
+        let b = _mm_setr_pd(5.0, 10.0);
+        let r = _mm_mul_sd(a, b);
+        assert_eq_m128d(r, _mm_setr_pd(5.0, 2.0));
     }

     #[simd_test = "sse2"]
-    unsafe fn _mm_mul_pd() {
-        let a = f64x2::new(1.0, 2.0);
-        let b = f64x2::new(5.0, 10.0);
-        let r = sse2::_mm_mul_pd(a, b);
-        assert_eq!(r, f64x2::new(5.0, 20.0));
+    unsafe fn test_mm_mul_pd() {
+        let a = _mm_setr_pd(1.0, 2.0);
+        let b = _mm_setr_pd(5.0, 10.0);
+        let r = _mm_mul_pd(a, b);
+        assert_eq_m128d(r, _mm_setr_pd(5.0, 20.0));
     }

     #[simd_test = "sse2"]
-    unsafe fn _mm_sqrt_sd() {
-        let a = f64x2::new(1.0, 2.0);
-        let b = f64x2::new(5.0, 10.0);
-        let r = sse2::_mm_sqrt_sd(a, b);
-        assert_eq!(r, f64x2::new(5.0f64.sqrt(), 2.0));
+    unsafe fn test_mm_sqrt_sd() {
+        let a = _mm_setr_pd(1.0, 2.0);
+        let b = _mm_setr_pd(5.0, 10.0);
+        let r = _mm_sqrt_sd(a, b);
+        assert_eq_m128d(r, _mm_setr_pd(5.0f64.sqrt(), 2.0));
     }

     #[simd_test = "sse2"]
-    unsafe fn _mm_sqrt_pd() {
-        let r = sse2::_mm_sqrt_pd(f64x2::new(1.0, 2.0));
-        assert_eq!(r, f64x2::new(1.0f64.sqrt(), 2.0f64.sqrt()));
+    unsafe fn test_mm_sqrt_pd() {
+        let r = _mm_sqrt_pd(_mm_setr_pd(1.0, 2.0));
+        assert_eq_m128d(r, _mm_setr_pd(1.0f64.sqrt(), 2.0f64.sqrt()));
     }

     #[simd_test = "sse2"]
-    unsafe fn _mm_sub_sd() {
-        let a = f64x2::new(1.0, 2.0);
-        let b = f64x2::new(5.0, 10.0);
-        let r = sse2::_mm_sub_sd(a, b);
-        assert_eq!(r, f64x2::new(-4.0, 2.0));
+    unsafe fn test_mm_sub_sd() {
+        let a = _mm_setr_pd(1.0, 2.0);
+        let b = _mm_setr_pd(5.0, 10.0);
+        let r = _mm_sub_sd(a, b);
+        assert_eq_m128d(r, _mm_setr_pd(-4.0, 2.0));
     }

     #[simd_test = "sse2"]
-    unsafe fn _mm_sub_pd() {
-        let a = f64x2::new(1.0, 2.0);
-        let b = f64x2::new(5.0, 10.0);
-        let r = sse2::_mm_sub_pd(a, b);
-        assert_eq!(r, f64x2::new(-4.0, -8.0));
+    unsafe fn test_mm_sub_pd() {
+        let a = _mm_setr_pd(1.0, 2.0);
+        let b = _mm_setr_pd(5.0, 10.0);
+        let r = _mm_sub_pd(a, b);
+        assert_eq_m128d(r, _mm_setr_pd(-4.0, -8.0));
     }
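The `_sd`/`_pd` pairs above all follow one pattern: the scalar (`_sd`) form combines only the low lanes and passes the high lane of `a` through unchanged, while the packed (`_pd`) form operates on both lanes. A scalar model of that pattern (illustrative helpers, not from the patch):

    fn op_sd(a: [f64; 2], b: [f64; 2], op: fn(f64, f64) -> f64) -> [f64; 2] {
        [op(a[0], b[0]), a[1]] // high lane taken unchanged from `a`
    }

    fn op_pd(a: [f64; 2], b: [f64; 2], op: fn(f64, f64) -> f64) -> [f64; 2] {
        [op(a[0], b[0]), op(a[1], b[1])]
    }

    fn main() {
        let (a, b) = ([1.0, 2.0], [5.0, 10.0]);
        assert_eq!(op_sd(a, b, |x, y| x - y), [-4.0, 2.0]);  // matches test_mm_sub_sd
        assert_eq!(op_pd(a, b, |x, y| x - y), [-4.0, -8.0]); // matches test_mm_sub_pd
    }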
     #[simd_test = "sse2"]
-    unsafe fn _mm_and_pd() {
-        use std::mem::transmute;
-
-        let a: f64x2 = transmute(u64x2::splat(5));
-        let b: f64x2 = transmute(u64x2::splat(3));
-        let r = sse2::_mm_and_pd(a, b);
-        let e: f64x2 = transmute(u64x2::splat(1));
-        assert_eq!(r, e);
+    unsafe fn test_mm_and_pd() {
+        let a = transmute(u64x2::splat(5));
+        let b = transmute(u64x2::splat(3));
+        let r = _mm_and_pd(a, b);
+        let e = transmute(u64x2::splat(1));
+        assert_eq_m128d(r, e);
     }

     #[simd_test = "sse2"]
-    unsafe fn _mm_andnot_pd() {
-        use std::mem::transmute;
-
-        let a: f64x2 = transmute(u64x2::splat(5));
-        let b: f64x2 = transmute(u64x2::splat(3));
-        let r = sse2::_mm_andnot_pd(a, b);
-        let e: f64x2 = transmute(u64x2::splat(2));
-        assert_eq!(r, e);
+    unsafe fn test_mm_andnot_pd() {
+        let a = transmute(u64x2::splat(5));
+        let b = transmute(u64x2::splat(3));
+        let r = _mm_andnot_pd(a, b);
+        let e = transmute(u64x2::splat(2));
+        assert_eq_m128d(r, e);
     }

     #[simd_test = "sse2"]
-    unsafe fn _mm_or_pd() {
-        use std::mem::transmute;
-
-        let a: f64x2 = transmute(u64x2::splat(5));
-        let b: f64x2 = transmute(u64x2::splat(3));
-        let r = sse2::_mm_or_pd(a, b);
-        let e: f64x2 = transmute(u64x2::splat(7));
-        assert_eq!(r, e);
+    unsafe fn test_mm_or_pd() {
+        let a = transmute(u64x2::splat(5));
+        let b = transmute(u64x2::splat(3));
+        let r = _mm_or_pd(a, b);
+        let e = transmute(u64x2::splat(7));
+        assert_eq_m128d(r, e);
     }

     #[simd_test = "sse2"]
-    unsafe fn _mm_xor_pd() {
-        use std::mem::transmute;
-
-        let a: f64x2 = transmute(u64x2::splat(5));
-        let b: f64x2 = transmute(u64x2::splat(3));
-        let r = sse2::_mm_xor_pd(a, b);
-        let e: f64x2 = transmute(u64x2::splat(6));
-        assert_eq!(r, e);
+    unsafe fn test_mm_xor_pd() {
+        let a = transmute(u64x2::splat(5));
+        let b = transmute(u64x2::splat(3));
+        let r = _mm_xor_pd(a, b);
+        let e = transmute(u64x2::splat(6));
+        assert_eq_m128d(r, e);
     }

     #[simd_test = "sse2"]
-    unsafe fn _mm_cmpeq_sd() {
-        use std::mem::transmute;
-
-        let (a, b) = (f64x2::new(1.0, 2.0), f64x2::new(1.0, 3.0));
-        let e = u64x2::new(!0, transmute(2.0f64));
-        let r: u64x2 = transmute(sse2::_mm_cmpeq_sd(a, b));
+    unsafe fn test_mm_cmpeq_sd() {
+        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
+        let e = _mm_setr_epi64x(!0, transmute(2.0f64));
+        let r = transmute::<_, __m128i>(_mm_cmpeq_sd(a, b));
         assert_eq!(r, e);
     }

     #[simd_test = "sse2"]
-    unsafe fn _mm_cmplt_sd() {
-        use std::mem::transmute;
-
-        let (a, b) = (f64x2::new(1.0, 2.0), f64x2::new(5.0, 3.0));
-        let e = u64x2::new(!0, transmute(2.0f64));
-        let r: u64x2 = transmute(sse2::_mm_cmplt_sd(a, b));
+    unsafe fn test_mm_cmplt_sd() {
+        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
+        let e = _mm_setr_epi64x(!0, transmute(2.0f64));
+        let r = transmute::<_, __m128i>(_mm_cmplt_sd(a, b));
         assert_eq!(r, e);
     }

     #[simd_test = "sse2"]
-    unsafe fn _mm_cmple_sd() {
-        use std::mem::transmute;
-
-        let (a, b) = (f64x2::new(1.0, 2.0), f64x2::new(1.0, 3.0));
-        let e = u64x2::new(!0, transmute(2.0f64));
-        let r: u64x2 = transmute(sse2::_mm_cmple_sd(a, b));
+    unsafe fn test_mm_cmple_sd() {
+        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
+        let e = _mm_setr_epi64x(!0, transmute(2.0f64));
+        let r = transmute::<_, __m128i>(_mm_cmple_sd(a, b));
         assert_eq!(r, e);
     }
     #[simd_test = "sse2"]
-    unsafe fn _mm_cmpgt_sd() {
-        use std::mem::transmute;
-
-        let (a, b) = (f64x2::new(5.0, 2.0), f64x2::new(1.0, 3.0));
-        let e = u64x2::new(!0, transmute(2.0f64));
-        let r: u64x2 = transmute(sse2::_mm_cmpgt_sd(a, b));
+    unsafe fn test_mm_cmpgt_sd() {
+        let (a, b) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0));
+        let e = _mm_setr_epi64x(!0, transmute(2.0f64));
+        let r = transmute::<_, __m128i>(_mm_cmpgt_sd(a, b));
         assert_eq!(r, e);
     }

     #[simd_test = "sse2"]
-    unsafe fn _mm_cmpge_sd() {
-        use std::mem::transmute;
-
-        let (a, b) = (f64x2::new(1.0, 2.0), f64x2::new(1.0, 3.0));
-        let e = u64x2::new(!0, transmute(2.0f64));
-        let r: u64x2 = transmute(sse2::_mm_cmpge_sd(a, b));
+    unsafe fn test_mm_cmpge_sd() {
+        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
+        let e = _mm_setr_epi64x(!0, transmute(2.0f64));
+        let r = transmute::<_, __m128i>(_mm_cmpge_sd(a, b));
         assert_eq!(r, e);
     }

     #[simd_test = "sse2"]
-    unsafe fn _mm_cmpord_sd() {
-        use std::f64::NAN;
-        use std::mem::transmute;
-
-        let (a, b) = (f64x2::new(NAN, 2.0), f64x2::new(5.0, 3.0));
-        let e = u64x2::new(0, transmute(2.0f64));
-        let r: u64x2 = transmute(sse2::_mm_cmpord_sd(a, b));
+    unsafe fn test_mm_cmpord_sd() {
+        let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0));
+        let e = _mm_setr_epi64x(0, transmute(2.0f64));
+        let r = transmute::<_, __m128i>(_mm_cmpord_sd(a, b));
         assert_eq!(r, e);
     }

     #[simd_test = "sse2"]
-    unsafe fn _mm_cmpunord_sd() {
-        use std::f64::NAN;
-        use std::mem::transmute;
-
-        let (a, b) = (f64x2::new(NAN, 2.0), f64x2::new(5.0, 3.0));
-        let e = u64x2::new(!0, transmute(2.0f64));
-        let r: u64x2 = transmute(sse2::_mm_cmpunord_sd(a, b));
+    unsafe fn test_mm_cmpunord_sd() {
+        let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0));
+        let e = _mm_setr_epi64x(!0, transmute(2.0f64));
+        let r = transmute::<_, __m128i>(_mm_cmpunord_sd(a, b));
        assert_eq!(r, e);
     }

     #[simd_test = "sse2"]
-    unsafe fn _mm_cmpneq_sd() {
-        use std::mem::transmute;
-
-        let (a, b) = (f64x2::new(1.0, 2.0), f64x2::new(5.0, 3.0));
-        let e = u64x2::new(!0, transmute(2.0f64));
-        let r: u64x2 = transmute(sse2::_mm_cmpneq_sd(a, b));
+    unsafe fn test_mm_cmpneq_sd() {
+        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
+        let e = _mm_setr_epi64x(!0, transmute(2.0f64));
+        let r = transmute::<_, __m128i>(_mm_cmpneq_sd(a, b));
         assert_eq!(r, e);
     }

     #[simd_test = "sse2"]
-    unsafe fn _mm_cmpnlt_sd() {
-        use std::mem::transmute;
-
-        let (a, b) = (f64x2::new(1.0, 2.0), f64x2::new(5.0, 3.0));
-        let e = u64x2::new(0, transmute(2.0f64));
-        let r: u64x2 = transmute(sse2::_mm_cmpnlt_sd(a, b));
+    unsafe fn test_mm_cmpnlt_sd() {
+        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
+        let e = _mm_setr_epi64x(0, transmute(2.0f64));
+        let r = transmute::<_, __m128i>(_mm_cmpnlt_sd(a, b));
         assert_eq!(r, e);
     }

     #[simd_test = "sse2"]
-    unsafe fn _mm_cmpnle_sd() {
-        use std::mem::transmute;
-
-        let (a, b) = (f64x2::new(1.0, 2.0), f64x2::new(1.0, 3.0));
-        let e = u64x2::new(0, transmute(2.0f64));
-        let r: u64x2 = transmute(sse2::_mm_cmpnle_sd(a, b));
+    unsafe fn test_mm_cmpnle_sd() {
+        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
+        let e = _mm_setr_epi64x(0, transmute(2.0f64));
+        let r = transmute::<_, __m128i>(_mm_cmpnle_sd(a, b));
         assert_eq!(r, e);
     }

     #[simd_test = "sse2"]
-    unsafe fn _mm_cmpngt_sd() {
-        use std::mem::transmute;
-
-        let (a, b) = (f64x2::new(5.0, 2.0), f64x2::new(1.0, 3.0));
-        let e = u64x2::new(0, transmute(2.0f64));
-        let r: u64x2 = transmute(sse2::_mm_cmpngt_sd(a, b));
+    unsafe fn test_mm_cmpngt_sd() {
+        let (a, b) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0));
+        let e = _mm_setr_epi64x(0, transmute(2.0f64));
+        let r = transmute::<_, __m128i>(_mm_cmpngt_sd(a, b));
         assert_eq!(r, e);
     }
     #[simd_test = "sse2"]
-    unsafe fn _mm_cmpnge_sd() {
-        use std::mem::transmute;
-
-        let (a, b) = (f64x2::new(1.0, 2.0), f64x2::new(1.0, 3.0));
-        let e = u64x2::new(0, transmute(2.0f64));
-        let r: u64x2 = transmute(sse2::_mm_cmpnge_sd(a, b));
+    unsafe fn test_mm_cmpnge_sd() {
+        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
+        let e = _mm_setr_epi64x(0, transmute(2.0f64));
+        let r = transmute::<_, __m128i>(_mm_cmpnge_sd(a, b));
         assert_eq!(r, e);
     }

     #[simd_test = "sse2"]
-    unsafe fn _mm_cmpeq_pd() {
-        use std::mem::transmute;
-
-        let (a, b) = (f64x2::new(1.0, 2.0), f64x2::new(1.0, 3.0));
-        let e = u64x2::new(!0, 0);
-        let r: u64x2 = transmute(sse2::_mm_cmpeq_pd(a, b));
+    unsafe fn test_mm_cmpeq_pd() {
+        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
+        let e = _mm_setr_epi64x(!0, 0);
+        let r = transmute::<_, __m128i>(_mm_cmpeq_pd(a, b));
         assert_eq!(r, e);
     }

     #[simd_test = "sse2"]
-    unsafe fn _mm_cmplt_pd() {
-        use std::mem::transmute;
-
-        let (a, b) = (f64x2::new(1.0, 2.0), f64x2::new(1.0, 3.0));
-        let e = u64x2::new(0, !0);
-        let r: u64x2 = transmute(sse2::_mm_cmplt_pd(a, b));
+    unsafe fn test_mm_cmplt_pd() {
+        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
+        let e = _mm_setr_epi64x(0, !0);
+        let r = transmute::<_, __m128i>(_mm_cmplt_pd(a, b));
         assert_eq!(r, e);
     }

     #[simd_test = "sse2"]
-    unsafe fn _mm_cmple_pd() {
-        use std::mem::transmute;
-
-        let (a, b) = (f64x2::new(1.0, 2.0), f64x2::new(1.0, 3.0));
-        let e = u64x2::new(!0, !0);
-        let r: u64x2 = transmute(sse2::_mm_cmple_pd(a, b));
+    unsafe fn test_mm_cmple_pd() {
+        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
+        let e = _mm_setr_epi64x(!0, !0);
+        let r = transmute::<_, __m128i>(_mm_cmple_pd(a, b));
         assert_eq!(r, e);
     }

     #[simd_test = "sse2"]
-    unsafe fn _mm_cmpgt_pd() {
-        use std::mem::transmute;
-
-        let (a, b) = (f64x2::new(1.0, 2.0), f64x2::new(1.0, 3.0));
-        let e = u64x2::new(0, 0);
-        let r: u64x2 = transmute(sse2::_mm_cmpgt_pd(a, b));
+    unsafe fn test_mm_cmpgt_pd() {
+        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
+        let e = _mm_setr_epi64x(0, 0);
+        let r = transmute::<_, __m128i>(_mm_cmpgt_pd(a, b));
         assert_eq!(r, e);
     }

     #[simd_test = "sse2"]
-    unsafe fn _mm_cmpge_pd() {
-        use std::mem::transmute;
-
-        let (a, b) = (f64x2::new(1.0, 2.0), f64x2::new(1.0, 3.0));
-        let e = u64x2::new(!0, 0);
-        let r: u64x2 = transmute(sse2::_mm_cmpge_pd(a, b));
+    unsafe fn test_mm_cmpge_pd() {
+        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
+        let e = _mm_setr_epi64x(!0, 0);
+        let r = transmute::<_, __m128i>(_mm_cmpge_pd(a, b));
         assert_eq!(r, e);
     }

     #[simd_test = "sse2"]
-    unsafe fn _mm_cmpord_pd() {
-        use std::f64::NAN;
-        use std::mem::transmute;
-
-        let (a, b) = (f64x2::new(NAN, 2.0), f64x2::new(5.0, 3.0));
-        let e = u64x2::new(0, !0);
-        let r: u64x2 = transmute(sse2::_mm_cmpord_pd(a, b));
+    unsafe fn test_mm_cmpord_pd() {
+        let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0));
+        let e = _mm_setr_epi64x(0, !0);
+        let r = transmute::<_, __m128i>(_mm_cmpord_pd(a, b));
         assert_eq!(r, e);
     }

     #[simd_test = "sse2"]
-    unsafe fn _mm_cmpunord_pd() {
-        use std::f64::NAN;
-        use std::mem::transmute;
-
-        let (a, b) = (f64x2::new(NAN, 2.0), f64x2::new(5.0, 3.0));
-        let e = u64x2::new(!0, 0);
-        let r: u64x2 = transmute(sse2::_mm_cmpunord_pd(a, b));
+    unsafe fn test_mm_cmpunord_pd() {
+        let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0));
+        let e = _mm_setr_epi64x(!0, 0);
+        let r = transmute::<_, __m128i>(_mm_cmpunord_pd(a, b));
         assert_eq!(r, e);
     }
     #[simd_test = "sse2"]
-    unsafe fn _mm_cmpneq_pd() {
-        use std::mem::transmute;
-
-        let (a, b) = (f64x2::new(1.0, 2.0), f64x2::new(5.0, 3.0));
-        let e = u64x2::new(!0, !0);
-        let r: u64x2 = transmute(sse2::_mm_cmpneq_pd(a, b));
+    unsafe fn test_mm_cmpneq_pd() {
+        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
+        let e = _mm_setr_epi64x(!0, !0);
+        let r = transmute::<_, __m128i>(_mm_cmpneq_pd(a, b));
         assert_eq!(r, e);
     }

     #[simd_test = "sse2"]
-    unsafe fn _mm_cmpnlt_pd() {
-        use std::mem::transmute;
-
-        let (a, b) = (f64x2::new(1.0, 2.0), f64x2::new(5.0, 3.0));
-        let e = u64x2::new(0, 0);
-        let r: u64x2 = transmute(sse2::_mm_cmpnlt_pd(a, b));
+    unsafe fn test_mm_cmpnlt_pd() {
+        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
+        let e = _mm_setr_epi64x(0, 0);
+        let r = transmute::<_, __m128i>(_mm_cmpnlt_pd(a, b));
         assert_eq!(r, e);
     }

     #[simd_test = "sse2"]
-    unsafe fn _mm_cmpnle_pd() {
-        use std::mem::transmute;
-
-        let (a, b) = (f64x2::new(1.0, 2.0), f64x2::new(1.0, 3.0));
-        let e = u64x2::new(0, 0);
-        let r: u64x2 = transmute(sse2::_mm_cmpnle_pd(a, b));
+    unsafe fn test_mm_cmpnle_pd() {
+        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
+        let e = _mm_setr_epi64x(0, 0);
+        let r = transmute::<_, __m128i>(_mm_cmpnle_pd(a, b));
         assert_eq!(r, e);
     }

     #[simd_test = "sse2"]
-    unsafe fn _mm_cmpngt_pd() {
-        use std::mem::transmute;
-
-        let (a, b) = (f64x2::new(5.0, 2.0), f64x2::new(1.0, 3.0));
-        let e = u64x2::new(0, !0);
-        let r: u64x2 = transmute(sse2::_mm_cmpngt_pd(a, b));
+    unsafe fn test_mm_cmpngt_pd() {
+        let (a, b) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0));
+        let e = _mm_setr_epi64x(0, !0);
+        let r = transmute::<_, __m128i>(_mm_cmpngt_pd(a, b));
         assert_eq!(r, e);
     }

     #[simd_test = "sse2"]
-    unsafe fn _mm_cmpnge_pd() {
-        use std::mem::transmute;
-
-        let (a, b) = (f64x2::new(1.0, 2.0), f64x2::new(1.0, 3.0));
-        let e = u64x2::new(0, !0);
-        let r: u64x2 = transmute(sse2::_mm_cmpnge_pd(a, b));
+    unsafe fn test_mm_cmpnge_pd() {
+        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
+        let e = _mm_setr_epi64x(0, !0);
+        let r = transmute::<_, __m128i>(_mm_cmpnge_pd(a, b));
         assert_eq!(r, e);
     }

     #[simd_test = "sse2"]
-    unsafe fn _mm_comieq_sd() {
-        use std::f64::NAN;
-
-        let (a, b) = (f64x2::new(1.0, 2.0), f64x2::new(1.0, 3.0));
-        assert!(sse2::_mm_comieq_sd(a, b));
+    unsafe fn test_mm_comieq_sd() {
+        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
+        assert!(_mm_comieq_sd(a, b));

-        let (a, b) = (f64x2::new(NAN, 2.0), f64x2::new(1.0, 3.0));
-        assert!(!sse2::_mm_comieq_sd(a, b));
+        let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(1.0, 3.0));
+        assert!(!_mm_comieq_sd(a, b));
     }

     #[simd_test = "sse2"]
-    unsafe fn _mm_comilt_sd() {
-        let (a, b) = (f64x2::new(1.0, 2.0), f64x2::new(1.0, 3.0));
-        assert!(!sse2::_mm_comilt_sd(a, b));
+    unsafe fn test_mm_comilt_sd() {
+        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
+        assert!(!_mm_comilt_sd(a, b));
     }

     #[simd_test = "sse2"]
-    unsafe fn _mm_comile_sd() {
-        let (a, b) = (f64x2::new(1.0, 2.0), f64x2::new(1.0, 3.0));
-        assert!(sse2::_mm_comile_sd(a, b));
+    unsafe fn test_mm_comile_sd() {
+        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
+        assert!(_mm_comile_sd(a, b));
     }
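Unlike the `_mm_cmp*` tests above, which check full lane masks by transmuting the result to `__m128i`, the `_mm_comi*`/`_mm_ucomi*` tests compare only the low lanes and get back a scalar truth value, so a plain `assert!` suffices; any NaN operand makes the ordered predicates false. A one-line IEEE 754 model of the equality case (illustrative, not from the patch):

    fn comieq(a: f64, b: f64) -> bool {
        a == b // IEEE 754 comparison: false whenever either operand is NaN
    }

    fn main() {
        assert!(comieq(1.0, 1.0));
        assert!(!comieq(std::f64::NAN, 1.0)); // mirrors the NaN case in test_mm_comieq_sd
    }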
     #[simd_test = "sse2"]
-    unsafe fn _mm_comigt_sd() {
-        let (a, b) = (f64x2::new(1.0, 2.0), f64x2::new(1.0, 3.0));
-        assert!(!sse2::_mm_comigt_sd(a, b));
+    unsafe fn test_mm_comigt_sd() {
+        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
+        assert!(!_mm_comigt_sd(a, b));
     }

     #[simd_test = "sse2"]
-    unsafe fn _mm_comige_sd() {
-        let (a, b) = (f64x2::new(1.0, 2.0), f64x2::new(1.0, 3.0));
-        assert!(sse2::_mm_comige_sd(a, b));
+    unsafe fn test_mm_comige_sd() {
+        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
+        assert!(_mm_comige_sd(a, b));
     }

     #[simd_test = "sse2"]
-    unsafe fn _mm_comineq_sd() {
-        let (a, b) = (f64x2::new(1.0, 2.0), f64x2::new(1.0, 3.0));
-        assert!(!sse2::_mm_comineq_sd(a, b));
+    unsafe fn test_mm_comineq_sd() {
+        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
+        assert!(!_mm_comineq_sd(a, b));
     }

     #[simd_test = "sse2"]
-    unsafe fn _mm_ucomieq_sd() {
-        use std::f64::NAN;
-
-        let (a, b) = (f64x2::new(1.0, 2.0), f64x2::new(1.0, 3.0));
-        assert!(sse2::_mm_ucomieq_sd(a, b));
+    unsafe fn test_mm_ucomieq_sd() {
+        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
+        assert!(_mm_ucomieq_sd(a, b));

-        let (a, b) = (f64x2::new(NAN, 2.0), f64x2::new(NAN, 3.0));
-        assert!(!sse2::_mm_ucomieq_sd(a, b));
+        let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(NAN, 3.0));
+        assert!(!_mm_ucomieq_sd(a, b));
     }

     #[simd_test = "sse2"]
-    unsafe fn _mm_ucomilt_sd() {
-        let (a, b) = (f64x2::new(1.0, 2.0), f64x2::new(1.0, 3.0));
-        assert!(!sse2::_mm_ucomilt_sd(a, b));
+    unsafe fn test_mm_ucomilt_sd() {
+        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
+        assert!(!_mm_ucomilt_sd(a, b));
     }

     #[simd_test = "sse2"]
-    unsafe fn _mm_ucomile_sd() {
-        let (a, b) = (f64x2::new(1.0, 2.0), f64x2::new(1.0, 3.0));
-        assert!(sse2::_mm_ucomile_sd(a, b));
+    unsafe fn test_mm_ucomile_sd() {
+        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
+        assert!(_mm_ucomile_sd(a, b));
     }

     #[simd_test = "sse2"]
-    unsafe fn _mm_ucomigt_sd() {
-        let (a, b) = (f64x2::new(1.0, 2.0), f64x2::new(1.0, 3.0));
-        assert!(!sse2::_mm_ucomigt_sd(a, b));
+    unsafe fn test_mm_ucomigt_sd() {
+        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
+        assert!(!_mm_ucomigt_sd(a, b));
     }

     #[simd_test = "sse2"]
-    unsafe fn _mm_ucomige_sd() {
-        let (a, b) = (f64x2::new(1.0, 2.0), f64x2::new(1.0, 3.0));
-        assert!(sse2::_mm_ucomige_sd(a, b));
+    unsafe fn test_mm_ucomige_sd() {
+        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
+        assert!(_mm_ucomige_sd(a, b));
     }

     #[simd_test = "sse2"]
-    unsafe fn _mm_ucomineq_sd() {
-        let (a, b) = (f64x2::new(1.0, 2.0), f64x2::new(1.0, 3.0));
-        assert!(!sse2::_mm_ucomineq_sd(a, b));
+    unsafe fn test_mm_ucomineq_sd() {
+        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
+        assert!(!_mm_ucomineq_sd(a, b));
     }

     #[simd_test = "sse2"]
-    unsafe fn _mm_movemask_pd() {
-        let r = sse2::_mm_movemask_pd(f64x2::new(-1.0, 5.0));
+    unsafe fn test_mm_movemask_pd() {
+        let r = _mm_movemask_pd(_mm_setr_pd(-1.0, 5.0));
         assert_eq!(r, 0b01);

-        let r = sse2::_mm_movemask_pd(f64x2::new(-1.0, -5.0));
+        let r = _mm_movemask_pd(_mm_setr_pd(-1.0, -5.0));
         assert_eq!(r, 0b11);
     }

@@ -3888,83 +3859,83 @@ mod tests {
     }

     #[simd_test = "sse2"]
-    unsafe fn _mm_load_pd() {
+    unsafe fn test_mm_load_pd() {
         let mem = Memory {
             data: [1.0f64, 2.0, 3.0, 4.0],
         };
         let vals = &mem.data;
         let d = vals.as_ptr();

-        let r = sse2::_mm_load_pd(d);
-        assert_eq!(r, f64x2::new(1.0, 2.0));
+        let r = _mm_load_pd(d);
+        assert_eq_m128d(r, _mm_setr_pd(1.0, 2.0));
     }

     #[simd_test = "sse2"]
-    unsafe fn _mm_load_sd() {
+    unsafe fn test_mm_load_sd() {
         let a = 1.;
-        let expected = f64x2::new(a, 0.);
-        let r = sse2::_mm_load_sd(&a);
-        assert_eq!(r, expected);
+        let expected = _mm_setr_pd(a, 0.);
+        let r = _mm_load_sd(&a);
+        assert_eq_m128d(r, expected);
     }
     #[simd_test = "sse2"]
-    unsafe fn _mm_loadh_pd() {
-        let a = f64x2::new(1., 2.);
+    unsafe fn test_mm_loadh_pd() {
+        let a = _mm_setr_pd(1., 2.);
         let b = 3.;
-        let expected = f64x2::new(a.extract(0), 3.);
-        let r = sse2::_mm_loadh_pd(a, &b);
-        assert_eq!(r, expected);
+        let expected = _mm_setr_pd(_mm_cvtsd_f64(a), 3.);
+        let r = _mm_loadh_pd(a, &b);
+        assert_eq_m128d(r, expected);
     }

     #[simd_test = "sse2"]
-    unsafe fn _mm_loadl_pd() {
-        let a = f64x2::new(1., 2.);
+    unsafe fn test_mm_loadl_pd() {
+        let a = _mm_setr_pd(1., 2.);
         let b = 3.;
-        let expected = f64x2::new(3., a.extract(1));
-        let r = sse2::_mm_loadl_pd(a, &b);
-        assert_eq!(r, expected);
+        let expected = _mm_setr_pd(3., get_m128d(a, 1));
+        let r = _mm_loadl_pd(a, &b);
+        assert_eq_m128d(r, expected);
     }

     #[simd_test = "sse2"]
-    unsafe fn _mm_stream_pd() {
+    unsafe fn test_mm_stream_pd() {
         #[repr(align(128))]
         struct Memory {
             pub data: [f64; 2],
         }
-        let a = f64x2::splat(7.0);
+        let a = _mm_set1_pd(7.0);
         let mut mem = Memory { data: [-1.0; 2] };

-        sse2::_mm_stream_pd(&mut mem.data[0] as *mut f64, a);
+        _mm_stream_pd(&mut mem.data[0] as *mut f64, a);
         for i in 0..2 {
-            assert_eq!(mem.data[i], a.extract(i as u32));
+            assert_eq!(mem.data[i], get_m128d(a, i));
         }
     }

     #[simd_test = "sse2"]
-    unsafe fn _mm_store_sd() {
+    unsafe fn test_mm_store_sd() {
         let mut dest = 0.;
-        let a = f64x2::new(1., 2.);
-        sse2::_mm_store_sd(&mut dest, a);
-        assert_eq!(dest, a.extract(0));
+        let a = _mm_setr_pd(1., 2.);
+        _mm_store_sd(&mut dest, a);
+        assert_eq!(dest, _mm_cvtsd_f64(a));
     }

     #[simd_test = "sse2"]
-    unsafe fn _mm_store_pd() {
+    unsafe fn test_mm_store_pd() {
         let mut mem = Memory { data: [0.0f64; 4] };
         let vals = &mut mem.data;
-        let a = f64x2::new(1.0, 2.0);
+        let a = _mm_setr_pd(1.0, 2.0);
         let d = vals.as_mut_ptr();

-        sse2::_mm_store_pd(d, *black_box(&a));
+        _mm_store_pd(d, *black_box(&a));
         assert_eq!(vals[0], 1.0);
         assert_eq!(vals[1], 2.0);
     }

     #[simd_test = "sse"]
-    unsafe fn _mm_storeu_pd() {
+    unsafe fn test_mm_storeu_pd() {
         let mut mem = Memory { data: [0.0f64; 4] };
         let vals = &mut mem.data;
-        let a = f64x2::new(1.0, 2.0);
+        let a = _mm_setr_pd(1.0, 2.0);

         let mut ofs = 0;
         let mut p = vals.as_mut_ptr();
@@ -3975,7 +3946,7 @@ mod tests {
             p = p.offset(1);
         }

-        sse2::_mm_storeu_pd(p, *black_box(&a));
+        _mm_storeu_pd(p, *black_box(&a));

         if ofs > 0 {
             assert_eq!(vals[ofs - 1], 0.0);
@@ -3985,71 +3956,71 @@ mod tests {
     }

     #[simd_test = "sse2"]
-    unsafe fn _mm_store1_pd() {
+    unsafe fn test_mm_store1_pd() {
         let mut mem = Memory { data: [0.0f64; 4] };
         let vals = &mut mem.data;
-        let a = f64x2::new(1.0, 2.0);
+        let a = _mm_setr_pd(1.0, 2.0);
         let d = vals.as_mut_ptr();

-        sse2::_mm_store1_pd(d, *black_box(&a));
+        _mm_store1_pd(d, *black_box(&a));
         assert_eq!(vals[0], 1.0);
         assert_eq!(vals[1], 1.0);
     }

     #[simd_test = "sse2"]
-    unsafe fn _mm_store_pd1() {
+    unsafe fn test_mm_store_pd1() {
         let mut mem = Memory { data: [0.0f64; 4] };
         let vals = &mut mem.data;
-        let a = f64x2::new(1.0, 2.0);
+        let a = _mm_setr_pd(1.0, 2.0);
         let d = vals.as_mut_ptr();

-        sse2::_mm_store_pd1(d, *black_box(&a));
+        _mm_store_pd1(d, *black_box(&a));
         assert_eq!(vals[0], 1.0);
         assert_eq!(vals[1], 1.0);
     }

     #[simd_test = "sse2"]
-    unsafe fn _mm_storer_pd() {
+    unsafe fn test_mm_storer_pd() {
         let mut mem = Memory { data: [0.0f64; 4] };
         let vals = &mut mem.data;
-        let a = f64x2::new(1.0, 2.0);
+        let a = _mm_setr_pd(1.0, 2.0);
         let d = vals.as_mut_ptr();

-        sse2::_mm_storer_pd(d, *black_box(&a));
+        _mm_storer_pd(d, *black_box(&a));
         assert_eq!(vals[0], 2.0);
         assert_eq!(vals[1], 1.0);
     }
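With `f64x2::extract` gone, the tests now read individual lanes through `_mm_cvtsd_f64` (lane 0) or the `get_m128d` helper this patch adds in `coresimd/src/x86/test.rs` further down. A standalone sketch of the union trick that helper uses (`M128d` stands in for `__m128d` here, so the snippet compiles without the crate):

    #[derive(Clone, Copy)]
    #[repr(C)]
    struct M128d(f64, f64); // stand-in for __m128d

    fn get_m128d(a: M128d, idx: usize) -> f64 {
        union A { a: M128d, b: [f64; 2] }
        unsafe { A { a }.b[idx] } // reinterpret the vector as two f64 lanes
    }

    fn main() {
        assert_eq!(get_m128d(M128d(1.0, 2.0), 1), 2.0);
    }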
     #[simd_test = "sse2"]
-    unsafe fn _mm_storeh_pd() {
+    unsafe fn test_mm_storeh_pd() {
         let mut dest = 0.;
-        let a = f64x2::new(1., 2.);
-        sse2::_mm_storeh_pd(&mut dest, a);
-        assert_eq!(dest, a.extract(1));
+        let a = _mm_setr_pd(1., 2.);
+        _mm_storeh_pd(&mut dest, a);
+        assert_eq!(dest, get_m128d(a, 1));
     }

     #[simd_test = "sse2"]
-    unsafe fn _mm_storel_pd() {
+    unsafe fn test_mm_storel_pd() {
         let mut dest = 0.;
-        let a = f64x2::new(1., 2.);
-        sse2::_mm_storel_pd(&mut dest, a);
-        assert_eq!(dest, a.extract(0));
+        let a = _mm_setr_pd(1., 2.);
+        _mm_storel_pd(&mut dest, a);
+        assert_eq!(dest, _mm_cvtsd_f64(a));
     }

     #[simd_test = "sse2"]
-    unsafe fn _mm_loadr_pd() {
+    unsafe fn test_mm_loadr_pd() {
         let mut mem = Memory {
             data: [1.0f64, 2.0, 3.0, 4.0],
         };
         let vals = &mut mem.data;
         let d = vals.as_ptr();

-        let r = sse2::_mm_loadr_pd(d);
-        assert_eq!(r, f64x2::new(2.0, 1.0));
+        let r = _mm_loadr_pd(d);
+        assert_eq_m128d(r, _mm_setr_pd(2.0, 1.0));
     }

     #[simd_test = "sse2"]
-    unsafe fn _mm_loadu_pd() {
+    unsafe fn test_mm_loadu_pd() {
         let mut mem = Memory {
             data: [1.0f64, 2.0, 3.0, 4.0],
         };
@@ -4063,101 +4034,92 @@ mod tests {
             d = d.offset(offset as isize);
         }

-        let r = sse2::_mm_loadu_pd(d);
-        assert_eq!(r, f64x2::new(1.0, 2.0) + f64x2::splat(offset as f64));
+        let r = _mm_loadu_pd(d);
+        let e = _mm_add_pd(_mm_setr_pd(1.0, 2.0), _mm_set1_pd(offset as f64));
+        assert_eq_m128d(r, e);
     }

     #[simd_test = "sse2"]
-    unsafe fn _mm_cvtpd_ps() {
-        use std::{f32, f64};
-
-        let r = sse2::_mm_cvtpd_ps(f64x2::new(-1.0, 5.0));
-        assert_eq!(r, f32x4::new(-1.0, 5.0, 0.0, 0.0));
+    unsafe fn test_mm_cvtpd_ps() {
+        let r = _mm_cvtpd_ps(_mm_setr_pd(-1.0, 5.0));
+        assert_eq_m128(r, _mm_setr_ps(-1.0, 5.0, 0.0, 0.0));

-        let r = sse2::_mm_cvtpd_ps(f64x2::new(-1.0, -5.0));
-        assert_eq!(r, f32x4::new(-1.0, -5.0, 0.0, 0.0));
+        let r = _mm_cvtpd_ps(_mm_setr_pd(-1.0, -5.0));
+        assert_eq_m128(r, _mm_setr_ps(-1.0, -5.0, 0.0, 0.0));

-        let r = sse2::_mm_cvtpd_ps(f64x2::new(f64::MAX, f64::MIN));
-        assert_eq!(r, f32x4::new(f32::INFINITY, f32::NEG_INFINITY, 0.0, 0.0));
+        let r = _mm_cvtpd_ps(_mm_setr_pd(f64::MAX, f64::MIN));
+        assert_eq_m128(r, _mm_setr_ps(f32::INFINITY, f32::NEG_INFINITY, 0.0, 0.0));

         let r =
-            sse2::_mm_cvtpd_ps(f64x2::new(f32::MAX as f64, f32::MIN as f64));
-        assert_eq!(r, f32x4::new(f32::MAX, f32::MIN, 0.0, 0.0));
+            _mm_cvtpd_ps(_mm_setr_pd(f32::MAX as f64, f32::MIN as f64));
+        assert_eq_m128(r, _mm_setr_ps(f32::MAX, f32::MIN, 0.0, 0.0));
     }

     #[simd_test = "sse2"]
-    unsafe fn _mm_cvtps_pd() {
-        use std::{f32, f64};
-
-        let r = sse2::_mm_cvtps_pd(f32x4::new(-1.0, 2.0, -3.0, 5.0));
-        assert_eq!(r, f64x2::new(-1.0, 2.0));
+    unsafe fn test_mm_cvtps_pd() {
+        let r = _mm_cvtps_pd(_mm_setr_ps(-1.0, 2.0, -3.0, 5.0));
+        assert_eq_m128d(r, _mm_setr_pd(-1.0, 2.0));

-        let r = sse2::_mm_cvtps_pd(f32x4::new(
+        let r = _mm_cvtps_pd(_mm_setr_ps(
             f32::MAX,
             f32::INFINITY,
             f32::NEG_INFINITY,
             f32::MIN,
         ));
-        assert_eq!(r, f64x2::new(f32::MAX as f64, f64::INFINITY));
+        assert_eq_m128d(r, _mm_setr_pd(f32::MAX as f64, f64::INFINITY));
     }
     #[simd_test = "sse2"]
-    unsafe fn _mm_cvtpd_epi32() {
-        use std::{f64, i32};
+    unsafe fn test_mm_cvtpd_epi32() {
+        let r = _mm_cvtpd_epi32(_mm_setr_pd(-1.0, 5.0));
+        assert_eq!(r, _mm_setr_epi32(-1, 5, 0, 0));

-        let r = sse2::_mm_cvtpd_epi32(f64x2::new(-1.0, 5.0));
-        assert_eq!(r, i32x4::new(-1, 5, 0, 0));
+        let r = _mm_cvtpd_epi32(_mm_setr_pd(-1.0, -5.0));
+        assert_eq!(r, _mm_setr_epi32(-1, -5, 0, 0));

-        let r = sse2::_mm_cvtpd_epi32(f64x2::new(-1.0, -5.0));
-        assert_eq!(r, i32x4::new(-1, -5, 0, 0));
+        let r = _mm_cvtpd_epi32(_mm_setr_pd(f64::MAX, f64::MIN));
+        assert_eq!(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0));

-        let r = sse2::_mm_cvtpd_epi32(f64x2::new(f64::MAX, f64::MIN));
-        assert_eq!(r, i32x4::new(i32::MIN, i32::MIN, 0, 0));
-
-        let r = sse2::_mm_cvtpd_epi32(f64x2::new(
+        let r = _mm_cvtpd_epi32(_mm_setr_pd(
             f64::INFINITY,
             f64::NEG_INFINITY,
         ));
-        assert_eq!(r, i32x4::new(i32::MIN, i32::MIN, 0, 0));
+        assert_eq!(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0));

-        let r = sse2::_mm_cvtpd_epi32(f64x2::new(f64::NAN, f64::NAN));
-        assert_eq!(r, i32x4::new(i32::MIN, i32::MIN, 0, 0));
+        let r = _mm_cvtpd_epi32(_mm_setr_pd(f64::NAN, f64::NAN));
+        assert_eq!(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0));
     }

     #[simd_test = "sse2"]
-    unsafe fn _mm_cvtsd_si32() {
-        use std::{f64, i32};
-
-        let r = sse2::_mm_cvtsd_si32(f64x2::new(-2.0, 5.0));
+    unsafe fn test_mm_cvtsd_si32() {
+        let r = _mm_cvtsd_si32(_mm_setr_pd(-2.0, 5.0));
         assert_eq!(r, -2);

-        let r = sse2::_mm_cvtsd_si32(f64x2::new(f64::MAX, f64::MIN));
+        let r = _mm_cvtsd_si32(_mm_setr_pd(f64::MAX, f64::MIN));
         assert_eq!(r, i32::MIN);

-        let r = sse2::_mm_cvtsd_si32(f64x2::new(f64::NAN, f64::NAN));
+        let r = _mm_cvtsd_si32(_mm_setr_pd(f64::NAN, f64::NAN));
         assert_eq!(r, i32::MIN);
     }

     #[simd_test = "sse2"]
-    unsafe fn _mm_cvtsd_ss() {
-        use std::{f32, f64};
+    unsafe fn test_mm_cvtsd_ss() {
+        let a = _mm_setr_ps(-1.1, -2.2, 3.3, 4.4);
+        let b = _mm_setr_pd(2.0, -5.0);

-        let a = f32x4::new(-1.1, -2.2, 3.3, 4.4);
-        let b = f64x2::new(2.0, -5.0);
+        let r = _mm_cvtsd_ss(a, b);

-        let r = sse2::_mm_cvtsd_ss(a, b);
-
-        assert_eq!(r, f32x4::new(2.0, -2.2, 3.3, 4.4));
+        assert_eq_m128(r, _mm_setr_ps(2.0, -2.2, 3.3, 4.4));

         let a =
-            f32x4::new(-1.1, f32::NEG_INFINITY, f32::MAX, f32::NEG_INFINITY);
-        let b = f64x2::new(f64::INFINITY, -5.0);
+            _mm_setr_ps(-1.1, f32::NEG_INFINITY, f32::MAX, f32::NEG_INFINITY);
+        let b = _mm_setr_pd(f64::INFINITY, -5.0);

-        let r = sse2::_mm_cvtsd_ss(a, b);
+        let r = _mm_cvtsd_ss(a, b);

-        assert_eq!(
+        assert_eq_m128(
             r,
-            f32x4::new(
+            _mm_setr_ps(
                 f32::INFINITY,
                 f32::NEG_INFINITY,
                 f32::MAX,
@@ -4167,197 +4129,189 @@ mod tests {
     }

     #[simd_test = "sse2"]
-    unsafe fn _mm_cvtsd_f64() {
-        let r = sse2::_mm_cvtsd_f64(f64x2::new(-1.1, 2.2));
+    unsafe fn test_mm_cvtsd_f64() {
+        let r = _mm_cvtsd_f64(_mm_setr_pd(-1.1, 2.2));
         assert_eq!(r, -1.1);
     }

     #[simd_test = "sse2"]
-    unsafe fn _mm_cvtss_sd() {
-        use std::{f32, f64};
-
-        let a = f64x2::new(-1.1, 2.2);
-        let b = f32x4::new(1.0, 2.0, 3.0, 4.0);
+    unsafe fn test_mm_cvtss_sd() {
+        let a = _mm_setr_pd(-1.1, 2.2);
+        let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);

-        let r = sse2::_mm_cvtss_sd(a, b);
-        assert_eq!(r, f64x2::new(1.0, 2.2));
+        let r = _mm_cvtss_sd(a, b);
+        assert_eq_m128d(r, _mm_setr_pd(1.0, 2.2));

-        let a = f64x2::new(-1.1, f64::INFINITY);
-        let b = f32x4::new(f32::NEG_INFINITY, 2.0, 3.0, 4.0);
+        let a = _mm_setr_pd(-1.1, f64::INFINITY);
+        let b = _mm_setr_ps(f32::NEG_INFINITY, 2.0, 3.0, 4.0);

-        let r = sse2::_mm_cvtss_sd(a, b);
-        assert_eq!(r, f64x2::new(f64::NEG_INFINITY, f64::INFINITY));
+        let r = _mm_cvtss_sd(a, b);
+        assert_eq_m128d(r, _mm_setr_pd(f64::NEG_INFINITY, f64::INFINITY));
     }
     #[simd_test = "sse2"]
-    unsafe fn _mm_cvttpd_epi32() {
-        use std::{f64, i32};
+    unsafe fn test_mm_cvttpd_epi32() {
+        let a = _mm_setr_pd(-1.1, 2.2);
+        let r = _mm_cvttpd_epi32(a);
+        assert_eq!(r, _mm_setr_epi32(-1, 2, 0, 0));

-        let a = f64x2::new(-1.1, 2.2);
-        let r = sse2::_mm_cvttpd_epi32(a);
-        assert_eq!(r, i32x4::new(-1, 2, 0, 0));
-
-        let a = f64x2::new(f64::NEG_INFINITY, f64::NAN);
-        let r = sse2::_mm_cvttpd_epi32(a);
-        assert_eq!(r, i32x4::new(i32::MIN, i32::MIN, 0, 0));
+        let a = _mm_setr_pd(f64::NEG_INFINITY, f64::NAN);
+        let r = _mm_cvttpd_epi32(a);
+        assert_eq!(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0));
     }

     #[simd_test = "sse2"]
-    unsafe fn _mm_cvttsd_si32() {
-        use std::{f64, i32};
-
-        let a = f64x2::new(-1.1, 2.2);
-        let r = sse2::_mm_cvttsd_si32(a);
+    unsafe fn test_mm_cvttsd_si32() {
+        let a = _mm_setr_pd(-1.1, 2.2);
+        let r = _mm_cvttsd_si32(a);
         assert_eq!(r, -1);

-        let a = f64x2::new(f64::NEG_INFINITY, f64::NAN);
-        let r = sse2::_mm_cvttsd_si32(a);
+        let a = _mm_setr_pd(f64::NEG_INFINITY, f64::NAN);
+        let r = _mm_cvttsd_si32(a);
         assert_eq!(r, i32::MIN);
     }

     #[simd_test = "sse2"]
-    unsafe fn _mm_cvttps_epi32() {
-        use std::{f32, i32};
-
-        let a = f32x4::new(-1.1, 2.2, -3.3, 6.6);
-        let r = sse2::_mm_cvttps_epi32(a);
-        assert_eq!(r, i32x4::new(-1, 2, -3, 6));
+    unsafe fn test_mm_cvttps_epi32() {
+        let a = _mm_setr_ps(-1.1, 2.2, -3.3, 6.6);
+        let r = _mm_cvttps_epi32(a);
+        assert_eq!(r, _mm_setr_epi32(-1, 2, -3, 6));

         let a =
-            f32x4::new(f32::NEG_INFINITY, f32::INFINITY, f32::MIN, f32::MAX);
-        let r = sse2::_mm_cvttps_epi32(a);
-        assert_eq!(r, i32x4::new(i32::MIN, i32::MIN, i32::MIN, i32::MIN));
+            _mm_setr_ps(f32::NEG_INFINITY, f32::INFINITY, f32::MIN, f32::MAX);
+        let r = _mm_cvttps_epi32(a);
+        assert_eq!(r, _mm_setr_epi32(i32::MIN, i32::MIN, i32::MIN, i32::MIN));
     }

     #[simd_test = "sse2"]
-    unsafe fn _mm_set_sd() {
-        let r = sse2::_mm_set_sd(-1.0_f64);
-        assert_eq!(r, f64x2::new(-1.0_f64, 0_f64));
+    unsafe fn test_mm_set_sd() {
+        let r = _mm_set_sd(-1.0_f64);
+        assert_eq_m128d(r, _mm_setr_pd(-1.0_f64, 0_f64));
     }

     #[simd_test = "sse2"]
-    unsafe fn _mm_set1_pd() {
-        let r = sse2::_mm_set1_pd(-1.0_f64);
-        assert_eq!(r, f64x2::new(-1.0_f64, -1.0_f64));
+    unsafe fn test_mm_set1_pd() {
+        let r = _mm_set1_pd(-1.0_f64);
+        assert_eq_m128d(r, _mm_setr_pd(-1.0_f64, -1.0_f64));
     }

     #[simd_test = "sse2"]
-    unsafe fn _mm_set_pd1() {
-        let r = sse2::_mm_set_pd1(-2.0_f64);
-        assert_eq!(r, f64x2::new(-2.0_f64, -2.0_f64));
+    unsafe fn test_mm_set_pd1() {
+        let r = _mm_set_pd1(-2.0_f64);
+        assert_eq_m128d(r, _mm_setr_pd(-2.0_f64, -2.0_f64));
     }

     #[simd_test = "sse2"]
-    unsafe fn _mm_set_pd() {
-        let r = sse2::_mm_set_pd(1.0_f64, 5.0_f64);
-        assert_eq!(r, f64x2::new(5.0_f64, 1.0_f64));
+    unsafe fn test_mm_set_pd() {
+        let r = _mm_set_pd(1.0_f64, 5.0_f64);
+        assert_eq_m128d(r, _mm_setr_pd(5.0_f64, 1.0_f64));
     }

     #[simd_test = "sse2"]
-    unsafe fn _mm_setr_pd() {
-        let r = sse2::_mm_setr_pd(1.0_f64, -5.0_f64);
-        assert_eq!(r, f64x2::new(1.0_f64, -5.0_f64));
+    unsafe fn test_mm_setr_pd() {
+        let r = _mm_setr_pd(1.0_f64, -5.0_f64);
+        assert_eq_m128d(r, _mm_setr_pd(1.0_f64, -5.0_f64));
     }

     #[simd_test = "sse2"]
-    unsafe fn _mm_setzero_pd() {
-        let r = sse2::_mm_setzero_pd();
-        assert_eq!(r, f64x2::new(0_f64, 0_f64));
+    unsafe fn test_mm_setzero_pd() {
+        let r = _mm_setzero_pd();
+        assert_eq_m128d(r, _mm_setr_pd(0_f64, 0_f64));
     }

     #[simd_test = "sse2"]
-    unsafe fn _mm_load1_pd() {
+    unsafe fn test_mm_load1_pd() {
         let d = -5.0;
-        let r = sse2::_mm_load1_pd(&d);
-        assert_eq!(r, f64x2::new(d, d));
+        let r = _mm_load1_pd(&d);
+        assert_eq_m128d(r, _mm_setr_pd(d, d));
     }

     #[simd_test = "sse2"]
-    unsafe fn _mm_load_pd1() {
+    unsafe fn test_mm_load_pd1() {
         let d = -5.0;
-        let r = sse2::_mm_load_pd1(&d);
-        assert_eq!(r, f64x2::new(d, d));
+        let r = _mm_load_pd1(&d);
+        assert_eq_m128d(r, _mm_setr_pd(d, d));
     }
     #[simd_test = "sse2"]
-    unsafe fn _mm_unpackhi_pd() {
-        let a = f64x2::new(1.0, 2.0);
-        let b = f64x2::new(3.0, 4.0);
-        let r = sse2::_mm_unpackhi_pd(a, b);
-        assert_eq!(r, f64x2::new(2.0, 4.0));
+    unsafe fn test_mm_unpackhi_pd() {
+        let a = _mm_setr_pd(1.0, 2.0);
+        let b = _mm_setr_pd(3.0, 4.0);
+        let r = _mm_unpackhi_pd(a, b);
+        assert_eq_m128d(r, _mm_setr_pd(2.0, 4.0));
     }

     #[simd_test = "sse2"]
-    unsafe fn _mm_unpacklo_pd() {
-        let a = f64x2::new(1.0, 2.0);
-        let b = f64x2::new(3.0, 4.0);
-        let r = sse2::_mm_unpacklo_pd(a, b);
-        assert_eq!(r, f64x2::new(1.0, 3.0));
+    unsafe fn test_mm_unpacklo_pd() {
+        let a = _mm_setr_pd(1.0, 2.0);
+        let b = _mm_setr_pd(3.0, 4.0);
+        let r = _mm_unpacklo_pd(a, b);
+        assert_eq_m128d(r, _mm_setr_pd(1.0, 3.0));
     }

     #[simd_test = "sse2"]
-    unsafe fn _mm_shuffle_pd() {
-        let a = f64x2::new(1., 2.);
-        let b = f64x2::new(3., 4.);
-        let expected = f64x2::new(1., 3.);
-        let r = sse2::_mm_shuffle_pd(a, b, 0);
-        assert_eq!(r, expected);
+    unsafe fn test_mm_shuffle_pd() {
+        let a = _mm_setr_pd(1., 2.);
+        let b = _mm_setr_pd(3., 4.);
+        let expected = _mm_setr_pd(1., 3.);
+        let r = _mm_shuffle_pd(a, b, 0);
+        assert_eq_m128d(r, expected);
     }

     #[simd_test = "sse2"]
-    unsafe fn _mm_move_sd() {
-        let a = f64x2::new(1., 2.);
-        let b = f64x2::new(3., 4.);
-        let expected = f64x2::new(3., 2.);
-        let r = sse2::_mm_move_sd(a, b);
-        assert_eq!(r, expected);
+    unsafe fn test_mm_move_sd() {
+        let a = _mm_setr_pd(1., 2.);
+        let b = _mm_setr_pd(3., 4.);
+        let expected = _mm_setr_pd(3., 2.);
+        let r = _mm_move_sd(a, b);
+        assert_eq_m128d(r, expected);
     }

     #[simd_test = "sse2"]
-    unsafe fn _mm_castpd_ps() {
-        let a = f64x2::splat(0.);
-        let expected = f32x4::splat(0.);
-        let r = sse2::_mm_castpd_ps(a);
-        assert_eq!(r, expected);
+    unsafe fn test_mm_castpd_ps() {
+        let a = _mm_set1_pd(0.);
+        let expected = _mm_set1_ps(0.);
+        let r = _mm_castpd_ps(a);
+        assert_eq_m128(r, expected);
     }

     #[simd_test = "sse2"]
-    unsafe fn _mm_castpd_si128() {
-        let a = f64x2::splat(0.);
-        let expected = i64x2::splat(0);
-        let r = sse2::_mm_castpd_si128(a);
-        assert_eq!(r, __m128i::from(expected));
+    unsafe fn test_mm_castpd_si128() {
+        let a = _mm_set1_pd(0.);
+        let expected = _mm_set1_epi64x(0);
+        let r = _mm_castpd_si128(a);
+        assert_eq!(r, expected);
     }

     #[simd_test = "sse2"]
-    unsafe fn _mm_castps_pd() {
-        let a = f32x4::splat(0.);
-        let expected = f64x2::splat(0.);
-        let r = sse2::_mm_castps_pd(a);
-        assert_eq!(r, expected);
+    unsafe fn test_mm_castps_pd() {
+        let a = _mm_set1_ps(0.);
+        let expected = _mm_set1_pd(0.);
+        let r = _mm_castps_pd(a);
+        assert_eq_m128d(r, expected);
     }

     #[simd_test = "sse2"]
-    unsafe fn _mm_castps_si128() {
-        let a = f32x4::splat(0.);
-        let expected = i32x4::splat(0);
-        let r = sse2::_mm_castps_si128(a);
-        assert_eq!(r, __m128i::from(expected));
+    unsafe fn test_mm_castps_si128() {
+        let a = _mm_set1_ps(0.);
+        let expected = _mm_set1_epi32(0);
+        let r = _mm_castps_si128(a);
+        assert_eq!(r, expected);
     }

     #[simd_test = "sse2"]
-    unsafe fn _mm_castsi128_pd() {
-        let a = __m128i::from(i64x2::splat(0));
-        let expected = f64x2::splat(0.);
-        let r = sse2::_mm_castsi128_pd(a);
-        assert_eq!(r, expected);
+    unsafe fn test_mm_castsi128_pd() {
+        let a = _mm_set1_epi64x(0);
+        let expected = _mm_set1_pd(0.);
+        let r = _mm_castsi128_pd(a);
+        assert_eq_m128d(r, expected);
     }
diff --git a/coresimd/src/x86/i686/sse41.rs b/coresimd/src/x86/i686/sse41.rs
index 16e767ff97914..d088c3042366c 100644
--- a/coresimd/src/x86/i686/sse41.rs
+++ b/coresimd/src/x86/i686/sse41.rs
@@ -1,6 +1,7 @@
 //! `i686`'s Streaming SIMD Extensions 4.1 (SSE4.1)

 use v128::*;
+use x86::*;

 #[cfg(test)]
 use stdsimd_test::assert_instr;
@@ -111,8 +112,7 @@ pub unsafe fn _mm_test_all_zeros(a: __m128i, mask: __m128i) -> i32 {
 #[cfg_attr(test, assert_instr(pcmpeqd))]
 #[cfg_attr(test, assert_instr(ptest))]
 pub unsafe fn _mm_test_all_ones(a: __m128i) -> i32 {
-    let b = i32x4::from(a);
-    _mm_testc_si128(a, __m128i::from(::x86::_mm_cmpeq_epi32(b, b)))
+    _mm_testc_si128(a, _mm_cmpeq_epi32(a, a))
 }

 /// Tests whether the specified bits in a 128-bit integer vector are
diff --git a/coresimd/src/x86/mod.rs b/coresimd/src/x86/mod.rs
index 6b82095ea2d3c..971770481d817 100644
--- a/coresimd/src/x86/mod.rs
+++ b/coresimd/src/x86/mod.rs
@@ -1,5 +1,7 @@
 //! `x86` and `x86_64` intrinsics.

+use core::mem;
+
 #[macro_use]
 mod macros;
@@ -8,6 +10,70 @@
 #[allow(non_camel_case_types)]
 pub struct __m128(f32, f32, f32, f32);

+#[repr(simd)]
+#[derive(Clone, Copy, Debug)]
+#[allow(non_camel_case_types)]
+pub struct __m128d(f64, f64);
+
+pub use v128::__m128i;
+pub use v64::__m64;
+
+#[cfg(test)]
+mod test;
+#[cfg(test)]
+pub use self::test::*;
+
+#[doc(hidden)]
+#[allow(non_camel_case_types)]
+trait m128iExt: Sized {
+    fn as_m128i(self) -> __m128i;
+
+    #[inline(always)]
+    fn as_u8x16(self) -> ::v128::u8x16 {
+        unsafe { mem::transmute(self.as_m128i()) }
+    }
+
+    #[inline(always)]
+    fn as_u16x8(self) -> ::v128::u16x8 {
+        unsafe { mem::transmute(self.as_m128i()) }
+    }
+
+    #[inline(always)]
+    fn as_u32x4(self) -> ::v128::u32x4 {
+        unsafe { mem::transmute(self.as_m128i()) }
+    }
+
+    #[inline(always)]
+    fn as_u64x2(self) -> ::v128::u64x2 {
+        unsafe { mem::transmute(self.as_m128i()) }
+    }
+
+    #[inline(always)]
+    fn as_i8x16(self) -> ::v128::i8x16 {
+        unsafe { mem::transmute(self.as_m128i()) }
+    }
+
+    #[inline(always)]
+    fn as_i16x8(self) -> ::v128::i16x8 {
+        unsafe { mem::transmute(self.as_m128i()) }
+    }
+
+    #[inline(always)]
+    fn as_i32x4(self) -> ::v128::i32x4 {
+        unsafe { mem::transmute(self.as_m128i()) }
+    }
+
+    #[inline(always)]
+    fn as_i64x2(self) -> ::v128::i64x2 {
+        unsafe { mem::transmute(self.as_m128i()) }
+    }
+}
+
+impl m128iExt for __m128i {
+    #[inline(always)]
+    fn as_m128i(self) -> __m128i { self }
+}
+
 mod i386;
 pub use self::i386::*;
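The `_mm_test_all_ones` rewrite drops the round-trip through `i32x4`: `_mm_cmpeq_epi32(a, a)` compares a register with itself, so every 32-bit lane trivially compares equal and becomes all-ones, and `_mm_testc_si128` against that mask reports whether every bit of `a` is set. A sketch of the observable behavior (these values are illustrative, not taken from the patch):

    let a = _mm_set1_epi32(-1);            // every bit set
    assert_eq!(_mm_test_all_ones(a), 1);
    let b = _mm_setr_epi32(-1, -1, 0, -1); // third lane is zero
    assert_eq!(_mm_test_all_ones(b), 0);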
diff --git a/coresimd/src/x86/test.rs b/coresimd/src/x86/test.rs
new file mode 100644
index 0000000000000..a4a6e358b5297
--- /dev/null
+++ b/coresimd/src/x86/test.rs
@@ -0,0 +1,32 @@
+//! Utilities used in testing the x86 intrinsics
+
+use std::mem;
+
+use x86::*;
+
+#[target_feature = "+sse2"]
+pub unsafe fn assert_eq_m128d(a: __m128d, b: __m128d) {
+    if _mm_movemask_pd(_mm_cmpeq_pd(a, b)) != 0b11 {
+        panic!("{:?} != {:?}", a, b);
+    }
+}
+
+#[target_feature = "+sse2"]
+pub unsafe fn get_m128d(a: __m128d, idx: usize) -> f64 {
+    union A { a: __m128d, b: [f64; 2] };
+    mem::transmute::<__m128d, A>(a).b[idx]
+}
+
+#[target_feature = "+sse"]
+pub unsafe fn assert_eq_m128(a: __m128, b: __m128) {
+    let r = _mm_cmpeq_ps(a, b);
+    if _mm_movemask_ps(r) != 0b1111 {
+        panic!("{:?} != {:?}", a, b);
+    }
+}
+
+#[target_feature = "+sse"]
+pub unsafe fn get_m128(a: __m128, idx: usize) -> f32 {
+    union A { a: __m128, b: [f32; 4] };
+    mem::transmute::<__m128, A>(a).b[idx]
+}
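The movemask-based assertions in `test.rs` work lane by lane: `_mm_cmpeq_pd` writes an all-ones 64-bit lane wherever the operands compare equal, and `_mm_movemask_pd` packs the two lane sign bits into the low bits of an integer, so `0b11` means both lanes matched (`0b1111` plays the same role for the four `f32` lanes of `__m128`). A sketch of the intermediate value:

    let a = _mm_setr_pd(1.0, 2.0);
    let b = _mm_setr_pd(1.0, 99.0);
    let mask = _mm_movemask_pd(_mm_cmpeq_pd(a, b));
    assert_eq!(mask, 0b01); // low lane equal, high lane not

Since `_mm_cmpeq_pd` is IEEE equality, a NaN lane never compares equal to itself; the tests in this patch only feed NaN into conversions whose results are compared as integers, so the helpers are never asked to match NaN lanes.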
diff --git a/src/lib.rs b/src/lib.rs
index 277ad2a74cfcd..87152ba4bf46c 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -63,9 +63,12 @@
 //! #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
 //! #[target_feature = "+sse2"]
 //! unsafe fn sum_sse2(x: i32x4) -> i32 {
-//!     let x = vendor::_mm_add_epi32(x, vendor::_mm_srli_si128(x.into(), 8).into());
-//!     let x = vendor::_mm_add_epi32(x, vendor::_mm_srli_si128(x.into(), 4).into());
-//!     vendor::_mm_cvtsi128_si32(x)
+//!     use std::mem;
+//!     let x: vendor::__m128i = mem::transmute(x);
+//!     let x = vendor::_mm_add_epi32(x, vendor::_mm_srli_si128(x, 8));
+//!     let x = vendor::_mm_add_epi32(x, vendor::_mm_srli_si128(x, 4));
+//!     let ret = vendor::_mm_cvtsi128_si32(x);
+//!     mem::transmute(ret)
 //! }
 //!
 //! // Uses the SSE2 version if SSE2 is enabled for all target
diff --git a/stdsimd-verify/src/lib.rs b/stdsimd-verify/src/lib.rs
index 4519bce26e031..5fe888c24cd5b 100644
--- a/stdsimd-verify/src/lib.rs
+++ b/stdsimd-verify/src/lib.rs
@@ -98,6 +98,7 @@ fn to_type(t: &syn::Type) -> Tokens {
     match *t {
         syn::Type::Path(ref p) => match extract_path_ident(&p.path).as_ref() {
            "__m128" => my_quote! { &F32x4 },
+           "__m128d" => my_quote! { &F64x2 },
            "__m128i" => my_quote! { &I8x16 },
            "__m256i" => my_quote! { &I8x32 },
            "__m64" => my_quote! { &I8x8 },
@@ -178,6 +179,10 @@ fn walk(root: &Path, files: &mut Vec) {
             continue;
         }

+        if path.file_name().and_then(|s| s.to_str()) == Some("test.rs") {
+            continue
+        }
+
         let mut contents = String::new();
         File::open(&path)
             .unwrap()
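The updated doc example crosses the portable/vendor boundary with `mem::transmute` rather than `From`/`Into` conversions. A minimal sketch of why that bit-cast is sound: `i32x4` and `__m128i` are both 128-bit `#[repr(simd)]` types, so the transmute is a size-checked reinterpretation of the same bytes.

    use std::mem;
    let x = i32x4::new(1, 2, 3, 4);
    // Round-trip through the vendor type preserves every lane.
    let v: __m128i = unsafe { mem::transmute(x) };
    let y: i32x4 = unsafe { mem::transmute(v) };
    assert_eq!(y, i32x4::new(1, 2, 3, 4));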