From b0425e658f369acf2efed4445f5887438fb923bf Mon Sep 17 00:00:00 2001 From: minybot Date: Tue, 2 Mar 2021 00:36:01 -0500 Subject: [PATCH] Convert shuffle_ps and shuffle_pd to const generics (#1037) --- crates/core_arch/src/x86/avx.rs | 131 +-- crates/core_arch/src/x86/avx512f.rs | 1470 +++++++++--------------- crates/core_arch/src/x86/macros.rs | 16 + crates/core_arch/src/x86/sse2.rs | 18 +- crates/core_arch/src/x86_64/avx512f.rs | 180 +-- 5 files changed, 676 insertions(+), 1139 deletions(-) diff --git a/crates/core_arch/src/x86/avx.rs b/crates/core_arch/src/x86/avx.rs index 6c9a03322d1a9..c27d4772f6cd4 100644 --- a/crates/core_arch/src/x86/avx.rs +++ b/crates/core_arch/src/x86/avx.rs @@ -113,44 +113,21 @@ pub unsafe fn _mm256_or_ps(a: __m256, b: __m256) -> __m256 { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_shuffle_pd) #[inline] #[target_feature(enable = "avx")] -#[cfg_attr(test, assert_instr(vshufpd, imm8 = 0x1))] -#[rustc_args_required_const(2)] -#[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_shuffle_pd(a: __m256d, b: __m256d, imm8: i32) -> __m256d { - let imm8 = (imm8 & 0xFF) as u8; - macro_rules! shuffle4 { - ($a:expr, $b:expr, $c:expr, $d:expr) => { - simd_shuffle4(a, b, [$a, $b, $c, $d]) - }; - } - macro_rules! shuffle3 { - ($a:expr, $b:expr, $c:expr) => { - match (imm8 >> 3) & 0x1 { - 0 => shuffle4!($a, $b, $c, 6), - _ => shuffle4!($a, $b, $c, 7), - } - }; - } - macro_rules! shuffle2 { - ($a:expr, $b:expr) => { - match (imm8 >> 2) & 0x1 { - 0 => shuffle3!($a, $b, 2), - _ => shuffle3!($a, $b, 3), - } - }; - } - macro_rules! shuffle1 { - ($a:expr) => { - match (imm8 >> 1) & 0x1 { - 0 => shuffle2!($a, 4), - _ => shuffle2!($a, 5), - } - }; - } - match imm8 & 0x1 { - 0 => shuffle1!(0), - _ => shuffle1!(1), - } +#[cfg_attr(test, assert_instr(vshufpd, MASK = 3))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _mm256_shuffle_pd(a: __m256d, b: __m256d) -> __m256d { + static_assert_imm8!(MASK); + simd_shuffle4( + a, + b, + [ + MASK as u32 & 0b1, + ((MASK as u32 >> 1) & 0b1) + 4, + ((MASK as u32 >> 2) & 0b1) + 2, + ((MASK as u32 >> 3) & 0b1) + 6, + ], + ) } /// Shuffles single-precision (32-bit) floating-point elements in `a` within @@ -159,61 +136,25 @@ pub unsafe fn _mm256_shuffle_pd(a: __m256d, b: __m256d, imm8: i32) -> __m256d { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_shuffle_ps) #[inline] #[target_feature(enable = "avx")] -#[cfg_attr(test, assert_instr(vshufps, imm8 = 0x0))] -#[rustc_args_required_const(2)] -#[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_shuffle_ps(a: __m256, b: __m256, imm8: i32) -> __m256 { - let imm8 = (imm8 & 0xFF) as u8; - macro_rules! shuffle4 { - ( - $a:expr, - $b:expr, - $c:expr, - $d:expr, - $e:expr, - $f:expr, - $g:expr, - $h:expr - ) => { - simd_shuffle8(a, b, [$a, $b, $c, $d, $e, $f, $g, $h]) - }; - } - macro_rules! shuffle3 { - ($a:expr, $b:expr, $c:expr, $e:expr, $f:expr, $g:expr) => { - match (imm8 >> 6) & 0x3 { - 0 => shuffle4!($a, $b, $c, 8, $e, $f, $g, 12), - 1 => shuffle4!($a, $b, $c, 9, $e, $f, $g, 13), - 2 => shuffle4!($a, $b, $c, 10, $e, $f, $g, 14), - _ => shuffle4!($a, $b, $c, 11, $e, $f, $g, 15), - } - }; - } - macro_rules! 
shuffle2 { - ($a:expr, $b:expr, $e:expr, $f:expr) => { - match (imm8 >> 4) & 0x3 { - 0 => shuffle3!($a, $b, 8, $e, $f, 12), - 1 => shuffle3!($a, $b, 9, $e, $f, 13), - 2 => shuffle3!($a, $b, 10, $e, $f, 14), - _ => shuffle3!($a, $b, 11, $e, $f, 15), - } - }; - } - macro_rules! shuffle1 { - ($a:expr, $e:expr) => { - match (imm8 >> 2) & 0x3 { - 0 => shuffle2!($a, 0, $e, 4), - 1 => shuffle2!($a, 1, $e, 5), - 2 => shuffle2!($a, 2, $e, 6), - _ => shuffle2!($a, 3, $e, 7), - } - }; - } - match imm8 & 0x3 { - 0 => shuffle1!(0, 4), - 1 => shuffle1!(1, 5), - 2 => shuffle1!(2, 6), - _ => shuffle1!(3, 7), - } +#[cfg_attr(test, assert_instr(vshufps, MASK = 3))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _mm256_shuffle_ps(a: __m256, b: __m256) -> __m256 { + static_assert_imm8!(MASK); + simd_shuffle8( + a, + b, + [ + MASK as u32 & 0b11, + (MASK as u32 >> 2) & 0b11, + ((MASK as u32 >> 4) & 0b11) + 8, + ((MASK as u32 >> 6) & 0b11) + 8, + (MASK as u32 & 0b11) + 4, + ((MASK as u32 >> 2) & 0b11) + 4, + ((MASK as u32 >> 4) & 0b11) + 12, + ((MASK as u32 >> 6) & 0b11) + 12, + ], + ) } /// Computes the bitwise NOT of packed double-precision (64-bit) floating-point @@ -3381,7 +3322,7 @@ mod tests { unsafe fn test_mm256_shuffle_pd() { let a = _mm256_setr_pd(1., 4., 5., 8.); let b = _mm256_setr_pd(2., 3., 6., 7.); - let r = _mm256_shuffle_pd(a, b, 0xF); + let r = _mm256_shuffle_pd::<0b11_11_11_11>(a, b); let e = _mm256_setr_pd(4., 3., 8., 7.); assert_eq_m256d(r, e); } @@ -3390,7 +3331,7 @@ mod tests { unsafe fn test_mm256_shuffle_ps() { let a = _mm256_setr_ps(1., 4., 5., 8., 9., 12., 13., 16.); let b = _mm256_setr_ps(2., 3., 6., 7., 10., 11., 14., 15.); - let r = _mm256_shuffle_ps(a, b, 0x0F); + let r = _mm256_shuffle_ps::<0b00_00_11_11>(a, b); let e = _mm256_setr_ps(8., 8., 2., 2., 16., 16., 10., 10.); assert_eq_m256(r, e); } diff --git a/crates/core_arch/src/x86/avx512f.rs b/crates/core_arch/src/x86/avx512f.rs index 4391359204449..c495b6c8ae51e 100644 --- a/crates/core_arch/src/x86/avx512f.rs +++ b/crates/core_arch/src/x86/avx512f.rs @@ -4872,23 +4872,13 @@ pub unsafe fn _mm_maskz_getexp_pd(k: __mmask8, a: __m128d) -> __m128d { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_roundscale_ps&expand=4784) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vrndscaleps, imm8 = 0))] -#[rustc_args_required_const(1)] -pub unsafe fn _mm512_roundscale_ps(a: __m512, imm8: i32) -> __m512 { +#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn _mm512_roundscale_ps(a: __m512) -> __m512 { + static_assert_imm8!(IMM8); let a = a.as_f32x16(); let zero = _mm512_setzero_ps().as_f32x16(); - macro_rules! 
call { - ($imm8:expr) => { - vrndscaleps( - a, - $imm8, - zero, - 0b11111111_11111111, - _MM_FROUND_CUR_DIRECTION, - ) - }; - } - let r = constify_imm8_sae!(imm8, call); + let r = vrndscaleps(a, IMM8, zero, 0b11111111_11111111, _MM_FROUND_CUR_DIRECTION); transmute(r) } @@ -4903,17 +4893,17 @@ pub unsafe fn _mm512_roundscale_ps(a: __m512, imm8: i32) -> __m512 { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_roundscale_ps&expand=4782) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vrndscaleps, imm8 = 0))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm512_mask_roundscale_ps(src: __m512, k: __mmask16, a: __m512, imm8: i32) -> __m512 { +#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm512_mask_roundscale_ps( + src: __m512, + k: __mmask16, + a: __m512, +) -> __m512 { + static_assert_imm8!(IMM8); let a = a.as_f32x16(); let src = src.as_f32x16(); - macro_rules! call { - ($imm8:expr) => { - vrndscaleps(a, $imm8, src, k, _MM_FROUND_CUR_DIRECTION) - }; - } - let r = constify_imm8_sae!(imm8, call); + let r = vrndscaleps(a, IMM8, src, k, _MM_FROUND_CUR_DIRECTION); transmute(r) } @@ -4928,17 +4918,13 @@ pub unsafe fn _mm512_mask_roundscale_ps(src: __m512, k: __mmask16, a: __m512, im /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_roundscale_ps&expand=4783) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vrndscaleps, imm8 = 0))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm512_maskz_roundscale_ps(k: __mmask16, a: __m512, imm8: i32) -> __m512 { +#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm512_maskz_roundscale_ps(k: __mmask16, a: __m512) -> __m512 { + static_assert_imm8!(IMM8); let a = a.as_f32x16(); let zero = _mm512_setzero_ps().as_f32x16(); - macro_rules! call { - ($imm8:expr) => { - vrndscaleps(a, $imm8, zero, k, _MM_FROUND_CUR_DIRECTION) - }; - } - let r = constify_imm8_sae!(imm8, call); + let r = vrndscaleps(a, IMM8, zero, k, _MM_FROUND_CUR_DIRECTION); transmute(r) } @@ -4953,17 +4939,13 @@ pub unsafe fn _mm512_maskz_roundscale_ps(k: __mmask16, a: __m512, imm8: i32) -> /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_roundscale_ps&expand=4781) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vrndscaleps, imm8 = 250))] -#[rustc_args_required_const(1)] -pub unsafe fn _mm256_roundscale_ps(a: __m256, imm8: i32) -> __m256 { +#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 250))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn _mm256_roundscale_ps(a: __m256) -> __m256 { + static_assert_imm8!(IMM8); let a = a.as_f32x8(); let zero = _mm256_setzero_ps().as_f32x8(); - macro_rules! 
call { - ($imm8:expr) => { - vrndscaleps256(a, $imm8, zero, 0b11111111) - }; - } - let r = constify_imm8_sae!(imm8, call); + let r = vrndscaleps256(a, IMM8, zero, 0b11111111); transmute(r) } @@ -4978,17 +4960,17 @@ pub unsafe fn _mm256_roundscale_ps(a: __m256, imm8: i32) -> __m256 { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_roundscale_ps&expand=4779) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vrndscaleps, imm8 = 0))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm256_mask_roundscale_ps(src: __m256, k: __mmask8, a: __m256, imm8: i32) -> __m256 { +#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm256_mask_roundscale_ps( + src: __m256, + k: __mmask8, + a: __m256, +) -> __m256 { + static_assert_imm8!(IMM8); let a = a.as_f32x8(); let src = src.as_f32x8(); - macro_rules! call { - ($imm8:expr) => { - vrndscaleps256(a, $imm8, src, k) - }; - } - let r = constify_imm8_sae!(imm8, call); + let r = vrndscaleps256(a, IMM8, src, k); transmute(r) } @@ -5003,17 +4985,13 @@ pub unsafe fn _mm256_mask_roundscale_ps(src: __m256, k: __mmask8, a: __m256, imm /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_roundscale_ps&expand=4780) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vrndscaleps, imm8 = 0))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm256_maskz_roundscale_ps(k: __mmask8, a: __m256, imm8: i32) -> __m256 { +#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm256_maskz_roundscale_ps(k: __mmask8, a: __m256) -> __m256 { + static_assert_imm8!(IMM8); let a = a.as_f32x8(); let zero = _mm256_setzero_ps().as_f32x8(); - macro_rules! call { - ($imm8:expr) => { - vrndscaleps256(a, $imm8, zero, k) - }; - } - let r = constify_imm8_sae!(imm8, call); + let r = vrndscaleps256(a, IMM8, zero, k); transmute(r) } @@ -5028,17 +5006,13 @@ pub unsafe fn _mm256_maskz_roundscale_ps(k: __mmask8, a: __m256, imm8: i32) -> _ /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_roundscale_ps&expand=4778) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vrndscaleps, imm8 = 250))] -#[rustc_args_required_const(1)] -pub unsafe fn _mm_roundscale_ps(a: __m128, imm8: i32) -> __m128 { +#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 250))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn _mm_roundscale_ps(a: __m128) -> __m128 { + static_assert_imm8!(IMM8); let a = a.as_f32x4(); let zero = _mm_setzero_ps().as_f32x4(); - macro_rules! 
call { - ($imm8:expr) => { - vrndscaleps128(a, $imm8, zero, 0b00001111) - }; - } - let r = constify_imm8_sae!(imm8, call); + let r = vrndscaleps128(a, IMM8, zero, 0b00001111); transmute(r) } @@ -5053,17 +5027,17 @@ pub unsafe fn _mm_roundscale_ps(a: __m128, imm8: i32) -> __m128 { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_roundscale_ps&expand=4776) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vrndscaleps, imm8 = 0))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm_mask_roundscale_ps(src: __m128, k: __mmask8, a: __m128, imm8: i32) -> __m128 { +#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm_mask_roundscale_ps( + src: __m128, + k: __mmask8, + a: __m128, +) -> __m128 { + static_assert_imm8!(IMM8); let a = a.as_f32x4(); let src = src.as_f32x4(); - macro_rules! call { - ($imm8:expr) => { - vrndscaleps128(a, $imm8, src, k) - }; - } - let r = constify_imm8_sae!(imm8, call); + let r = vrndscaleps128(a, IMM8, src, k); transmute(r) } @@ -5078,17 +5052,13 @@ pub unsafe fn _mm_mask_roundscale_ps(src: __m128, k: __mmask8, a: __m128, imm8: /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_roundscale_ps&expand=4777) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vrndscaleps, imm8 = 0))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm_maskz_roundscale_ps(k: __mmask8, a: __m128, imm8: i32) -> __m128 { +#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm_maskz_roundscale_ps(k: __mmask8, a: __m128) -> __m128 { + static_assert_imm8!(IMM8); let a = a.as_f32x4(); let zero = _mm_setzero_ps().as_f32x4(); - macro_rules! call { - ($imm8:expr) => { - vrndscaleps128(a, $imm8, zero, k) - }; - } - let r = constify_imm8_sae!(imm8, call); + let r = vrndscaleps128(a, IMM8, zero, k); transmute(r) } @@ -5103,17 +5073,13 @@ pub unsafe fn _mm_maskz_roundscale_ps(k: __mmask8, a: __m128, imm8: i32) -> __m1 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_roundscale_pd&expand=4775) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vrndscalepd, imm8 = 0))] -#[rustc_args_required_const(1)] -pub unsafe fn _mm512_roundscale_pd(a: __m512d, imm8: i32) -> __m512d { +#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn _mm512_roundscale_pd(a: __m512d) -> __m512d { + static_assert_imm8!(IMM8); let a = a.as_f64x8(); let zero = _mm512_setzero_pd().as_f64x8(); - macro_rules! 
call { - ($imm8:expr) => { - vrndscalepd(a, $imm8, zero, 0b11111111, _MM_FROUND_CUR_DIRECTION) - }; - } - let r = constify_imm8_sae!(imm8, call); + let r = vrndscalepd(a, IMM8, zero, 0b11111111, _MM_FROUND_CUR_DIRECTION); transmute(r) } @@ -5128,22 +5094,17 @@ pub unsafe fn _mm512_roundscale_pd(a: __m512d, imm8: i32) -> __m512d { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_roundscale_pd&expand=4773) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vrndscalepd, imm8 = 0))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm512_mask_roundscale_pd( +#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm512_mask_roundscale_pd( src: __m512d, k: __mmask8, a: __m512d, - imm8: i32, ) -> __m512d { + static_assert_imm8!(IMM8); let a = a.as_f64x8(); let src = src.as_f64x8(); - macro_rules! call { - ($imm8:expr) => { - vrndscalepd(a, $imm8, src, k, _MM_FROUND_CUR_DIRECTION) - }; - } - let r = constify_imm8_sae!(imm8, call); + let r = vrndscalepd(a, IMM8, src, k, _MM_FROUND_CUR_DIRECTION); transmute(r) } @@ -5158,17 +5119,13 @@ pub unsafe fn _mm512_mask_roundscale_pd( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_roundscale_pd&expand=4774) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vrndscalepd, imm8 = 0))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm512_maskz_roundscale_pd(k: __mmask8, a: __m512d, imm8: i32) -> __m512d { +#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm512_maskz_roundscale_pd(k: __mmask8, a: __m512d) -> __m512d { + static_assert_imm8!(IMM8); let a = a.as_f64x8(); let zero = _mm512_setzero_pd().as_f64x8(); - macro_rules! call { - ($imm8:expr) => { - vrndscalepd(a, $imm8, zero, k, _MM_FROUND_CUR_DIRECTION) - }; - } - let r = constify_imm8_sae!(imm8, call); + let r = vrndscalepd(a, IMM8, zero, k, _MM_FROUND_CUR_DIRECTION); transmute(r) } @@ -5183,17 +5140,13 @@ pub unsafe fn _mm512_maskz_roundscale_pd(k: __mmask8, a: __m512d, imm8: i32) -> /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_roundscale_pd&expand=4772) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vrndscalepd, imm8 = 0))] -#[rustc_args_required_const(1)] -pub unsafe fn _mm256_roundscale_pd(a: __m256d, imm8: i32) -> __m256d { +#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn _mm256_roundscale_pd(a: __m256d) -> __m256d { + static_assert_imm8!(IMM8); let a = a.as_f64x4(); let zero = _mm256_setzero_pd().as_f64x4(); - macro_rules! 
call { - ($imm8:expr) => { - vrndscalepd256(a, $imm8, zero, 0b00001111) - }; - } - let r = constify_imm8_sae!(imm8, call); + let r = vrndscalepd256(a, IMM8, zero, 0b00001111); transmute(r) } @@ -5208,22 +5161,17 @@ pub unsafe fn _mm256_roundscale_pd(a: __m256d, imm8: i32) -> __m256d { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_roundscale_pd&expand=4770) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vrndscalepd, imm8 = 0))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm256_mask_roundscale_pd( +#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm256_mask_roundscale_pd( src: __m256d, k: __mmask8, a: __m256d, - imm8: i32, ) -> __m256d { + static_assert_imm8!(IMM8); let a = a.as_f64x4(); let src = src.as_f64x4(); - macro_rules! call { - ($imm8:expr) => { - vrndscalepd256(a, $imm8, src, k) - }; - } - let r = constify_imm8_sae!(imm8, call); + let r = vrndscalepd256(a, IMM8, src, k); transmute(r) } @@ -5238,17 +5186,13 @@ pub unsafe fn _mm256_mask_roundscale_pd( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_roundscale_pd&expand=4771) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vrndscalepd, imm8 = 0))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm256_maskz_roundscale_pd(k: __mmask8, a: __m256d, imm8: i32) -> __m256d { +#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm256_maskz_roundscale_pd(k: __mmask8, a: __m256d) -> __m256d { + static_assert_imm8!(IMM8); let a = a.as_f64x4(); let zero = _mm256_setzero_pd().as_f64x4(); - macro_rules! call { - ($imm8:expr) => { - vrndscalepd256(a, $imm8, zero, k) - }; - } - let r = constify_imm8_sae!(imm8, call); + let r = vrndscalepd256(a, IMM8, zero, k); transmute(r) } @@ -5263,17 +5207,13 @@ pub unsafe fn _mm256_maskz_roundscale_pd(k: __mmask8, a: __m256d, imm8: i32) -> /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_roundscale_pd&expand=4769) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vrndscalepd, imm8 = 0))] -#[rustc_args_required_const(1)] -pub unsafe fn _mm_roundscale_pd(a: __m128d, imm8: i32) -> __m128d { +#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn _mm_roundscale_pd(a: __m128d) -> __m128d { + static_assert_imm8!(IMM8); let a = a.as_f64x2(); let zero = _mm_setzero_pd().as_f64x2(); - macro_rules! 
call { - ($imm8:expr) => { - vrndscalepd128(a, $imm8, zero, 0b00000011) - }; - } - let r = constify_imm8_sae!(imm8, call); + let r = vrndscalepd128(a, IMM8, zero, 0b00000011); transmute(r) } @@ -5288,17 +5228,17 @@ pub unsafe fn _mm_roundscale_pd(a: __m128d, imm8: i32) -> __m128d { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_roundscale_pd&expand=4767) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vrndscalepd, imm8 = 0))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm_mask_roundscale_pd(src: __m128d, k: __mmask8, a: __m128d, imm8: i32) -> __m128d { +#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm_mask_roundscale_pd( + src: __m128d, + k: __mmask8, + a: __m128d, +) -> __m128d { + static_assert_imm8!(IMM8); let a = a.as_f64x2(); let src = src.as_f64x2(); - macro_rules! call { - ($imm8:expr) => { - vrndscalepd128(a, $imm8, src, k) - }; - } - let r = constify_imm8_sae!(imm8, call); + let r = vrndscalepd128(a, IMM8, src, k); transmute(r) } @@ -5313,17 +5253,13 @@ pub unsafe fn _mm_mask_roundscale_pd(src: __m128d, k: __mmask8, a: __m128d, imm8 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_roundscale_pd&expand=4768) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vrndscalepd, imm8 = 0))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm_maskz_roundscale_pd(k: __mmask8, a: __m128d, imm8: i32) -> __m128d { +#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm_maskz_roundscale_pd(k: __mmask8, a: __m128d) -> __m128d { + static_assert_imm8!(IMM8); let a = a.as_f64x2(); let zero = _mm_setzero_pd().as_f64x2(); - macro_rules! call { - ($imm8:expr) => { - vrndscalepd128(a, $imm8, zero, k) - }; - } - let r = constify_imm8_sae!(imm8, call); + let r = vrndscalepd128(a, IMM8, zero, k); transmute(r) } @@ -5588,25 +5524,14 @@ pub unsafe fn _mm_maskz_scalef_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fixupimm_ps&expand=2499) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vfixupimmps, imm8 = 0))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm512_fixupimm_ps(a: __m512, b: __m512, c: __m512i, imm8: i32) -> __m512 { +#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm512_fixupimm_ps(a: __m512, b: __m512, c: __m512i) -> __m512 { + static_assert_imm8!(IMM8); let a = a.as_f32x16(); let b = b.as_f32x16(); let c = c.as_i32x16(); - macro_rules! 
call { - ($imm8:expr) => { - vfixupimmps( - a, - b, - c, - $imm8, - 0b11111111_11111111, - _MM_FROUND_CUR_DIRECTION, - ) - }; - } - let r = constify_imm8_sae!(imm8, call); + let r = vfixupimmps(a, b, c, IMM8, 0b11111111_11111111, _MM_FROUND_CUR_DIRECTION); transmute(r) } @@ -5615,24 +5540,19 @@ pub unsafe fn _mm512_fixupimm_ps(a: __m512, b: __m512, c: __m512i, imm8: i32) -> /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fixupimm_ps&expand=2500) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vfixupimmps, imm8 = 0))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm512_mask_fixupimm_ps( +#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm512_mask_fixupimm_ps( a: __m512, k: __mmask16, b: __m512, c: __m512i, - imm8: i32, ) -> __m512 { + static_assert_imm8!(IMM8); let a = a.as_f32x16(); let b = b.as_f32x16(); let c = c.as_i32x16(); - macro_rules! call { - ($imm8:expr) => { - vfixupimmps(a, b, c, $imm8, k, _MM_FROUND_CUR_DIRECTION) - }; - } - let r = constify_imm8_sae!(imm8, call); + let r = vfixupimmps(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION); transmute(r) } @@ -5641,24 +5561,19 @@ pub unsafe fn _mm512_mask_fixupimm_ps( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fixupimm_ps&expand=2501) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vfixupimmps, imm8 = 0))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm512_maskz_fixupimm_ps( +#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm512_maskz_fixupimm_ps( k: __mmask16, a: __m512, b: __m512, c: __m512i, - imm8: i32, ) -> __m512 { + static_assert_imm8!(IMM8); let a = a.as_f32x16(); let b = b.as_f32x16(); let c = c.as_i32x16(); - macro_rules! call { - ($imm8:expr) => { - vfixupimmpsz(a, b, c, $imm8, k, _MM_FROUND_CUR_DIRECTION) - }; - } - let r = constify_imm8_sae!(imm8, call); + let r = vfixupimmpsz(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION); transmute(r) } @@ -5667,18 +5582,14 @@ pub unsafe fn _mm512_maskz_fixupimm_ps( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fixupimm_ps&expand=2496) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vfixupimmps, imm8 = 0))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm256_fixupimm_ps(a: __m256, b: __m256, c: __m256i, imm8: i32) -> __m256 { +#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm256_fixupimm_ps(a: __m256, b: __m256, c: __m256i) -> __m256 { + static_assert_imm8!(IMM8); let a = a.as_f32x8(); let b = b.as_f32x8(); let c = c.as_i32x8(); - macro_rules! 
call { - ($imm8:expr) => { - vfixupimmps256(a, b, c, $imm8, 0b11111111) - }; - } - let r = constify_imm8_sae!(imm8, call); + let r = vfixupimmps256(a, b, c, IMM8, 0b11111111); transmute(r) } @@ -5687,24 +5598,19 @@ pub unsafe fn _mm256_fixupimm_ps(a: __m256, b: __m256, c: __m256i, imm8: i32) -> /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fixupimm_ps&expand=2497) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vfixupimmps, imm8 = 0))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm256_mask_fixupimm_ps( +#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm256_mask_fixupimm_ps( a: __m256, k: __mmask8, b: __m256, c: __m256i, - imm8: i32, ) -> __m256 { + static_assert_imm8!(IMM8); let a = a.as_f32x8(); let b = b.as_f32x8(); let c = c.as_i32x8(); - macro_rules! call { - ($imm8:expr) => { - vfixupimmps256(a, b, c, $imm8, k) - }; - } - let r = constify_imm8_sae!(imm8, call); + let r = vfixupimmps256(a, b, c, IMM8, k); transmute(r) } @@ -5713,24 +5619,19 @@ pub unsafe fn _mm256_mask_fixupimm_ps( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_fixupimm_ps&expand=2498) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vfixupimmps, imm8 = 0))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm256_maskz_fixupimm_ps( +#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm256_maskz_fixupimm_ps( k: __mmask8, a: __m256, b: __m256, c: __m256i, - imm8: i32, ) -> __m256 { + static_assert_imm8!(IMM8); let a = a.as_f32x8(); let b = b.as_f32x8(); let c = c.as_i32x8(); - macro_rules! call { - ($imm8:expr) => { - vfixupimmpsz256(a, b, c, $imm8, k) - }; - } - let r = constify_imm8_sae!(imm8, call); + let r = vfixupimmpsz256(a, b, c, IMM8, k); transmute(r) } @@ -5739,18 +5640,14 @@ pub unsafe fn _mm256_maskz_fixupimm_ps( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fixupimm_ps&expand=2493) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vfixupimmps, imm8 = 0))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm_fixupimm_ps(a: __m128, b: __m128, c: __m128i, imm8: i32) -> __m128 { +#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm_fixupimm_ps(a: __m128, b: __m128, c: __m128i) -> __m128 { + static_assert_imm8!(IMM8); let a = a.as_f32x4(); let b = b.as_f32x4(); let c = c.as_i32x4(); - macro_rules! 
call { - ($imm8:expr) => { - vfixupimmps128(a, b, c, $imm8, 0b00001111) - }; - } - let r = constify_imm8_sae!(imm8, call); + let r = vfixupimmps128(a, b, c, IMM8, 0b00001111); transmute(r) } @@ -5759,24 +5656,19 @@ pub unsafe fn _mm_fixupimm_ps(a: __m128, b: __m128, c: __m128i, imm8: i32) -> __ /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fixupimm_ps&expand=2494) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vfixupimmps, imm8 = 0))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm_mask_fixupimm_ps( +#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm_mask_fixupimm_ps( a: __m128, k: __mmask8, b: __m128, c: __m128i, - imm8: i32, ) -> __m128 { + static_assert_imm8!(IMM8); let a = a.as_f32x4(); let b = b.as_f32x4(); let c = c.as_i32x4(); - macro_rules! call { - ($imm8:expr) => { - vfixupimmps128(a, b, c, $imm8, k) - }; - } - let r = constify_imm8_sae!(imm8, call); + let r = vfixupimmps128(a, b, c, IMM8, k); transmute(r) } @@ -5785,24 +5677,19 @@ pub unsafe fn _mm_mask_fixupimm_ps( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fixupimm_ps&expand=2495) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vfixupimmps, imm8 = 0))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm_maskz_fixupimm_ps( +#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm_maskz_fixupimm_ps( k: __mmask8, a: __m128, b: __m128, c: __m128i, - imm8: i32, ) -> __m128 { + static_assert_imm8!(IMM8); let a = a.as_f32x4(); let b = b.as_f32x4(); let c = c.as_i32x4(); - macro_rules! call { - ($imm8:expr) => { - vfixupimmpsz128(a, b, c, $imm8, k) - }; - } - let r = constify_imm8_sae!(imm8, call); + let r = vfixupimmpsz128(a, b, c, IMM8, k); transmute(r) } @@ -5811,18 +5698,14 @@ pub unsafe fn _mm_maskz_fixupimm_ps( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fixupimm_pd&expand=2490) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vfixupimmpd, imm8 = 0))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm512_fixupimm_pd(a: __m512d, b: __m512d, c: __m512i, imm8: i32) -> __m512d { +#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm512_fixupimm_pd(a: __m512d, b: __m512d, c: __m512i) -> __m512d { + static_assert_imm8!(IMM8); let a = a.as_f64x8(); let b = b.as_f64x8(); let c = c.as_i64x8(); - macro_rules! 
call { - ($imm8:expr) => { - vfixupimmpd(a, b, c, $imm8, 0b11111111, _MM_FROUND_CUR_DIRECTION) - }; - } - let r = constify_imm8_sae!(imm8, call); + let r = vfixupimmpd(a, b, c, IMM8, 0b11111111, _MM_FROUND_CUR_DIRECTION); transmute(r) } @@ -5831,24 +5714,19 @@ pub unsafe fn _mm512_fixupimm_pd(a: __m512d, b: __m512d, c: __m512i, imm8: i32) /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fixupimm_pd&expand=2491) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vfixupimmpd, imm8 = 0))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm512_mask_fixupimm_pd( +#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm512_mask_fixupimm_pd( a: __m512d, k: __mmask8, b: __m512d, c: __m512i, - imm8: i32, ) -> __m512d { + static_assert_imm8!(IMM8); let a = a.as_f64x8(); let b = b.as_f64x8(); let c = c.as_i64x8(); - macro_rules! call { - ($imm8:expr) => { - vfixupimmpd(a, b, c, $imm8, k, _MM_FROUND_CUR_DIRECTION) - }; - } - let r = constify_imm8_sae!(imm8, call); + let r = vfixupimmpd(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION); transmute(r) } @@ -5857,24 +5735,19 @@ pub unsafe fn _mm512_mask_fixupimm_pd( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fixupimm_pd&expand=2492) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vfixupimmpd, imm8 = 0))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm512_maskz_fixupimm_pd( +#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm512_maskz_fixupimm_pd( k: __mmask8, a: __m512d, b: __m512d, c: __m512i, - imm8: i32, ) -> __m512d { + static_assert_imm8!(IMM8); let a = a.as_f64x8(); let b = b.as_f64x8(); let c = c.as_i64x8(); - macro_rules! call { - ($imm8:expr) => { - vfixupimmpdz(a, b, c, $imm8, k, _MM_FROUND_CUR_DIRECTION) - }; - } - let r = constify_imm8_sae!(imm8, call); + let r = vfixupimmpdz(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION); transmute(r) } @@ -5883,18 +5756,14 @@ pub unsafe fn _mm512_maskz_fixupimm_pd( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fixupimm_pd&expand=2487) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vfixupimmpd, imm8 = 0))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm256_fixupimm_pd(a: __m256d, b: __m256d, c: __m256i, imm8: i32) -> __m256d { +#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm256_fixupimm_pd(a: __m256d, b: __m256d, c: __m256i) -> __m256d { + static_assert_imm8!(IMM8); let a = a.as_f64x4(); let b = b.as_f64x4(); let c = c.as_i64x4(); - macro_rules! 
call { - ($imm8:expr) => { - vfixupimmpd256(a, b, c, $imm8, 0b00001111) - }; - } - let r = constify_imm8_sae!(imm8, call); + let r = vfixupimmpd256(a, b, c, IMM8, 0b00001111); transmute(r) } @@ -5903,24 +5772,19 @@ pub unsafe fn _mm256_fixupimm_pd(a: __m256d, b: __m256d, c: __m256i, imm8: i32) /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_fixupimm_pd&expand=2488) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vfixupimmpd, imm8 = 0))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm256_mask_fixupimm_pd( +#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm256_mask_fixupimm_pd( a: __m256d, k: __mmask8, b: __m256d, c: __m256i, - imm8: i32, ) -> __m256d { + static_assert_imm8!(IMM8); let a = a.as_f64x4(); let b = b.as_f64x4(); let c = c.as_i64x4(); - macro_rules! call { - ($imm8:expr) => { - vfixupimmpd256(a, b, c, $imm8, k) - }; - } - let r = constify_imm8_sae!(imm8, call); + let r = vfixupimmpd256(a, b, c, IMM8, k); transmute(r) } @@ -5929,24 +5793,19 @@ pub unsafe fn _mm256_mask_fixupimm_pd( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_fixupimm_pd&expand=2489) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vfixupimmpd, imm8 = 0))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm256_maskz_fixupimm_pd( +#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm256_maskz_fixupimm_pd( k: __mmask8, a: __m256d, b: __m256d, c: __m256i, - imm8: i32, ) -> __m256d { + static_assert_imm8!(IMM8); let a = a.as_f64x4(); let b = b.as_f64x4(); let c = c.as_i64x4(); - macro_rules! call { - ($imm8:expr) => { - vfixupimmpdz256(a, b, c, $imm8, k) - }; - } - let r = constify_imm8_sae!(imm8, call); + let r = vfixupimmpdz256(a, b, c, IMM8, k); transmute(r) } @@ -5955,18 +5814,14 @@ pub unsafe fn _mm256_maskz_fixupimm_pd( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fixupimm_pd&expand=2484) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vfixupimmpd, imm8 = 0))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm_fixupimm_pd(a: __m128d, b: __m128d, c: __m128i, imm8: i32) -> __m128d { +#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm_fixupimm_pd(a: __m128d, b: __m128d, c: __m128i) -> __m128d { + static_assert_imm8!(IMM8); let a = a.as_f64x2(); let b = b.as_f64x2(); let c = c.as_i64x2(); - macro_rules! 
call { - ($imm8:expr) => { - vfixupimmpd128(a, b, c, $imm8, 0b00000011) - }; - } - let r = constify_imm8_sae!(imm8, call); + let r = vfixupimmpd128(a, b, c, IMM8, 0b00000011); transmute(r) } @@ -5975,24 +5830,19 @@ pub unsafe fn _mm_fixupimm_pd(a: __m128d, b: __m128d, c: __m128i, imm8: i32) -> /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fixupimm_pd&expand=2485) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vfixupimmpd, imm8 = 0))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm_mask_fixupimm_pd( +#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm_mask_fixupimm_pd( a: __m128d, k: __mmask8, b: __m128d, c: __m128i, - imm8: i32, ) -> __m128d { + static_assert_imm8!(IMM8); let a = a.as_f64x2(); let b = b.as_f64x2(); let c = c.as_i64x2(); - macro_rules! call { - ($imm8:expr) => { - vfixupimmpd128(a, b, c, $imm8, k) - }; - } - let r = constify_imm8_sae!(imm8, call); + let r = vfixupimmpd128(a, b, c, IMM8, k); transmute(r) } @@ -6001,24 +5851,19 @@ pub unsafe fn _mm_mask_fixupimm_pd( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fixupimm_pd&expand=2486) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vfixupimmpd, imm8 = 0))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm_maskz_fixupimm_pd( +#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm_maskz_fixupimm_pd( k: __mmask8, a: __m128d, b: __m128d, c: __m128i, - imm8: i32, ) -> __m128d { + static_assert_imm8!(IMM8); let a = a.as_f64x2(); let b = b.as_f64x2(); let c = c.as_i64x2(); - macro_rules! call { - ($imm8:expr) => { - vfixupimmpdz128(a, b, c, $imm8, k) - }; - } - let r = constify_imm8_sae!(imm8, call); + let r = vfixupimmpdz128(a, b, c, IMM8, k); transmute(r) } @@ -6027,18 +5872,18 @@ pub unsafe fn _mm_maskz_fixupimm_pd( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_ternarylogic_epi32&expand=5867) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vpternlogd, imm8 = 114))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm512_ternarylogic_epi32(a: __m512i, b: __m512i, c: __m512i, imm8: i32) -> __m512i { +#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm512_ternarylogic_epi32( + a: __m512i, + b: __m512i, + c: __m512i, +) -> __m512i { + static_assert_imm8!(IMM8); let a = a.as_i32x16(); let b = b.as_i32x16(); let c = c.as_i32x16(); - macro_rules! 
call { - ($imm8:expr) => { - vpternlogd(a, b, c, $imm8) - }; - } - let r = constify_imm8_sae!(imm8, call); + let r = vpternlogd(a, b, c, IMM8); transmute(r) } @@ -6047,25 +5892,20 @@ pub unsafe fn _mm512_ternarylogic_epi32(a: __m512i, b: __m512i, c: __m512i, imm8 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_ternarylogic_epi32&expand=5865) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vpternlogd, imm8 = 114))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm512_mask_ternarylogic_epi32( +#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm512_mask_ternarylogic_epi32( src: __m512i, k: __mmask16, a: __m512i, b: __m512i, - imm8: i32, ) -> __m512i { + static_assert_imm8!(IMM8); let src = src.as_i32x16(); let a = a.as_i32x16(); let b = b.as_i32x16(); - macro_rules! call { - ($imm8:expr) => { - vpternlogd(src, a, b, $imm8) - }; - } - let ternarylogic = constify_imm8_sae!(imm8, call); - transmute(simd_select_bitmask(k, ternarylogic, src)) + let r = vpternlogd(src, a, b, IMM8); + transmute(simd_select_bitmask(k, r, src)) } /// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using zeromask k at 32-bit granularity (32-bit elements are zeroed out when the corresponding mask bit is not set). @@ -6073,26 +5913,21 @@ pub unsafe fn _mm512_mask_ternarylogic_epi32( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_ternarylogic_epi32&expand=5866) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vpternlogd, imm8 = 114))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm512_maskz_ternarylogic_epi32( +#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm512_maskz_ternarylogic_epi32( k: __mmask16, a: __m512i, b: __m512i, c: __m512i, - imm8: i32, ) -> __m512i { + static_assert_imm8!(IMM8); let a = a.as_i32x16(); let b = b.as_i32x16(); let c = c.as_i32x16(); - macro_rules! call { - ($imm8:expr) => { - vpternlogd(a, b, c, $imm8) - }; - } - let ternarylogic = constify_imm8_sae!(imm8, call); + let r = vpternlogd(a, b, c, IMM8); let zero = _mm512_setzero_si512().as_i32x16(); - transmute(simd_select_bitmask(k, ternarylogic, zero)) + transmute(simd_select_bitmask(k, r, zero)) } /// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst. 
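(Editor's sketch, not part of the patch.) The 8-bit immediate of vpternlogd is the truth table of the three-operand bit function: for every bit position, the corresponding bits of a, b and c form the index ((a << 2) | (b << 1) | c) into IMM8, and that bit of IMM8 is the result. So 0x96 encodes three-way XOR and 0xE8 encodes majority-of-three. Under the const-generic signatures introduced above, a call looks like:

    // Hedged illustration, not from the patch: three-way XOR in a single
    // vpternlogd. IMM8 = 0x96 sets exactly the odd-parity truth-table rows.
    unsafe fn xor3(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
        _mm512_ternarylogic_epi32::<0x96>(a, b, c)
    }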
@@ -6100,18 +5935,18 @@ pub unsafe fn _mm512_maskz_ternarylogic_epi32( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_ternarylogic_epi32&expand=5864) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vpternlogd, imm8 = 114))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm256_ternarylogic_epi32(a: __m256i, b: __m256i, c: __m256i, imm8: i32) -> __m256i { +#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm256_ternarylogic_epi32( + a: __m256i, + b: __m256i, + c: __m256i, +) -> __m256i { + static_assert_imm8!(IMM8); let a = a.as_i32x8(); let b = b.as_i32x8(); let c = c.as_i32x8(); - macro_rules! call { - ($imm8:expr) => { - vpternlogd256(a, b, c, $imm8) - }; - } - let r = constify_imm8_sae!(imm8, call); + let r = vpternlogd256(a, b, c, IMM8); transmute(r) } @@ -6120,25 +5955,20 @@ pub unsafe fn _mm256_ternarylogic_epi32(a: __m256i, b: __m256i, c: __m256i, imm8 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_ternarylogic_epi32&expand=5862) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vpternlogd, imm8 = 114))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm256_mask_ternarylogic_epi32( +#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm256_mask_ternarylogic_epi32( src: __m256i, k: __mmask8, a: __m256i, b: __m256i, - imm8: i32, ) -> __m256i { + static_assert_imm8!(IMM8); let src = src.as_i32x8(); let a = a.as_i32x8(); let b = b.as_i32x8(); - macro_rules! call { - ($imm8:expr) => { - vpternlogd256(src, a, b, $imm8) - }; - } - let ternarylogic = constify_imm8_sae!(imm8, call); - transmute(simd_select_bitmask(k, ternarylogic, src)) + let r = vpternlogd256(src, a, b, IMM8); + transmute(simd_select_bitmask(k, r, src)) } /// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using zeromask k at 32-bit granularity (32-bit elements are zeroed out when the corresponding mask bit is not set). @@ -6146,26 +5976,21 @@ pub unsafe fn _mm256_mask_ternarylogic_epi32( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_ternarylogic_epi32&expand=5863) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vpternlogd, imm8 = 114))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm256_maskz_ternarylogic_epi32( +#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm256_maskz_ternarylogic_epi32( k: __mmask8, a: __m256i, b: __m256i, c: __m256i, - imm8: i32, ) -> __m256i { + static_assert_imm8!(IMM8); let a = a.as_i32x8(); let b = b.as_i32x8(); let c = c.as_i32x8(); - macro_rules! 
call { - ($imm8:expr) => { - vpternlogd256(a, b, c, $imm8) - }; - } - let ternarylogic = constify_imm8_sae!(imm8, call); + let r = vpternlogd256(a, b, c, IMM8); let zero = _mm256_setzero_si256().as_i32x8(); - transmute(simd_select_bitmask(k, ternarylogic, zero)) + transmute(simd_select_bitmask(k, r, zero)) } /// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst. @@ -6173,18 +5998,18 @@ pub unsafe fn _mm256_maskz_ternarylogic_epi32( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ternarylogic_epi32&expand=5861) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vpternlogd, imm8 = 114))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm_ternarylogic_epi32(a: __m128i, b: __m128i, c: __m128i, imm8: i32) -> __m128i { +#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm_ternarylogic_epi32( + a: __m128i, + b: __m128i, + c: __m128i, +) -> __m128i { + static_assert_imm8!(IMM8); let a = a.as_i32x4(); let b = b.as_i32x4(); let c = c.as_i32x4(); - macro_rules! call { - ($imm8:expr) => { - vpternlogd128(a, b, c, $imm8) - }; - } - let r = constify_imm8_sae!(imm8, call); + let r = vpternlogd128(a, b, c, IMM8); transmute(r) } @@ -6193,25 +6018,20 @@ pub unsafe fn _mm_ternarylogic_epi32(a: __m128i, b: __m128i, c: __m128i, imm8: i /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_ternarylogic_epi32&expand=5859) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vpternlogd, imm8 = 114))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm_mask_ternarylogic_epi32( +#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm_mask_ternarylogic_epi32( src: __m128i, k: __mmask8, a: __m128i, b: __m128i, - imm8: i32, ) -> __m128i { + static_assert_imm8!(IMM8); let src = src.as_i32x4(); let a = a.as_i32x4(); let b = b.as_i32x4(); - macro_rules! call { - ($imm8:expr) => { - vpternlogd128(src, a, b, $imm8) - }; - } - let ternarylogic = constify_imm8_sae!(imm8, call); - transmute(simd_select_bitmask(k, ternarylogic, src)) + let r = vpternlogd128(src, a, b, IMM8); + transmute(simd_select_bitmask(k, r, src)) } /// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using zeromask k at 32-bit granularity (32-bit elements are zeroed out when the corresponding mask bit is not set). 
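(Editor's sketch, not part of the patch.) The point of #[rustc_legacy_const_generics(4)] is that existing call sites need not change: a call that still passes the immediate as the trailing argument is rewritten by the compiler into the const-generic form. Both spellings below resolve to the same monomorphization:

    // Hedged illustration, not from the patch: equivalent old-style and
    // new-style calls; the first is rewritten via rustc_legacy_const_generics.
    let r1 = _mm_mask_ternarylogic_epi32(src, k, a, b, 114);
    let r2 = _mm_mask_ternarylogic_epi32::<114>(src, k, a, b);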
@@ -6219,26 +6039,21 @@ pub unsafe fn _mm_mask_ternarylogic_epi32( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_ternarylogic_epi32&expand=5860) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vpternlogd, imm8 = 114))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm_maskz_ternarylogic_epi32( +#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm_maskz_ternarylogic_epi32( k: __mmask8, a: __m128i, b: __m128i, c: __m128i, - imm8: i32, ) -> __m128i { + static_assert_imm8!(IMM8); let a = a.as_i32x4(); let b = b.as_i32x4(); let c = c.as_i32x4(); - macro_rules! call { - ($imm8:expr) => { - vpternlogd128(a, b, c, $imm8) - }; - } - let ternarylogic = constify_imm8_sae!(imm8, call); + let r = vpternlogd128(a, b, c, IMM8); let zero = _mm_setzero_si128().as_i32x4(); - transmute(simd_select_bitmask(k, ternarylogic, zero)) + transmute(simd_select_bitmask(k, r, zero)) } /// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst. @@ -6246,18 +6061,18 @@ pub unsafe fn _mm_maskz_ternarylogic_epi32( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_ternarylogic_epi64&expand=5876) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vpternlogq, imm8 = 114))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm512_ternarylogic_epi64(a: __m512i, b: __m512i, c: __m512i, imm8: i32) -> __m512i { +#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm512_ternarylogic_epi64( + a: __m512i, + b: __m512i, + c: __m512i, +) -> __m512i { + static_assert_imm8!(IMM8); let a = a.as_i64x8(); let b = b.as_i64x8(); let c = c.as_i64x8(); - macro_rules! call { - ($imm8:expr) => { - vpternlogq(a, b, c, $imm8) - }; - } - let r = constify_imm8_sae!(imm8, call); + let r = vpternlogq(a, b, c, IMM8); transmute(r) } @@ -6266,25 +6081,20 @@ pub unsafe fn _mm512_ternarylogic_epi64(a: __m512i, b: __m512i, c: __m512i, imm8 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_ternarylogic_epi64&expand=5874) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vpternlogq, imm8 = 114))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm512_mask_ternarylogic_epi64( +#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm512_mask_ternarylogic_epi64( src: __m512i, k: __mmask8, a: __m512i, b: __m512i, - imm8: i32, ) -> __m512i { + static_assert_imm8!(IMM8); let src = src.as_i64x8(); let a = a.as_i64x8(); let b = b.as_i64x8(); - macro_rules! call { - ($imm8:expr) => { - vpternlogq(src, a, b, $imm8) - }; - } - let ternarylogic = constify_imm8_sae!(imm8, call); - transmute(simd_select_bitmask(k, ternarylogic, src)) + let r = vpternlogq(src, a, b, IMM8); + transmute(simd_select_bitmask(k, r, src)) } /// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. 
For each bit in each packed 64-bit integer, the corresponding bits from a, b, and c are used to form a 3-bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using zeromask k at 64-bit granularity (64-bit elements are zeroed out when the corresponding mask bit is not set). @@ -6292,26 +6102,21 @@ pub unsafe fn _mm512_mask_ternarylogic_epi64( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_ternarylogic_epi64&expand=5875) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vpternlogq, imm8 = 114))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm512_maskz_ternarylogic_epi64( +#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm512_maskz_ternarylogic_epi64<const IMM8: i32>( k: __mmask8, a: __m512i, b: __m512i, c: __m512i, - imm8: i32, ) -> __m512i { + static_assert_imm8!(IMM8); let a = a.as_i64x8(); let b = b.as_i64x8(); let c = c.as_i64x8(); - macro_rules! call { - ($imm8:expr) => { - vpternlogq(a, b, c, $imm8) - }; - } - let ternarylogic = constify_imm8_sae!(imm8, call); + let r = vpternlogq(a, b, c, IMM8); let zero = _mm512_setzero_si512().as_i64x8(); - transmute(simd_select_bitmask(k, ternarylogic, zero)) + transmute(simd_select_bitmask(k, r, zero)) } /// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bits from a, b, and c are used to form a 3-bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst. @@ -6319,18 +6124,18 @@ pub unsafe fn _mm512_maskz_ternarylogic_epi64( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_ternarylogic_epi64&expand=5873) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vpternlogq, imm8 = 114))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm256_ternarylogic_epi64(a: __m256i, b: __m256i, c: __m256i, imm8: i32) -> __m256i { +#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm256_ternarylogic_epi64<const IMM8: i32>( + a: __m256i, + b: __m256i, + c: __m256i, +) -> __m256i { + static_assert_imm8!(IMM8); let a = a.as_i64x4(); let b = b.as_i64x4(); let c = c.as_i64x4(); - macro_rules! call { - ($imm8:expr) => { - vpternlogq256(a, b, c, $imm8) - }; - } - let r = constify_imm8_sae!(imm8, call); + let r = vpternlogq256(a, b, c, IMM8); transmute(r) } @@ -6339,25 +6144,20 @@ pub unsafe fn _mm256_ternarylogic_epi64(a: __m256i, b: __m256i, c: __m256i, imm8 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_ternarylogic_epi64&expand=5871) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vpternlogq, imm8 = 114))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm256_mask_ternarylogic_epi64( +#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm256_mask_ternarylogic_epi64<const IMM8: i32>( src: __m256i, k: __mmask8, a: __m256i, b: __m256i, - imm8: i32, ) -> __m256i { + static_assert_imm8!(IMM8); let src = src.as_i64x4(); let a = a.as_i64x4(); let b = b.as_i64x4(); - macro_rules!
call { - ($imm8:expr) => { - vpternlogq256(src, a, b, $imm8) - }; - } - let ternarylogic = constify_imm8_sae!(imm8, call); - transmute(simd_select_bitmask(k, ternarylogic, src)) + let r = vpternlogq256(src, a, b, IMM8); + transmute(simd_select_bitmask(k, r, src)) } /// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bits from a, b, and c are used to form a 3-bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using zeromask k at 64-bit granularity (64-bit elements are zeroed out when the corresponding mask bit is not set). @@ -6365,26 +6165,21 @@ pub unsafe fn _mm256_mask_ternarylogic_epi64( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_ternarylogic_epi64&expand=5872) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vpternlogq, imm8 = 114))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm256_maskz_ternarylogic_epi64( +#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm256_maskz_ternarylogic_epi64<const IMM8: i32>( k: __mmask8, a: __m256i, b: __m256i, c: __m256i, - imm8: i32, ) -> __m256i { + static_assert_imm8!(IMM8); let a = a.as_i64x4(); let b = b.as_i64x4(); let c = c.as_i64x4(); - macro_rules! call { - ($imm8:expr) => { - vpternlogq256(a, b, c, $imm8) - }; - } - let ternarylogic = constify_imm8_sae!(imm8, call); + let r = vpternlogq256(a, b, c, IMM8); let zero = _mm256_setzero_si256().as_i64x4(); - transmute(simd_select_bitmask(k, ternarylogic, zero)) + transmute(simd_select_bitmask(k, r, zero)) } /// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bits from a, b, and c are used to form a 3-bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst. @@ -6392,18 +6187,18 @@ pub unsafe fn _mm256_maskz_ternarylogic_epi64( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ternarylogic_epi64&expand=5870) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vpternlogq, imm8 = 114))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm_ternarylogic_epi64(a: __m128i, b: __m128i, c: __m128i, imm8: i32) -> __m128i { +#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm_ternarylogic_epi64<const IMM8: i32>( + a: __m128i, + b: __m128i, + c: __m128i, +) -> __m128i { + static_assert_imm8!(IMM8); let a = a.as_i64x2(); let b = b.as_i64x2(); let c = c.as_i64x2(); - macro_rules!
call { - ($imm8:expr) => { - vpternlogq128(a, b, c, $imm8) - }; - } - let r = constify_imm8_sae!(imm8, call); + let r = vpternlogq128(a, b, c, IMM8); transmute(r) } @@ -6412,25 +6207,20 @@ pub unsafe fn _mm_ternarylogic_epi64(a: __m128i, b: __m128i, c: __m128i, imm8: i /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_ternarylogic_epi64&expand=5868) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vpternlogq, imm8 = 114))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm_mask_ternarylogic_epi64( +#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm_mask_ternarylogic_epi64<const IMM8: i32>( src: __m128i, k: __mmask8, a: __m128i, b: __m128i, - imm8: i32, ) -> __m128i { + static_assert_imm8!(IMM8); let src = src.as_i64x2(); let a = a.as_i64x2(); let b = b.as_i64x2(); - macro_rules! call { - ($imm8:expr) => { - vpternlogq128(src, a, b, $imm8) - }; - } - let ternarylogic = constify_imm8_sae!(imm8, call); - transmute(simd_select_bitmask(k, ternarylogic, src)) + let r = vpternlogq128(src, a, b, IMM8); + transmute(simd_select_bitmask(k, r, src)) } /// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bits from a, b, and c are used to form a 3-bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using zeromask k at 64-bit granularity (64-bit elements are zeroed out when the corresponding mask bit is not set). @@ -6438,26 +6228,21 @@ pub unsafe fn _mm_mask_ternarylogic_epi64( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_ternarylogic_epi64&expand=5869) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vpternlogq, imm8 = 114))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm_maskz_ternarylogic_epi64( +#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm_maskz_ternarylogic_epi64<const IMM8: i32>( k: __mmask8, a: __m128i, b: __m128i, c: __m128i, - imm8: i32, ) -> __m128i { + static_assert_imm8!(IMM8); let a = a.as_i64x2(); let b = b.as_i64x2(); let c = c.as_i64x2(); - macro_rules! call { - ($imm8:expr) => { - vpternlogq128(a, b, c, $imm8) - }; - } - let ternarylogic = constify_imm8_sae!(imm8, call); + let r = vpternlogq128(a, b, c, IMM8); let zero = _mm_setzero_si128().as_i64x2(); - transmute(simd_select_bitmask(k, ternarylogic, zero)) + transmute(simd_select_bitmask(k, r, zero)) } /// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.
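The vpternlogq conversions above now pass the immediate straight through, so the truth-table semantics in the doc comments are easy to sanity-check in scalar code before moving on to the rounding intrinsics. The sketch below is illustrative only and is not part of the patch; ternarylogic_u64 is a hypothetical helper that evaluates one 64-bit lane exactly as described: at every bit position, the bits of a, b and c form a 3-bit index, and the bit of imm8 at that index becomes the output bit.

    // Scalar model of the vpternlog truth-table lookup (hypothetical helper,
    // not part of this patch).
    fn ternarylogic_u64(a: u64, b: u64, c: u64, imm8: u8) -> u64 {
        let mut dst = 0u64;
        for bit in 0..64 {
            // The three input bits at this position form an index into imm8.
            let idx = ((a >> bit) & 1) << 2 | ((b >> bit) & 1) << 1 | ((c >> bit) & 1);
            dst |= u64::from((imm8 >> idx) & 1) << bit;
        }
        dst
    }

    fn main() {
        // 0xE8 = 0b1110_1000 encodes the majority function: the output bit
        // is set when at least two of the three input bits are set.
        assert_eq!(ternarylogic_u64(0b1100, 0b1010, 0b0110, 0xE8), 0b1110);
    }

The immediate 114 used by the assert_instr attributes is just an arbitrary non-trivial table in the same encoding.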
@@ -7078,17 +6863,13 @@ pub unsafe fn _mm_maskz_getmant_pd( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_add_round_ps&expand=145) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vaddps, rounding = 8))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm512_add_round_ps(a: __m512, b: __m512, rounding: i32) -> __m512 { +#[cfg_attr(test, assert_instr(vaddps, ROUNDING = 8))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm512_add_round_ps<const ROUNDING: i32>(a: __m512, b: __m512) -> __m512 { + static_assert_rounding!(ROUNDING); let a = a.as_f32x16(); let b = b.as_f32x16(); - macro_rules! call { - ($imm4:expr) => { - vaddps(a, b, $imm4) - }; - } - let r = constify_imm4_round!(rounding, call); + let r = vaddps(a, b, ROUNDING); transmute(r) } @@ -7104,24 +6885,19 @@ pub unsafe fn _mm512_add_round_ps(a: __m512, b: __m512, rounding: i32) -> __m512 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_add_round_ps&expand=146) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vaddps, rounding = 8))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm512_mask_add_round_ps( +#[cfg_attr(test, assert_instr(vaddps, ROUNDING = 8))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm512_mask_add_round_ps<const ROUNDING: i32>( src: __m512, k: __mmask16, a: __m512, b: __m512, - rounding: i32, ) -> __m512 { + static_assert_rounding!(ROUNDING); let a = a.as_f32x16(); let b = b.as_f32x16(); - macro_rules! call { - ($imm4:expr) => { - vaddps(a, b, $imm4) - }; - } - let addround = constify_imm4_round!(rounding, call); - transmute(simd_select_bitmask(k, addround, src.as_f32x16())) + let r = vaddps(a, b, ROUNDING); + transmute(simd_select_bitmask(k, r, src.as_f32x16())) } /// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\ @@ -7136,24 +6912,19 @@ pub unsafe fn _mm512_mask_add_round_ps( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_add_round_ps&expand=147) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vaddps, rounding = 8))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm512_maskz_add_round_ps( +#[cfg_attr(test, assert_instr(vaddps, ROUNDING = 8))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm512_maskz_add_round_ps<const ROUNDING: i32>( k: __mmask16, a: __m512, b: __m512, - rounding: i32, ) -> __m512 { + static_assert_rounding!(ROUNDING); let a = a.as_f32x16(); let b = b.as_f32x16(); - macro_rules!
call { - ($imm4:expr) => { - vaddps(a, b, $imm4) - }; - } - let addround = constify_imm4_round!(rounding, call); + let r = vaddps(a, b, ROUNDING); let zero = _mm512_setzero_ps().as_f32x16(); - transmute(simd_select_bitmask(k, addround, zero)) + transmute(simd_select_bitmask(k, r, zero)) } /// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst.\ @@ -7168,17 +6939,13 @@ pub unsafe fn _mm512_maskz_add_round_ps( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_add_round_pd&expand=142) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vaddpd, rounding = 8))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm512_add_round_pd(a: __m512d, b: __m512d, rounding: i32) -> __m512d { +#[cfg_attr(test, assert_instr(vaddpd, ROUNDING = 8))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm512_add_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d) -> __m512d { + static_assert_rounding!(ROUNDING); let a = a.as_f64x8(); let b = b.as_f64x8(); - macro_rules! call { - ($imm4:expr) => { - vaddpd(a, b, $imm4) - }; - } - let r = constify_imm4_round!(rounding, call); + let r = vaddpd(a, b, ROUNDING); transmute(r) } @@ -7194,24 +6961,19 @@ pub unsafe fn _mm512_add_round_pd(a: __m512d, b: __m512d, rounding: i32) -> __m5 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_add_round_pd&expand=143) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vaddpd, rounding = 8))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm512_mask_add_round_pd( +#[cfg_attr(test, assert_instr(vaddpd, ROUNDING = 8))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm512_mask_add_round_pd<const ROUNDING: i32>( src: __m512d, k: __mmask8, a: __m512d, b: __m512d, - rounding: i32, ) -> __m512d { + static_assert_rounding!(ROUNDING); let a = a.as_f64x8(); let b = b.as_f64x8(); - macro_rules! call { - ($imm4:expr) => { - vaddpd(a, b, $imm4) - }; - } - let addround = constify_imm4_round!(rounding, call); - transmute(simd_select_bitmask(k, addround, src.as_f64x8())) + let r = vaddpd(a, b, ROUNDING); + transmute(simd_select_bitmask(k, r, src.as_f64x8())) } /// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\ @@ -7226,24 +6988,19 @@ pub unsafe fn _mm512_mask_add_round_pd( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_add_round_pd&expand=144) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vaddpd, rounding = 8))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm512_maskz_add_round_pd( +#[cfg_attr(test, assert_instr(vaddpd, ROUNDING = 8))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm512_maskz_add_round_pd<const ROUNDING: i32>( k: __mmask8, a: __m512d, b: __m512d, - rounding: i32, ) -> __m512d { + static_assert_rounding!(ROUNDING); let a = a.as_f64x8(); let b = b.as_f64x8(); - macro_rules!
call { - ($imm4:expr) => { - vaddpd(a, b, $imm4) - }; - } - let addround = constify_imm4_round!(rounding, call); + let r = vaddpd(a, b, ROUNDING); let zero = _mm512_setzero_pd().as_f64x8(); - transmute(simd_select_bitmask(k, addround, zero)) + transmute(simd_select_bitmask(k, r, zero)) } /// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst.\ @@ -22525,75 +22282,32 @@ pub unsafe fn _mm_maskz_shuffle_epi32(k: __mmask8, a: __m128i, imm8: _MM_PERM_EN /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_shuffle_ps&expand=5203) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vshufps, imm8 = 0))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm512_shuffle_ps(a: __m512, b: __m512, imm8: i32) -> __m512 { - assert!(imm8 >= 0 && imm8 <= 255); - let imm8 = (imm8 & 0xFF) as u8; - macro_rules! shuffle4 { - ( - $a:expr, - $b:expr, - $c:expr, - $d:expr, - $e:expr, - $f:expr, - $g:expr, - $h:expr, - $i:expr, - $j:expr, - $k:expr, - $l:expr, - $m:expr, - $n:expr, - $o:expr, - $p:expr - ) => { - simd_shuffle16( - a, - b, - [ - $a, $b, $c, $d, $e, $f, $g, $h, $i, $j, $k, $l, $m, $n, $o, $p, - ], - ) - }; - } - macro_rules! shuffle3 { - ($a:expr, $b:expr, $c:expr, $e:expr, $f:expr, $g:expr, $i:expr, $j:expr, $k:expr, $m:expr, $n:expr, $o:expr) => { - match (imm8 >> 6) & 0x3 { - 0 => shuffle4!($a, $b, $c, 16, $e, $f, $g, 20, $i, $j, $k, 24, $m, $n, $o, 28), - 1 => shuffle4!($a, $b, $c, 17, $e, $f, $g, 21, $i, $j, $k, 25, $m, $n, $o, 29), - 2 => shuffle4!($a, $b, $c, 18, $e, $f, $g, 22, $i, $j, $k, 26, $m, $n, $o, 30), - _ => shuffle4!($a, $b, $c, 19, $e, $f, $g, 23, $i, $j, $k, 27, $m, $n, $o, 31), - } - }; - } - macro_rules! shuffle2 { - ($a:expr, $b:expr, $e:expr, $f:expr, $i:expr, $j:expr, $m:expr, $n:expr) => { - match (imm8 >> 4) & 0x3 { - 0 => shuffle3!($a, $b, 16, $e, $f, 20, $i, $j, 24, $m, $n, 28), - 1 => shuffle3!($a, $b, 17, $e, $f, 21, $i, $j, 25, $m, $n, 29), - 2 => shuffle3!($a, $b, 18, $e, $f, 22, $i, $j, 26, $m, $n, 30), - _ => shuffle3!($a, $b, 19, $e, $f, 23, $i, $j, 27, $m, $n, 31), - } - }; - } - macro_rules! 
shuffle1 { - ($a:expr, $e:expr, $i: expr, $m: expr) => { - match (imm8 >> 2) & 0x3 { - 0 => shuffle2!($a, 0, $e, 4, $i, 8, $m, 12), - 1 => shuffle2!($a, 1, $e, 5, $i, 9, $m, 13), - 2 => shuffle2!($a, 2, $e, 6, $i, 10, $m, 14), - _ => shuffle2!($a, 3, $e, 7, $i, 11, $m, 15), - } - }; - } - match imm8 & 0x3 { - 0 => shuffle1!(0, 4, 8, 12), - 1 => shuffle1!(1, 5, 9, 13), - 2 => shuffle1!(2, 6, 10, 14), - _ => shuffle1!(3, 7, 11, 15), - } +#[cfg_attr(test, assert_instr(vshufps, MASK = 3))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm512_shuffle_ps<const MASK: i32>(a: __m512, b: __m512) -> __m512 { + static_assert_imm8!(MASK); + simd_shuffle16( + a, + b, + [ + MASK as u32 & 0b11, + (MASK as u32 >> 2) & 0b11, + ((MASK as u32 >> 4) & 0b11) + 16, + ((MASK as u32 >> 6) & 0b11) + 16, + (MASK as u32 & 0b11) + 4, + ((MASK as u32 >> 2) & 0b11) + 4, + ((MASK as u32 >> 4) & 0b11) + 20, + ((MASK as u32 >> 6) & 0b11) + 20, + (MASK as u32 & 0b11) + 8, + ((MASK as u32 >> 2) & 0b11) + 8, + ((MASK as u32 >> 4) & 0b11) + 24, + ((MASK as u32 >> 6) & 0b11) + 24, + (MASK as u32 & 0b11) + 12, + ((MASK as u32 >> 2) & 0b11) + 12, + ((MASK as u32 >> 4) & 0b11) + 28, + ((MASK as u32 >> 6) & 0b11) + 28, + ], + ) } /// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -22601,21 +22315,15 @@ pub unsafe fn _mm512_shuffle_ps(a: __m512, b: __m512, imm8: i32) -> __m512 { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_shuffle_ps&expand=5201) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vshufps, imm8 = 0))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm512_mask_shuffle_ps( +#[cfg_attr(test, assert_instr(vshufps, MASK = 3))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm512_mask_shuffle_ps<const MASK: i32>( src: __m512, k: __mmask16, a: __m512, b: __m512, - imm8: i32, ) -> __m512 { - macro_rules! call { - ($imm8:expr) => { - _mm512_shuffle_ps(a, b, $imm8) - }; - } - let r = constify_imm8_sae!(imm8, call); + let r = _mm512_shuffle_ps::<MASK>(a, b); transmute(simd_select_bitmask(k, r.as_f32x16(), src.as_f32x16())) } @@ -22624,15 +22332,14 @@ pub unsafe fn _mm512_mask_shuffle_ps( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_shuffle_ps&expand=5202) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vshufps, imm8 = 0))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm512_maskz_shuffle_ps(k: __mmask16, a: __m512, b: __m512, imm8: i32) -> __m512 { - macro_rules!
call { - ($imm8:expr) => { - _mm512_shuffle_ps(a, b, $imm8) - }; - } - let r = constify_imm8_sae!(imm8, call); +#[cfg_attr(test, assert_instr(vshufps, MASK = 3))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm512_maskz_shuffle_ps<const MASK: i32>( + k: __mmask16, + a: __m512, + b: __m512, +) -> __m512 { + let r = _mm512_shuffle_ps::<MASK>(a, b); let zero = _mm512_setzero_ps().as_f32x16(); transmute(simd_select_bitmask(k, r.as_f32x16(), zero)) } @@ -22642,21 +22349,15 @@ pub unsafe fn _mm512_maskz_shuffle_ps(k: __mmask16, a: __m512, b: __m512, imm8: /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_shuffle_ps&expand=5198) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vshufps, imm8 = 9))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm256_mask_shuffle_ps( +#[cfg_attr(test, assert_instr(vshufps, MASK = 3))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm256_mask_shuffle_ps<const MASK: i32>( src: __m256, k: __mmask8, a: __m256, b: __m256, - imm8: i32, ) -> __m256 { - macro_rules! call { - ($imm8:expr) => { - _mm256_shuffle_ps(a, b, $imm8) - }; - } - let r = constify_imm8_sae!(imm8, call); + let r = _mm256_shuffle_ps::<MASK>(a, b); transmute(simd_select_bitmask(k, r.as_f32x8(), src.as_f32x8())) } @@ -22665,15 +22366,14 @@ pub unsafe fn _mm256_mask_shuffle_ps( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_shuffle_ps&expand=5199) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vshufps, imm8 = 9))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm256_maskz_shuffle_ps(k: __mmask8, a: __m256, b: __m256, imm8: i32) -> __m256 { - macro_rules! call { - ($imm8:expr) => { - _mm256_shuffle_ps(a, b, $imm8) - }; - } - let r = constify_imm8_sae!(imm8, call); +#[cfg_attr(test, assert_instr(vshufps, MASK = 3))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm256_maskz_shuffle_ps<const MASK: i32>( + k: __mmask8, + a: __m256, + b: __m256, +) -> __m256 { + let r = _mm256_shuffle_ps::<MASK>(a, b); let zero = _mm256_setzero_ps().as_f32x8(); transmute(simd_select_bitmask(k, r.as_f32x8(), zero)) } @@ -22683,21 +22383,15 @@ pub unsafe fn _mm256_maskz_shuffle_ps(k: __mmask8, a: __m256, b: __m256, imm8: i /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_shuffle_ps&expand=5195) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vshufps, imm8 = 9))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm_mask_shuffle_ps( +#[cfg_attr(test, assert_instr(vshufps, MASK = 3))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm_mask_shuffle_ps<const MASK: i32>( src: __m128, k: __mmask8, a: __m128, b: __m128, - imm8: i32, ) -> __m128 { - macro_rules! call { - ($imm8:expr) => { - _mm_shuffle_ps::<$imm8>(a, b) - }; - } - let r = constify_imm8_sae!(imm8, call); + let r = _mm_shuffle_ps::<MASK>(a, b); transmute(simd_select_bitmask(k, r.as_f32x4(), src.as_f32x4())) } @@ -22706,15 +22400,10 @@ pub unsafe fn _mm_mask_shuffle_ps( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_shuffle_ps&expand=5196) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vshufps, imm8 = 9))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm_maskz_shuffle_ps(k: __mmask8, a: __m128, b: __m128, imm8: i32) -> __m128 { - macro_rules!
call { - ($imm8:expr) => { - _mm_shuffle_ps::<$imm8>(a, b) - }; - } - let r = constify_imm8_sae!(imm8, call); +#[cfg_attr(test, assert_instr(vshufps, MASK = 3))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm_maskz_shuffle_ps<const MASK: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 { + let r = _mm_shuffle_ps::<MASK>(a, b); let zero = _mm_setzero_ps().as_f32x4(); transmute(simd_select_bitmask(k, r.as_f32x4(), zero)) } @@ -22724,76 +22413,24 @@ pub unsafe fn _mm_maskz_shuffle_ps(k: __mmask8, a: __m128, b: __m128, imm8: i32) /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_shuffle_pd&expand=5192) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vshufpd, imm8 = 3))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm512_shuffle_pd(a: __m512d, b: __m512d, imm8: i32) -> __m512d { - assert!(imm8 >= 0 && imm8 <= 255); - let imm8 = (imm8 & 0xFF) as u8; - macro_rules! shuffle8 { - ($a:expr, $b:expr, $c:expr, $d:expr, $e:expr, $f:expr, $g:expr, $h:expr) => { - simd_shuffle8(a, b, [$a, $b, $c, $d, $e, $f, $g, $h]) - }; - } - macro_rules! shuffle7 { - ($a:expr, $b:expr, $c:expr, $d:expr, $e:expr, $f:expr, $g:expr) => { - match (imm8 >> 7) & 0x1 { - 0 => shuffle8!($a, $b, $c, $d, $e, $f, $g, 14), - _ => shuffle8!($a, $b, $c, $d, $e, $f, $g, 15), - } - }; - } - macro_rules! shuffle6 { - ($a:expr, $b:expr, $c:expr, $d:expr, $e:expr, $f:expr) => { - match (imm8 >> 6) & 0x1 { - 0 => shuffle7!($a, $b, $c, $d, $e, $f, 6), - _ => shuffle7!($a, $b, $c, $d, $e, $f, 7), - } - }; - } - macro_rules! shuffle5 { - ($a:expr, $b:expr, $c:expr, $d:expr, $e:expr) => { - match (imm8 >> 5) & 0x1 { - 0 => shuffle6!($a, $b, $c, $d, $e, 12), - _ => shuffle6!($a, $b, $c, $d, $e, 13), - } - }; - } - macro_rules! shuffle4 { - ($a:expr, $b:expr, $c:expr, $d:expr) => { - match (imm8 >> 4) & 0x1 { - 0 => shuffle5!($a, $b, $c, $d, 4), - _ => shuffle5!($a, $b, $c, $d, 5), - } - }; - } - macro_rules! shuffle3 { - ($a:expr, $b:expr, $c:expr) => { - match (imm8 >> 3) & 0x1 { - 0 => shuffle4!($a, $b, $c, 10), - _ => shuffle4!($a, $b, $c, 11), - } - }; - } - macro_rules! shuffle2 { - ($a:expr, $b:expr) => { - match (imm8 >> 2) & 0x1 { - 0 => shuffle3!($a, $b, 2), - _ => shuffle3!($a, $b, 3), - } - }; - } - macro_rules! shuffle1 { - ($a:expr) => { - match (imm8 >> 1) & 0x1 { - 0 => shuffle2!($a, 8), - _ => shuffle2!($a, 9), - } - }; - } - match imm8 & 0x1 { - 0 => shuffle1!(0), - _ => shuffle1!(1), - } +#[cfg_attr(test, assert_instr(vshufpd, MASK = 3))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm512_shuffle_pd<const MASK: i32>(a: __m512d, b: __m512d) -> __m512d { + static_assert_imm8!(MASK); + simd_shuffle8( + a, + b, + [ + MASK as u32 & 0b1, + ((MASK as u32 >> 1) & 0b1) + 8, + ((MASK as u32 >> 2) & 0b1) + 2, + ((MASK as u32 >> 3) & 0b1) + 10, + ((MASK as u32 >> 4) & 0b1) + 4, + ((MASK as u32 >> 5) & 0b1) + 12, + ((MASK as u32 >> 6) & 0b1) + 6, + ((MASK as u32 >> 7) & 0b1) + 14, + ], + ) } /// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
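The rewrite above replaces the eight-level macro ladder with a single simd_shuffle8 whose index array is computed directly from MASK: even-numbered MASK bits pick the low or high element of each 128-bit lane of a (indices 0 through 7), odd-numbered bits pick from b (indices 8 through 15 in the concatenated view). A hypothetical helper, not part of the patch, that reproduces the same index array for inspection:

    // Mirrors the index computation inside the const-generic
    // _mm512_shuffle_pd above (sketch only).
    fn shufpd_512_indices(mask: u8) -> [u32; 8] {
        let m = mask as u32;
        [
            m & 0b1,
            ((m >> 1) & 0b1) + 8,
            ((m >> 2) & 0b1) + 2,
            ((m >> 3) & 0b1) + 10,
            ((m >> 4) & 0b1) + 4,
            ((m >> 5) & 0b1) + 12,
            ((m >> 6) & 0b1) + 6,
            ((m >> 7) & 0b1) + 14,
        ]
    }

    fn main() {
        // MASK = 0b1111_1111 selects the upper element of every pair: odd
        // indices of `a` interleaved with odd indices of `b`, matching the
        // expectations in the new test_mm512_shuffle_pd tests further down.
        assert_eq!(shufpd_512_indices(0b1111_1111), [1, 9, 3, 11, 5, 13, 7, 15]);
    }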
@@ -22801,21 +22438,15 @@ pub unsafe fn _mm512_shuffle_pd(a: __m512d, b: __m512d, imm8: i32) -> __m512d { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_shuffle_pd&expand=5190) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vshufpd, imm8 = 3))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm512_mask_shuffle_pd( +#[cfg_attr(test, assert_instr(vshufpd, MASK = 3))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm512_mask_shuffle_pd<const MASK: i32>( src: __m512d, k: __mmask8, a: __m512d, b: __m512d, - imm8: i32, ) -> __m512d { - macro_rules! call { - ($imm8:expr) => { - _mm512_shuffle_pd(a, b, $imm8) - }; - } - let r = constify_imm8_sae!(imm8, call); + let r = _mm512_shuffle_pd::<MASK>(a, b); transmute(simd_select_bitmask(k, r.as_f64x8(), src.as_f64x8())) } @@ -22824,15 +22455,14 @@ pub unsafe fn _mm512_mask_shuffle_pd( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_shuffle_pd&expand=5191) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vshufpd, imm8 = 3))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm512_maskz_shuffle_pd(k: __mmask8, a: __m512d, b: __m512d, imm8: i32) -> __m512d { - macro_rules! call { - ($imm8:expr) => { - _mm512_shuffle_pd(a, b, $imm8) - }; - } - let r = constify_imm8_sae!(imm8, call); +#[cfg_attr(test, assert_instr(vshufpd, MASK = 3))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm512_maskz_shuffle_pd<const MASK: i32>( + k: __mmask8, + a: __m512d, + b: __m512d, +) -> __m512d { + let r = _mm512_shuffle_pd::<MASK>(a, b); let zero = _mm512_setzero_pd().as_f64x8(); transmute(simd_select_bitmask(k, r.as_f64x8(), zero)) } @@ -22842,21 +22472,15 @@ pub unsafe fn _mm512_maskz_shuffle_pd(k: __mmask8, a: __m512d, b: __m512d, imm8: /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_shuffle_pd&expand=5187) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vshufpd, imm8 = 9))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm256_mask_shuffle_pd( +#[cfg_attr(test, assert_instr(vshufpd, MASK = 3))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm256_mask_shuffle_pd<const MASK: i32>( src: __m256d, k: __mmask8, a: __m256d, b: __m256d, - imm8: i32, ) -> __m256d { - macro_rules! call { - ($imm8:expr) => { - _mm256_shuffle_pd(a, b, $imm8) - }; - } - let r = constify_imm8_sae!(imm8, call); + let r = _mm256_shuffle_pd::<MASK>(a, b); transmute(simd_select_bitmask(k, r.as_f64x4(), src.as_f64x4())) } @@ -22865,15 +22489,14 @@ pub unsafe fn _mm256_mask_shuffle_pd( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_shuffle_pd&expand=5188) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vshufpd, imm8 = 9))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm256_maskz_shuffle_pd(k: __mmask8, a: __m256d, b: __m256d, imm8: i32) -> __m256d { - macro_rules!
call { - ($imm8:expr) => { - _mm256_shuffle_pd(a, b, $imm8) - }; - } - let r = constify_imm8_sae!(imm8, call); +#[cfg_attr(test, assert_instr(vshufpd, MASK = 3))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm256_maskz_shuffle_pd<const MASK: i32>( + k: __mmask8, + a: __m256d, + b: __m256d, +) -> __m256d { + let r = _mm256_shuffle_pd::<MASK>(a, b); let zero = _mm256_setzero_pd().as_f64x4(); transmute(simd_select_bitmask(k, r.as_f64x4(), zero)) } @@ -22883,21 +22506,15 @@ pub unsafe fn _mm256_maskz_shuffle_pd(k: __mmask8, a: __m256d, b: __m256d, imm8: /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_shuffle_pd&expand=5184) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vshufpd, imm8 = 9))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm_mask_shuffle_pd( +#[cfg_attr(test, assert_instr(vshufpd, MASK = 1))] +#[rustc_legacy_const_generics(4)] +pub unsafe fn _mm_mask_shuffle_pd<const MASK: i32>( src: __m128d, k: __mmask8, a: __m128d, b: __m128d, - imm8: i32, ) -> __m128d { - macro_rules! call { - ($imm8:expr) => { - _mm_shuffle_pd(a, b, $imm8) - }; - } - let r = constify_imm8_sae!(imm8, call); + let r = _mm_shuffle_pd::<MASK>(a, b); transmute(simd_select_bitmask(k, r.as_f64x2(), src.as_f64x2())) } @@ -22906,15 +22523,14 @@ pub unsafe fn _mm_mask_shuffle_pd( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_shuffle_pd&expand=5185) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vshufpd, imm8 = 9))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm_maskz_shuffle_pd(k: __mmask8, a: __m128d, b: __m128d, imm8: i32) -> __m128d { - macro_rules! call { - ($imm8:expr) => { - _mm_shuffle_pd(a, b, $imm8) - }; - } - let r = constify_imm8_sae!(imm8, call); +#[cfg_attr(test, assert_instr(vshufpd, MASK = 1))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn _mm_maskz_shuffle_pd<const MASK: i32>( + k: __mmask8, + a: __m128d, + b: __m128d, +) -> __m128d { + let r = _mm_shuffle_pd::<MASK>(a, b); let zero = _mm_setzero_pd().as_f64x2(); transmute(simd_select_bitmask(k, r.as_f64x2(), zero)) } @@ -42201,7 +41817,7 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm512_roundscale_ps() { let a = _mm512_set1_ps(1.1); - let r = _mm512_roundscale_ps(a, 0); + let r = _mm512_roundscale_ps::<0b00_00_00_00>(a); let e = _mm512_set1_ps(1.0); assert_eq_m512(r, e); } @@ -42209,10 +41825,10 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm512_mask_roundscale_ps() { let a = _mm512_set1_ps(1.1); - let r = _mm512_mask_roundscale_ps(a, 0, a, 0); + let r = _mm512_mask_roundscale_ps::<0b00_00_00_00>(a, 0, a); let e = _mm512_set1_ps(1.1); assert_eq_m512(r, e); - let r = _mm512_mask_roundscale_ps(a, 0b11111111_11111111, a, 0); + let r = _mm512_mask_roundscale_ps::<0b00_00_00_00>(a, 0b11111111_11111111, a); let e = _mm512_set1_ps(1.0); assert_eq_m512(r, e); } @@ -42220,9 +41836,9 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm512_maskz_roundscale_ps() { let a = _mm512_set1_ps(1.1); - let r = _mm512_maskz_roundscale_ps(0, a, 0); + let r = _mm512_maskz_roundscale_ps::<0b00_00_00_00>(0, a); assert_eq_m512(r, _mm512_setzero_ps()); - let r = _mm512_maskz_roundscale_ps(0b11111111_11111111, a, 0); + let r = _mm512_maskz_roundscale_ps::<0b00_00_00_00>(0b11111111_11111111, a); let e = _mm512_set1_ps(1.0); assert_eq_m512(r, e); } @@ -42230,7 +41846,7 @@ mod tests { #[simd_test(enable = "avx512f,avx512vl")] unsafe fn test_mm256_roundscale_ps() { let a =
_mm256_set1_ps(1.1); - let r = _mm256_roundscale_ps(a, 0); + let r = _mm256_roundscale_ps::<0b00_00_00_00>(a); let e = _mm256_set1_ps(1.0); assert_eq_m256(r, e); } @@ -42238,10 +41854,10 @@ mod tests { #[simd_test(enable = "avx512f,avx512vl")] unsafe fn test_mm256_mask_roundscale_ps() { let a = _mm256_set1_ps(1.1); - let r = _mm256_mask_roundscale_ps(a, 0, a, 0); + let r = _mm256_mask_roundscale_ps::<0b00_00_00_00>(a, 0, a); let e = _mm256_set1_ps(1.1); assert_eq_m256(r, e); - let r = _mm256_mask_roundscale_ps(a, 0b11111111, a, 0); + let r = _mm256_mask_roundscale_ps::<0b00_00_00_00>(a, 0b11111111, a); let e = _mm256_set1_ps(1.0); assert_eq_m256(r, e); } @@ -42249,9 +41865,9 @@ mod tests { #[simd_test(enable = "avx512f,avx512vl")] unsafe fn test_mm256_maskz_roundscale_ps() { let a = _mm256_set1_ps(1.1); - let r = _mm256_maskz_roundscale_ps(0, a, 0); + let r = _mm256_maskz_roundscale_ps::<0b00_00_00_00>(0, a); assert_eq_m256(r, _mm256_setzero_ps()); - let r = _mm256_maskz_roundscale_ps(0b11111111, a, 0); + let r = _mm256_maskz_roundscale_ps::<0b00_00_00_00>(0b11111111, a); let e = _mm256_set1_ps(1.0); assert_eq_m256(r, e); } @@ -42259,7 +41875,7 @@ mod tests { #[simd_test(enable = "avx512f,avx512vl")] unsafe fn test_mm_roundscale_ps() { let a = _mm_set1_ps(1.1); - let r = _mm_roundscale_ps(a, 0); + let r = _mm_roundscale_ps::<0b00_00_00_00>(a); let e = _mm_set1_ps(1.0); assert_eq_m128(r, e); } @@ -42267,10 +41883,10 @@ mod tests { #[simd_test(enable = "avx512f,avx512vl")] unsafe fn test_mm_mask_roundscale_ps() { let a = _mm_set1_ps(1.1); - let r = _mm_mask_roundscale_ps(a, 0, a, 0); + let r = _mm_mask_roundscale_ps::<0b00_00_00_00>(a, 0, a); let e = _mm_set1_ps(1.1); assert_eq_m128(r, e); - let r = _mm_mask_roundscale_ps(a, 0b00001111, a, 0); + let r = _mm_mask_roundscale_ps::<0b00_00_00_00>(a, 0b00001111, a); let e = _mm_set1_ps(1.0); assert_eq_m128(r, e); } @@ -42278,9 +41894,9 @@ mod tests { #[simd_test(enable = "avx512f,avx512vl")] unsafe fn test_mm_maskz_roundscale_ps() { let a = _mm_set1_ps(1.1); - let r = _mm_maskz_roundscale_ps(0, a, 0); + let r = _mm_maskz_roundscale_ps::<0b00_00_00_00>(0, a); assert_eq_m128(r, _mm_setzero_ps()); - let r = _mm_maskz_roundscale_ps(0b00001111, a, 0); + let r = _mm_maskz_roundscale_ps::<0b00_00_00_00>(0b00001111, a); let e = _mm_set1_ps(1.0); assert_eq_m128(r, e); } @@ -42387,7 +42003,7 @@ mod tests { let a = _mm512_set1_ps(f32::NAN); let b = _mm512_set1_ps(f32::MAX); let c = _mm512_set1_epi32(i32::MAX); - let r = _mm512_fixupimm_ps(a, b, c, 5); + let r = _mm512_fixupimm_ps::<5>(a, b, c); let e = _mm512_set1_ps(0.0); assert_eq_m512(r, e); } @@ -42403,7 +42020,7 @@ mod tests { ); let b = _mm512_set1_ps(f32::MAX); let c = _mm512_set1_epi32(i32::MAX); - let r = _mm512_mask_fixupimm_ps(a, 0b11111111_00000000, b, c, 5); + let r = _mm512_mask_fixupimm_ps::<5>(a, 0b11111111_00000000, b, c); let e = _mm512_set_ps( 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1., ); assert_eq_m512(r, e); } @@ -42421,7 +42038,7 @@ mod tests { ); let b = _mm512_set1_ps(f32::MAX); let c = _mm512_set1_epi32(i32::MAX); - let r = _mm512_maskz_fixupimm_ps(0b11111111_00000000, a, b, c, 5); + let r = _mm512_maskz_fixupimm_ps::<5>(0b11111111_00000000, a, b, c); let e = _mm512_set_ps( 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., ); assert_eq_m512(r, e); } @@ -42433,7 +42050,7 @@ mod tests { let a = _mm256_set1_ps(f32::NAN); let b = _mm256_set1_ps(f32::MAX); let c = _mm256_set1_epi32(i32::MAX); - let r = _mm256_fixupimm_ps(a, b, c, 5); + let r =
_mm256_fixupimm_ps::<5>(a, b, c); let e = _mm256_set1_ps(0.0); assert_eq_m256(r, e); } @@ -42443,7 +42060,7 @@ mod tests { let a = _mm256_set1_ps(f32::NAN); let b = _mm256_set1_ps(f32::MAX); let c = _mm256_set1_epi32(i32::MAX); - let r = _mm256_mask_fixupimm_ps(a, 0b11111111, b, c, 5); + let r = _mm256_mask_fixupimm_ps::<5>(a, 0b11111111, b, c); let e = _mm256_set1_ps(0.0); assert_eq_m256(r, e); } @@ -42453,7 +42070,7 @@ mod tests { let a = _mm256_set1_ps(f32::NAN); let b = _mm256_set1_ps(f32::MAX); let c = _mm256_set1_epi32(i32::MAX); - let r = _mm256_maskz_fixupimm_ps(0b11111111, a, b, c, 5); + let r = _mm256_maskz_fixupimm_ps::<5>(0b11111111, a, b, c); let e = _mm256_set1_ps(0.0); assert_eq_m256(r, e); } @@ -42463,7 +42080,7 @@ mod tests { let a = _mm_set1_ps(f32::NAN); let b = _mm_set1_ps(f32::MAX); let c = _mm_set1_epi32(i32::MAX); - let r = _mm_fixupimm_ps(a, b, c, 5); + let r = _mm_fixupimm_ps::<5>(a, b, c); let e = _mm_set1_ps(0.0); assert_eq_m128(r, e); } @@ -42473,7 +42090,7 @@ mod tests { let a = _mm_set1_ps(f32::NAN); let b = _mm_set1_ps(f32::MAX); let c = _mm_set1_epi32(i32::MAX); - let r = _mm_mask_fixupimm_ps(a, 0b00001111, b, c, 5); + let r = _mm_mask_fixupimm_ps::<5>(a, 0b00001111, b, c); let e = _mm_set1_ps(0.0); assert_eq_m128(r, e); } @@ -42483,7 +42100,7 @@ mod tests { let a = _mm_set1_ps(f32::NAN); let b = _mm_set1_ps(f32::MAX); let c = _mm_set1_epi32(i32::MAX); - let r = _mm_maskz_fixupimm_ps(0b00001111, a, b, c, 5); + let r = _mm_maskz_fixupimm_ps::<5>(0b00001111, a, b, c); let e = _mm_set1_ps(0.0); assert_eq_m128(r, e); } @@ -42493,7 +42110,7 @@ mod tests { let a = _mm512_set1_epi32(1 << 2); let b = _mm512_set1_epi32(1 << 1); let c = _mm512_set1_epi32(1 << 0); - let r = _mm512_ternarylogic_epi32(a, b, c, 8); + let r = _mm512_ternarylogic_epi32::<8>(a, b, c); let e = _mm512_set1_epi32(0); assert_eq_m512i(r, e); } @@ -42503,9 +42120,9 @@ mod tests { let src = _mm512_set1_epi32(1 << 2); let a = _mm512_set1_epi32(1 << 1); let b = _mm512_set1_epi32(1 << 0); - let r = _mm512_mask_ternarylogic_epi32(src, 0, a, b, 8); + let r = _mm512_mask_ternarylogic_epi32::<8>(src, 0, a, b); assert_eq_m512i(r, src); - let r = _mm512_mask_ternarylogic_epi32(src, 0b11111111_11111111, a, b, 8); + let r = _mm512_mask_ternarylogic_epi32::<8>(src, 0b11111111_11111111, a, b); let e = _mm512_set1_epi32(0); assert_eq_m512i(r, e); } @@ -42515,9 +42132,9 @@ mod tests { let a = _mm512_set1_epi32(1 << 2); let b = _mm512_set1_epi32(1 << 1); let c = _mm512_set1_epi32(1 << 0); - let r = _mm512_maskz_ternarylogic_epi32(0, a, b, c, 9); + let r = _mm512_maskz_ternarylogic_epi32::<9>(0, a, b, c); assert_eq_m512i(r, _mm512_setzero_si512()); - let r = _mm512_maskz_ternarylogic_epi32(0b11111111_11111111, a, b, c, 8); + let r = _mm512_maskz_ternarylogic_epi32::<8>(0b11111111_11111111, a, b, c); let e = _mm512_set1_epi32(0); assert_eq_m512i(r, e); } @@ -42527,7 +42144,7 @@ mod tests { let a = _mm256_set1_epi32(1 << 2); let b = _mm256_set1_epi32(1 << 1); let c = _mm256_set1_epi32(1 << 0); - let r = _mm256_ternarylogic_epi32(a, b, c, 8); + let r = _mm256_ternarylogic_epi32::<8>(a, b, c); let e = _mm256_set1_epi32(0); assert_eq_m256i(r, e); } @@ -42537,9 +42154,9 @@ mod tests { let src = _mm256_set1_epi32(1 << 2); let a = _mm256_set1_epi32(1 << 1); let b = _mm256_set1_epi32(1 << 0); - let r = _mm256_mask_ternarylogic_epi32(src, 0, a, b, 8); + let r = _mm256_mask_ternarylogic_epi32::<8>(src, 0, a, b); assert_eq_m256i(r, src); - let r = _mm256_mask_ternarylogic_epi32(src, 0b11111111, a, b, 8); + let r = 
_mm256_mask_ternarylogic_epi32::<8>(src, 0b11111111, a, b); let e = _mm256_set1_epi32(0); assert_eq_m256i(r, e); } @@ -42549,9 +42166,9 @@ mod tests { let a = _mm256_set1_epi32(1 << 2); let b = _mm256_set1_epi32(1 << 1); let c = _mm256_set1_epi32(1 << 0); - let r = _mm256_maskz_ternarylogic_epi32(0, a, b, c, 9); + let r = _mm256_maskz_ternarylogic_epi32::<9>(0, a, b, c); assert_eq_m256i(r, _mm256_setzero_si256()); - let r = _mm256_maskz_ternarylogic_epi32(0b11111111, a, b, c, 8); + let r = _mm256_maskz_ternarylogic_epi32::<8>(0b11111111, a, b, c); let e = _mm256_set1_epi32(0); assert_eq_m256i(r, e); } @@ -42561,7 +42178,7 @@ mod tests { let a = _mm_set1_epi32(1 << 2); let b = _mm_set1_epi32(1 << 1); let c = _mm_set1_epi32(1 << 0); - let r = _mm_ternarylogic_epi32(a, b, c, 8); + let r = _mm_ternarylogic_epi32::<8>(a, b, c); let e = _mm_set1_epi32(0); assert_eq_m128i(r, e); } @@ -42571,9 +42188,9 @@ mod tests { let src = _mm_set1_epi32(1 << 2); let a = _mm_set1_epi32(1 << 1); let b = _mm_set1_epi32(1 << 0); - let r = _mm_mask_ternarylogic_epi32(src, 0, a, b, 8); + let r = _mm_mask_ternarylogic_epi32::<8>(src, 0, a, b); assert_eq_m128i(r, src); - let r = _mm_mask_ternarylogic_epi32(src, 0b00001111, a, b, 8); + let r = _mm_mask_ternarylogic_epi32::<8>(src, 0b00001111, a, b); let e = _mm_set1_epi32(0); assert_eq_m128i(r, e); } @@ -42583,9 +42200,9 @@ mod tests { let a = _mm_set1_epi32(1 << 2); let b = _mm_set1_epi32(1 << 1); let c = _mm_set1_epi32(1 << 0); - let r = _mm_maskz_ternarylogic_epi32(0, a, b, c, 9); + let r = _mm_maskz_ternarylogic_epi32::<9>(0, a, b, c); assert_eq_m128i(r, _mm_setzero_si128()); - let r = _mm_maskz_ternarylogic_epi32(0b00001111, a, b, c, 8); + let r = _mm_maskz_ternarylogic_epi32::<8>(0b00001111, a, b, c); let e = _mm_set1_epi32(0); assert_eq_m128i(r, e); } @@ -42691,7 +42308,7 @@ mod tests { 0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007, ); let b = _mm512_set1_ps(-1.); - let r = _mm512_add_round_ps(a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + let r = _mm512_add_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b); #[rustfmt::skip] let e = _mm512_setr_ps( -1., 0.5, 1., 2.5, @@ -42700,7 +42317,7 @@ mod tests { 11., 12.5, 13., -0.99999994, ); assert_eq_m512(r, e); - let r = _mm512_add_round_ps(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = _mm512_add_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b); let e = _mm512_setr_ps( -1., 0.5, 1., 2.5, 3., 4.5, 5., 6.5, 7., 8.5, 9., 10.5, 11., 12.5, 13., -0.9999999, ); @@ -42713,14 +42330,13 @@ mod tests { 0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007, ); let b = _mm512_set1_ps(-1.); - let r = _mm512_mask_add_round_ps(a, 0, a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + let r = _mm512_mask_add_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, 0, a, b); assert_eq_m512(r, a); - let r = _mm512_mask_add_round_ps( + let r = _mm512_mask_add_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( a, 0b11111111_00000000, a, b, - _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC, ); #[rustfmt::skip] let e = _mm512_setr_ps( @@ -42738,13 +42354,12 @@ mod tests { 0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007, ); let b = _mm512_set1_ps(-1.); - let r = _mm512_maskz_add_round_ps(0, a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + let r = _mm512_maskz_add_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b); assert_eq_m512(r, _mm512_setzero_ps()); - 
let r = _mm512_maskz_add_round_ps( + let r = _mm512_maskz_add_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( 0b11111111_00000000, a, b, - _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC, ); #[rustfmt::skip] let e = _mm512_setr_ps( @@ -49133,7 +48748,7 @@ mod tests { let b = _mm512_setr_ps( 2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15., ); - let r = _mm512_shuffle_ps(a, b, 0x0F); + let r = _mm512_shuffle_ps::<0b00_00_11_11>(a, b); let e = _mm512_setr_ps( 8., 8., 2., 2., 16., 16., 10., 10., 8., 8., 2., 2., 16., 16., 10., 10., ); @@ -49148,9 +48763,9 @@ mod tests { let b = _mm512_setr_ps( 2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15., ); - let r = _mm512_mask_shuffle_ps(a, 0, a, b, 0x0F); + let r = _mm512_mask_shuffle_ps::<0b00_00_11_11>(a, 0, a, b); assert_eq_m512(r, a); - let r = _mm512_mask_shuffle_ps(a, 0b11111111_11111111, a, b, 0x0F); + let r = _mm512_mask_shuffle_ps::<0b00_00_11_11>(a, 0b11111111_11111111, a, b); let e = _mm512_setr_ps( 8., 8., 2., 2., 16., 16., 10., 10., 8., 8., 2., 2., 16., 16., 10., 10., ); @@ -49165,9 +48780,9 @@ mod tests { let b = _mm512_setr_ps( 2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15., ); - let r = _mm512_maskz_shuffle_ps(0, a, b, 0x0F); + let r = _mm512_maskz_shuffle_ps::<0b00_00_11_11>(0, a, b); assert_eq_m512(r, _mm512_setzero_ps()); - let r = _mm512_maskz_shuffle_ps(0b00000000_11111111, a, b, 0x0F); + let r = _mm512_maskz_shuffle_ps::<0b00_00_11_11>(0b00000000_11111111, a, b); let e = _mm512_setr_ps( 8., 8., 2., 2., 16., 16., 10., 10., 0., 0., 0., 0., 0., 0., 0., 0., ); @@ -49178,9 +48793,9 @@ mod tests { unsafe fn test_mm256_mask_shuffle_ps() { let a = _mm256_set_ps(1., 4., 5., 8., 9., 12., 13., 16.); let b = _mm256_set_ps(2., 3., 6., 7., 10., 11., 14., 15.); - let r = _mm256_mask_shuffle_ps(a, 0, a, b, 0x0F); + let r = _mm256_mask_shuffle_ps::<0b11_11_11_11>(a, 0, a, b); assert_eq_m256(r, a); - let r = _mm256_mask_shuffle_ps(a, 0b11111111, a, b, 0x0F); + let r = _mm256_mask_shuffle_ps::<0b00_00_11_11>(a, 0b11111111, a, b); let e = _mm256_set_ps(7., 7., 1., 1., 15., 15., 9., 9.); assert_eq_m256(r, e); } @@ -49189,9 +48804,9 @@ mod tests { unsafe fn test_mm256_maskz_shuffle_ps() { let a = _mm256_set_ps(1., 4., 5., 8., 9., 12., 13., 16.); let b = _mm256_set_ps(2., 3., 6., 7., 10., 11., 14., 15.); - let r = _mm256_maskz_shuffle_ps(0, a, b, 0x0F); + let r = _mm256_maskz_shuffle_ps::<0b11_11_11_11>(0, a, b); assert_eq_m256(r, _mm256_setzero_ps()); - let r = _mm256_maskz_shuffle_ps(0b11111111, a, b, 0x0F); + let r = _mm256_maskz_shuffle_ps::<0b00_00_11_11>(0b11111111, a, b); let e = _mm256_set_ps(7., 7., 1., 1., 15., 15., 9., 9.); assert_eq_m256(r, e); } @@ -49200,9 +48815,9 @@ mod tests { unsafe fn test_mm_mask_shuffle_ps() { let a = _mm_set_ps(1., 4., 5., 8.); let b = _mm_set_ps(2., 3., 6., 7.); - let r = _mm_mask_shuffle_ps(a, 0, a, b, 0x0F); + let r = _mm_mask_shuffle_ps::<0b11_11_11_11>(a, 0, a, b); assert_eq_m128(r, a); - let r = _mm_mask_shuffle_ps(a, 0b00001111, a, b, 0x0F); + let r = _mm_mask_shuffle_ps::<0b00_00_11_11>(a, 0b00001111, a, b); let e = _mm_set_ps(7., 7., 1., 1.); assert_eq_m128(r, e); } @@ -49211,9 +48826,9 @@ mod tests { unsafe fn test_mm_maskz_shuffle_ps() { let a = _mm_set_ps(1., 4., 5., 8.); let b = _mm_set_ps(2., 3., 6., 7.); - let r = _mm_maskz_shuffle_ps(0, a, b, 0x0F); + let r = _mm_maskz_shuffle_ps::<0b11_11_11_11>(0, a, b); assert_eq_m128(r, _mm_setzero_ps()); - let r = _mm_maskz_shuffle_ps(0b00001111, a, b, 0x0F); + let r = 
_mm_maskz_shuffle_ps::<0b00_00_11_11>(0b00001111, a, b); let e = _mm_set_ps(7., 7., 1., 1.); assert_eq_m128(r, e); } @@ -55377,4 +54992,35 @@ mod tests { let e: i32 = 1; assert_eq!(r, e); } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_shuffle_pd() { + let a = _mm512_setr_pd(1., 4., 5., 8., 1., 4., 5., 8.); + let b = _mm512_setr_pd(2., 3., 6., 7., 2., 3., 6., 7.); + let r = _mm512_shuffle_pd::<0b11_11_11_11>(a, b); + let e = _mm512_setr_pd(4., 3., 8., 7., 4., 3., 8., 7.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_shuffle_pd() { + let a = _mm512_setr_pd(1., 4., 5., 8., 1., 4., 5., 8.); + let b = _mm512_setr_pd(2., 3., 6., 7., 2., 3., 6., 7.); + let r = _mm512_mask_shuffle_pd::<0b11_11_11_11>(a, 0, a, b); + assert_eq_m512d(r, a); + let r = _mm512_mask_shuffle_pd::<0b11_11_11_11>(a, 0b11111111, a, b); + let e = _mm512_setr_pd(4., 3., 8., 7., 4., 3., 8., 7.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_shuffle_pd() { + let a = _mm512_setr_pd(1., 4., 5., 8., 1., 4., 5., 8.); + let b = _mm512_setr_pd(2., 3., 6., 7., 2., 3., 6., 7.); + let r = _mm512_maskz_shuffle_pd::<0b11_11_11_11>(0, a, b); + assert_eq_m512d(r, _mm512_setzero_pd()); + let r = _mm512_maskz_shuffle_pd::<0b11_11_11_11>(0b00001111, a, b); + let e = _mm512_setr_pd(4., 3., 8., 7., 0., 0., 0., 0.); + assert_eq_m512d(r, e); + } } diff --git a/crates/core_arch/src/x86/macros.rs b/crates/core_arch/src/x86/macros.rs index 46d248bdc4ed8..2025a59d46079 100644 --- a/crates/core_arch/src/x86/macros.rs +++ b/crates/core_arch/src/x86/macros.rs @@ -1,4 +1,20 @@ //! Utility macros. +//! +// Helper struct used to trigger const eval errors when the const generic immediate value `IMM` is +// not a valid rounding mode. +pub(crate) struct ValidateConstRound<const IMM: i32>; +impl<const IMM: i32> ValidateConstRound<IMM> { + pub(crate) const VALID: () = { + let _ = 1 / ((IMM == 4 || IMM == 8 || IMM == 9 || IMM == 10 || IMM == 11) as usize); + }; +} + +#[allow(unused)] +macro_rules! static_assert_rounding { + ($imm:ident) => { + let _ = $crate::core_arch::x86::macros::ValidateConstRound::<$imm>::VALID; + }; +} macro_rules! constify_imm6 { ($imm8:expr, $expand:ident) => { diff --git a/crates/core_arch/src/x86/sse2.rs b/crates/core_arch/src/x86/sse2.rs index 4243f6c8e24e8..8bf109628953c 100644 --- a/crates/core_arch/src/x86/sse2.rs +++ b/crates/core_arch/src/x86/sse2.rs @@ -2653,21 +2653,17 @@ pub unsafe fn _mm_loadu_pd(mem_addr: *const f64) -> __m128d { #[target_feature(enable = "sse2")] #[cfg_attr( all(test, any(not(target_os = "windows"), target_arch = "x86")), - assert_instr(shufps, imm8 = 1) + assert_instr(shufps, MASK = 2) // FIXME shufpd expected )] #[cfg_attr( all(test, all(target_os = "windows", target_arch = "x86_64")), - assert_instr(shufpd, imm8 = 1) + assert_instr(shufpd, MASK = 1) )] -#[rustc_args_required_const(2)] +#[rustc_legacy_const_generics(2)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_shuffle_pd(a: __m128d, b: __m128d, imm8: i32) -> __m128d { - match imm8 & 0b11 { - 0b00 => simd_shuffle2(a, b, [0, 2]), - 0b01 => simd_shuffle2(a, b, [1, 2]), - 0b10 => simd_shuffle2(a, b, [0, 3]), - _ => simd_shuffle2(a, b, [1, 3]), - } +pub unsafe fn _mm_shuffle_pd<const MASK: i32>(a: __m128d, b: __m128d) -> __m128d { + static_assert_imm8!(MASK); + simd_shuffle2(a, b, [MASK as u32 & 0b1, ((MASK as u32 >> 1) & 0b1) + 2]) } /// Constructs a 128-bit floating-point vector of `[2 x double]`.
The lower @@ -4852,7 +4848,7 @@ mod tests { let a = _mm_setr_pd(1., 2.); let b = _mm_setr_pd(3., 4.); let expected = _mm_setr_pd(1., 3.); - let r = _mm_shuffle_pd(a, b, 0); + let r = _mm_shuffle_pd::<0b00_00_00_00>(a, b); assert_eq_m128d(r, expected); } diff --git a/crates/core_arch/src/x86_64/avx512f.rs b/crates/core_arch/src/x86_64/avx512f.rs index cf1b4b6220fde..a10ac23bbcaf2 100644 --- a/crates/core_arch/src/x86_64/avx512f.rs +++ b/crates/core_arch/src/x86_64/avx512f.rs @@ -2920,7 +2920,7 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm512_roundscale_pd() { let a = _mm512_set1_pd(1.1); - let r = _mm512_roundscale_pd(a, 0); + let r = _mm512_roundscale_pd::<0b00_00_00_00>(a); let e = _mm512_set1_pd(1.0); assert_eq_m512d(r, e); } @@ -2928,10 +2928,10 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm512_mask_roundscale_pd() { let a = _mm512_set1_pd(1.1); - let r = _mm512_mask_roundscale_pd(a, 0, a, 0); + let r = _mm512_mask_roundscale_pd::<0b00_00_00_00>(a, 0, a); let e = _mm512_set1_pd(1.1); assert_eq_m512d(r, e); - let r = _mm512_mask_roundscale_pd(a, 0b11111111, a, 0); + let r = _mm512_mask_roundscale_pd::<0b00_00_00_00>(a, 0b11111111, a); let e = _mm512_set1_pd(1.0); assert_eq_m512d(r, e); } @@ -2939,9 +2939,9 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm512_maskz_roundscale_pd() { let a = _mm512_set1_pd(1.1); - let r = _mm512_maskz_roundscale_pd(0, a, 0); + let r = _mm512_maskz_roundscale_pd::<0b00_00_00_00>(0, a); assert_eq_m512d(r, _mm512_setzero_pd()); - let r = _mm512_maskz_roundscale_pd(0b11111111, a, 0); + let r = _mm512_maskz_roundscale_pd::<0b00_00_00_00>(0b11111111, a); let e = _mm512_set1_pd(1.0); assert_eq_m512d(r, e); } @@ -2949,7 +2949,7 @@ mod tests { #[simd_test(enable = "avx512f,avx512vl")] unsafe fn test_mm256_roundscale_pd() { let a = _mm256_set1_pd(1.1); - let r = _mm256_roundscale_pd(a, 0); + let r = _mm256_roundscale_pd::<0b00_00_00_00>(a); let e = _mm256_set1_pd(1.0); assert_eq_m256d(r, e); } @@ -2957,10 +2957,9 @@ mod tests { #[simd_test(enable = "avx512f,avx512vl")] unsafe fn test_mm256_mask_roundscale_pd() { let a = _mm256_set1_pd(1.1); - let r = _mm256_mask_roundscale_pd(a, 0, a, 0); - let e = _mm256_set1_pd(1.1); - assert_eq_m256d(r, e); - let r = _mm256_mask_roundscale_pd(a, 0b00001111, a, 0); + let r = _mm256_mask_roundscale_pd::<0b00_00_00_00>(a, 0, a); + assert_eq_m256d(r, a); + let r = _mm256_mask_roundscale_pd::<0b00_00_00_00>(a, 0b00001111, a); let e = _mm256_set1_pd(1.0); assert_eq_m256d(r, e); } @@ -2968,9 +2967,9 @@ mod tests { #[simd_test(enable = "avx512f,avx512vl")] unsafe fn test_mm256_maskz_roundscale_pd() { let a = _mm256_set1_pd(1.1); - let r = _mm256_maskz_roundscale_pd(0, a, 0); + let r = _mm256_maskz_roundscale_pd::<0b00_00_00_00>(0, a); assert_eq_m256d(r, _mm256_setzero_pd()); - let r = _mm256_maskz_roundscale_pd(0b00001111, a, 0); + let r = _mm256_maskz_roundscale_pd::<0b00_00_00_00>(0b00001111, a); let e = _mm256_set1_pd(1.0); assert_eq_m256d(r, e); } @@ -2978,7 +2977,7 @@ mod tests { #[simd_test(enable = "avx512f,avx512vl")] unsafe fn test_mm_roundscale_pd() { let a = _mm_set1_pd(1.1); - let r = _mm_roundscale_pd(a, 0); + let r = _mm_roundscale_pd::<0b00_00_00_00>(a); let e = _mm_set1_pd(1.0); assert_eq_m128d(r, e); } @@ -2986,10 +2985,10 @@ mod tests { #[simd_test(enable = "avx512f,avx512vl")] unsafe fn test_mm_mask_roundscale_pd() { let a = _mm_set1_pd(1.1); - let r = _mm_mask_roundscale_pd(a, 0, a, 0); + let r = _mm_mask_roundscale_pd::<0b00_00_00_00>(a, 0, a); let e = 
_mm_set1_pd(1.1); assert_eq_m128d(r, e); - let r = _mm_mask_roundscale_pd(a, 0b00000011, a, 0); + let r = _mm_mask_roundscale_pd::<0b00_00_00_00>(a, 0b00000011, a); let e = _mm_set1_pd(1.0); assert_eq_m128d(r, e); } @@ -2997,9 +2996,9 @@ mod tests { #[simd_test(enable = "avx512f,avx512vl")] unsafe fn test_mm_maskz_roundscale_pd() { let a = _mm_set1_pd(1.1); - let r = _mm_maskz_roundscale_pd(0, a, 0); + let r = _mm_maskz_roundscale_pd::<0b00_00_00_00>(0, a); assert_eq_m128d(r, _mm_setzero_pd()); - let r = _mm_maskz_roundscale_pd(0b00000011, a, 0); + let r = _mm_maskz_roundscale_pd::<0b00_00_00_00>(0b00000011, a); let e = _mm_set1_pd(1.0); assert_eq_m128d(r, e); } @@ -3102,7 +3101,7 @@ mod tests { let a = _mm512_set1_pd(f64::NAN); let b = _mm512_set1_pd(f64::MAX); let c = _mm512_set1_epi64(i32::MAX as i64); - let r = _mm512_fixupimm_pd(a, b, c, 5); + let r = _mm512_fixupimm_pd::<5>(a, b, c); let e = _mm512_set1_pd(0.0); assert_eq_m512d(r, e); } @@ -3112,7 +3111,7 @@ mod tests { let a = _mm512_set_pd(f64::NAN, f64::NAN, f64::NAN, f64::NAN, 1., 1., 1., 1.); let b = _mm512_set1_pd(f64::MAX); let c = _mm512_set1_epi64(i32::MAX as i64); - let r = _mm512_mask_fixupimm_pd(a, 0b11110000, b, c, 5); + let r = _mm512_mask_fixupimm_pd::<5>(a, 0b11110000, b, c); let e = _mm512_set_pd(0., 0., 0., 0., 1., 1., 1., 1.); assert_eq_m512d(r, e); } @@ -3122,7 +3121,7 @@ mod tests { let a = _mm512_set_pd(f64::NAN, f64::NAN, f64::NAN, f64::NAN, 1., 1., 1., 1.); let b = _mm512_set1_pd(f64::MAX); let c = _mm512_set1_epi64(i32::MAX as i64); - let r = _mm512_maskz_fixupimm_pd(0b11110000, a, b, c, 5); + let r = _mm512_maskz_fixupimm_pd::<5>(0b11110000, a, b, c); let e = _mm512_set_pd(0., 0., 0., 0., 0., 0., 0., 0.); assert_eq_m512d(r, e); } @@ -3132,7 +3131,7 @@ mod tests { let a = _mm256_set1_pd(f64::NAN); let b = _mm256_set1_pd(f64::MAX); let c = _mm256_set1_epi64x(i32::MAX as i64); - let r = _mm256_fixupimm_pd(a, b, c, 5); + let r = _mm256_fixupimm_pd::<5>(a, b, c); let e = _mm256_set1_pd(0.0); assert_eq_m256d(r, e); } @@ -3142,7 +3141,7 @@ mod tests { let a = _mm256_set1_pd(f64::NAN); let b = _mm256_set1_pd(f64::MAX); let c = _mm256_set1_epi64x(i32::MAX as i64); - let r = _mm256_mask_fixupimm_pd(a, 0b00001111, b, c, 5); + let r = _mm256_mask_fixupimm_pd::<5>(a, 0b00001111, b, c); let e = _mm256_set1_pd(0.0); assert_eq_m256d(r, e); } @@ -3152,7 +3151,7 @@ mod tests { let a = _mm256_set1_pd(f64::NAN); let b = _mm256_set1_pd(f64::MAX); let c = _mm256_set1_epi64x(i32::MAX as i64); - let r = _mm256_maskz_fixupimm_pd(0b00001111, a, b, c, 5); + let r = _mm256_maskz_fixupimm_pd::<5>(0b00001111, a, b, c); let e = _mm256_set1_pd(0.0); assert_eq_m256d(r, e); } @@ -3162,7 +3161,7 @@ mod tests { let a = _mm_set1_pd(f64::NAN); let b = _mm_set1_pd(f64::MAX); let c = _mm_set1_epi64x(i32::MAX as i64); - let r = _mm_fixupimm_pd(a, b, c, 5); + let r = _mm_fixupimm_pd::<5>(a, b, c); let e = _mm_set1_pd(0.0); assert_eq_m128d(r, e); } @@ -3172,7 +3171,7 @@ mod tests { let a = _mm_set1_pd(f64::NAN); let b = _mm_set1_pd(f64::MAX); let c = _mm_set1_epi64x(i32::MAX as i64); - let r = _mm_mask_fixupimm_pd(a, 0b00000011, b, c, 5); + let r = _mm_mask_fixupimm_pd::<5>(a, 0b00000011, b, c); let e = _mm_set1_pd(0.0); assert_eq_m128d(r, e); } @@ -3182,7 +3181,7 @@ mod tests { let a = _mm_set1_pd(f64::NAN); let b = _mm_set1_pd(f64::MAX); let c = _mm_set1_epi64x(i32::MAX as i64); - let r = _mm_maskz_fixupimm_pd(0b00000011, a, b, c, 5); + let r = _mm_maskz_fixupimm_pd::<5>(0b00000011, a, b, c); let e = _mm_set1_pd(0.0); assert_eq_m128d(r, e); } @@ 
-3192,7 +3191,7 @@ mod tests { let a = _mm512_set1_epi64(1 << 2); let b = _mm512_set1_epi64(1 << 1); let c = _mm512_set1_epi64(1 << 0); - let r = _mm512_ternarylogic_epi64(a, b, c, 8); + let r = _mm512_ternarylogic_epi64::<8>(a, b, c); let e = _mm512_set1_epi64(0); assert_eq_m512i(r, e); } @@ -3202,9 +3201,9 @@ mod tests { let src = _mm512_set1_epi64(1 << 2); let a = _mm512_set1_epi64(1 << 1); let b = _mm512_set1_epi64(1 << 0); - let r = _mm512_mask_ternarylogic_epi64(src, 0, a, b, 8); + let r = _mm512_mask_ternarylogic_epi64::<8>(src, 0, a, b); assert_eq_m512i(r, src); - let r = _mm512_mask_ternarylogic_epi64(src, 0b11111111, a, b, 8); + let r = _mm512_mask_ternarylogic_epi64::<8>(src, 0b11111111, a, b); let e = _mm512_set1_epi64(0); assert_eq_m512i(r, e); } @@ -3214,9 +3213,9 @@ mod tests { let a = _mm512_set1_epi64(1 << 2); let b = _mm512_set1_epi64(1 << 1); let c = _mm512_set1_epi64(1 << 0); - let r = _mm512_maskz_ternarylogic_epi64(0, a, b, c, 9); + let r = _mm512_maskz_ternarylogic_epi64::<9>(0, a, b, c); assert_eq_m512i(r, _mm512_setzero_si512()); - let r = _mm512_maskz_ternarylogic_epi64(0b11111111, a, b, c, 8); + let r = _mm512_maskz_ternarylogic_epi64::<8>(0b11111111, a, b, c); let e = _mm512_set1_epi64(0); assert_eq_m512i(r, e); } @@ -3226,7 +3225,7 @@ mod tests { let a = _mm256_set1_epi64x(1 << 2); let b = _mm256_set1_epi64x(1 << 1); let c = _mm256_set1_epi64x(1 << 0); - let r = _mm256_ternarylogic_epi64(a, b, c, 8); + let r = _mm256_ternarylogic_epi64::<8>(a, b, c); let e = _mm256_set1_epi64x(0); assert_eq_m256i(r, e); } @@ -3236,9 +3235,9 @@ mod tests { let src = _mm256_set1_epi64x(1 << 2); let a = _mm256_set1_epi64x(1 << 1); let b = _mm256_set1_epi64x(1 << 0); - let r = _mm256_mask_ternarylogic_epi64(src, 0, a, b, 8); + let r = _mm256_mask_ternarylogic_epi64::<8>(src, 0, a, b); assert_eq_m256i(r, src); - let r = _mm256_mask_ternarylogic_epi64(src, 0b00001111, a, b, 8); + let r = _mm256_mask_ternarylogic_epi64::<8>(src, 0b00001111, a, b); let e = _mm256_set1_epi64x(0); assert_eq_m256i(r, e); } @@ -3248,9 +3247,9 @@ mod tests { let a = _mm256_set1_epi64x(1 << 2); let b = _mm256_set1_epi64x(1 << 1); let c = _mm256_set1_epi64x(1 << 0); - let r = _mm256_maskz_ternarylogic_epi64(0, a, b, c, 9); + let r = _mm256_maskz_ternarylogic_epi64::<9>(0, a, b, c); assert_eq_m256i(r, _mm256_setzero_si256()); - let r = _mm256_maskz_ternarylogic_epi64(0b00001111, a, b, c, 8); + let r = _mm256_maskz_ternarylogic_epi64::<8>(0b00001111, a, b, c); let e = _mm256_set1_epi64x(0); assert_eq_m256i(r, e); } @@ -3260,7 +3259,7 @@ mod tests { let a = _mm_set1_epi64x(1 << 2); let b = _mm_set1_epi64x(1 << 1); let c = _mm_set1_epi64x(1 << 0); - let r = _mm_ternarylogic_epi64(a, b, c, 8); + let r = _mm_ternarylogic_epi64::<8>(a, b, c); let e = _mm_set1_epi64x(0); assert_eq_m128i(r, e); } @@ -3270,9 +3269,9 @@ mod tests { let src = _mm_set1_epi64x(1 << 2); let a = _mm_set1_epi64x(1 << 1); let b = _mm_set1_epi64x(1 << 0); - let r = _mm_mask_ternarylogic_epi64(src, 0, a, b, 8); + let r = _mm_mask_ternarylogic_epi64::<8>(src, 0, a, b); assert_eq_m128i(r, src); - let r = _mm_mask_ternarylogic_epi64(src, 0b00000011, a, b, 8); + let r = _mm_mask_ternarylogic_epi64::<8>(src, 0b00000011, a, b); let e = _mm_set1_epi64x(0); assert_eq_m128i(r, e); } @@ -3282,9 +3281,9 @@ mod tests { let a = _mm_set1_epi64x(1 << 2); let b = _mm_set1_epi64x(1 << 1); let c = _mm_set1_epi64x(1 << 0); - let r = _mm_maskz_ternarylogic_epi64(0, a, b, c, 9); + let r = _mm_maskz_ternarylogic_epi64::<9>(0, a, b, c); assert_eq_m128i(r,
_mm_setzero_si128()); - let r = _mm_maskz_ternarylogic_epi64(0b00000011, a, b, c, 8); + let r = _mm_maskz_ternarylogic_epi64::<8>(0b00000011, a, b, c); let e = _mm_set1_epi64x(0); assert_eq_m128i(r, e); } @@ -5308,10 +5307,10 @@ mod tests { unsafe fn test_mm512_add_round_pd() { let a = _mm512_setr_pd(8., 9.5, 10., 11.5, 12., 13.5, 14., 0.000000000000000007); let b = _mm512_set1_pd(-1.); - let r = _mm512_add_round_pd(a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + let r = _mm512_add_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b); let e = _mm512_setr_pd(7., 8.5, 9., 10.5, 11., 12.5, 13., -1.0); assert_eq_m512d(r, e); - let r = _mm512_add_round_pd(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let r = _mm512_add_round_pd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b); let e = _mm512_setr_pd(7., 8.5, 9., 10.5, 11., 12.5, 13., -0.9999999999999999); assert_eq_m512d(r, e); } @@ -5320,14 +5319,12 @@ mod tests { unsafe fn test_mm512_mask_add_round_pd() { let a = _mm512_setr_pd(8., 9.5, 10., 11.5, 12., 13.5, 14., 0.000000000000000007); let b = _mm512_set1_pd(-1.); - let r = _mm512_mask_add_round_pd(a, 0, a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + let r = _mm512_mask_add_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, 0, a, b, + ); assert_eq_m512d(r, a); - let r = _mm512_mask_add_round_pd( - a, - 0b11110000, - a, - b, - _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC, + let r = _mm512_mask_add_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, 0b11110000, a, b, ); let e = _mm512_setr_pd(8., 9.5, 10., 11.5, 11., 12.5, 13., -1.0); assert_eq_m512d(r, e); @@ -5337,13 +5334,11 @@ mod tests { unsafe fn test_mm512_maskz_add_round_pd() { let a = _mm512_setr_pd(8., 9.5, 10., 11.5, 12., 13.5, 14., 0.000000000000000007); let b = _mm512_set1_pd(-1.); - let r = _mm512_maskz_add_round_pd(0, a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + let r = + _mm512_maskz_add_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b); assert_eq_m512d(r, _mm512_setzero_pd()); - let r = _mm512_maskz_add_round_pd( - 0b11110000, - a, - b, - _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC, + let r = _mm512_maskz_add_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0b11110000, a, b, ); let e = _mm512_setr_pd(0., 0., 0., 0., 11., 12.5, 13., -1.0); assert_eq_m512d(r, e); @@ -9715,70 +9710,13 @@ mod tests { assert_eq_m128d(r, e); } - #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_shuffle_pd() { - let a = _mm512_setr_pd(1., 4., 5., 8., 1., 4., 5., 8.); - let b = _mm512_setr_pd(2., 3., 6., 7., 2., 3., 6., 7.); - let r = _mm512_shuffle_pd( - a, - b, - 1 << 0 | 1 << 1 | 1 << 2 | 1 << 3 | 1 << 4 | 1 << 5 | 1 << 6 | 1 << 7, - ); - let e = _mm512_setr_pd(4., 3., 8., 7., 4., 3., 8., 7.); - assert_eq_m512d(r, e); - } - - #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_mask_shuffle_pd() { - let a = _mm512_setr_pd(1., 4., 5., 8., 1., 4., 5., 8.); - let b = _mm512_setr_pd(2., 3., 6., 7., 2., 3., 6., 7.); - let r = _mm512_mask_shuffle_pd( - a, - 0, - a, - b, - 1 << 0 | 1 << 1 | 1 << 2 | 1 << 3 | 1 << 4 | 1 << 5 | 1 << 6 | 1 << 7, - ); - assert_eq_m512d(r, a); - let r = _mm512_mask_shuffle_pd( - a, - 0b11111111, - a, - b, - 1 << 0 | 1 << 1 | 1 << 2 | 1 << 3 | 1 << 4 | 1 << 5 | 1 << 6 | 1 << 7, - ); - let e = _mm512_setr_pd(4., 3., 8., 7., 4., 3., 8., 7.); - assert_eq_m512d(r, e); - } - - #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_maskz_shuffle_pd() { - let a = _mm512_setr_pd(1., 4., 5., 8., 1., 4., 5., 
8.); - let b = _mm512_setr_pd(2., 3., 6., 7., 2., 3., 6., 7.); - let r = _mm512_maskz_shuffle_pd( - 0, - a, - b, - 1 << 0 | 1 << 1 | 1 << 2 | 1 << 3 | 1 << 4 | 1 << 5 | 1 << 6 | 1 << 7, - ); - assert_eq_m512d(r, _mm512_setzero_pd()); - let r = _mm512_maskz_shuffle_pd( - 0b00001111, - a, - b, - 1 << 0 | 1 << 1 | 1 << 2 | 1 << 3 | 1 << 4 | 1 << 5 | 1 << 6 | 1 << 7, - ); - let e = _mm512_setr_pd(4., 3., 8., 7., 0., 0., 0., 0.); - assert_eq_m512d(r, e); - } - #[simd_test(enable = "avx512f,avx512vl")] unsafe fn test_mm256_mask_shuffle_pd() { let a = _mm256_set_pd(1., 4., 5., 8.); let b = _mm256_set_pd(2., 3., 6., 7.); - let r = _mm256_mask_shuffle_pd(a, 0, a, b, 1 << 0 | 1 << 1 | 1 << 2 | 1 << 3); + let r = _mm256_mask_shuffle_pd::<0b11_11_11_11>(a, 0, a, b); assert_eq_m256d(r, a); - let r = _mm256_mask_shuffle_pd(a, 0b00001111, a, b, 1 << 0 | 1 << 1 | 1 << 2 | 1 << 3); + let r = _mm256_mask_shuffle_pd::<0b11_11_11_11>(a, 0b00001111, a, b); let e = _mm256_set_pd(2., 1., 6., 5.); assert_eq_m256d(r, e); } @@ -9787,9 +9725,9 @@ mod tests { unsafe fn test_mm256_maskz_shuffle_pd() { let a = _mm256_set_pd(1., 4., 5., 8.); let b = _mm256_set_pd(2., 3., 6., 7.); - let r = _mm256_maskz_shuffle_pd(0, a, b, 1 << 0 | 1 << 1 | 1 << 2 | 1 << 3); + let r = _mm256_maskz_shuffle_pd::<0b11_11_11_11>(0, a, b); assert_eq_m256d(r, _mm256_setzero_pd()); - let r = _mm256_maskz_shuffle_pd(0b00001111, a, b, 1 << 0 | 1 << 1 | 1 << 2 | 1 << 3); + let r = _mm256_maskz_shuffle_pd::<0b11_11_11_11>(0b00001111, a, b); let e = _mm256_set_pd(2., 1., 6., 5.); assert_eq_m256d(r, e); } @@ -9798,9 +9736,9 @@ mod tests { unsafe fn test_mm_mask_shuffle_pd() { let a = _mm_set_pd(1., 4.); let b = _mm_set_pd(2., 3.); - let r = _mm_mask_shuffle_pd(a, 0, a, b, 1 << 0 | 1 << 1); + let r = _mm_mask_shuffle_pd::<0b11_11_11_11>(a, 0, a, b); assert_eq_m128d(r, a); - let r = _mm_mask_shuffle_pd(a, 0b00000011, a, b, 1 << 0 | 1 << 1); + let r = _mm_mask_shuffle_pd::<0b11_11_11_11>(a, 0b00000011, a, b); let e = _mm_set_pd(2., 1.); assert_eq_m128d(r, e); } @@ -9809,9 +9747,9 @@ mod tests { unsafe fn test_mm_maskz_shuffle_pd() { let a = _mm_set_pd(1., 4.); let b = _mm_set_pd(2., 3.); - let r = _mm_maskz_shuffle_pd(0, a, b, 1 << 0 | 1 << 1); + let r = _mm_maskz_shuffle_pd::<0b11_11_11_11>(0, a, b); assert_eq_m128d(r, _mm_setzero_pd()); - let r = _mm_maskz_shuffle_pd(0b00000011, a, b, 1 << 0 | 1 << 1); + let r = _mm_maskz_shuffle_pd::<0b11_11_11_11>(0b00000011, a, b); let e = _mm_set_pd(2., 1.); assert_eq_m128d(r, e); }
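One wrinkle that shows up throughout the converted tests: with rustc_legacy_const_generics, an immediate written as an expression rather than a single literal (for example two _MM_FROUND_* flags ORed together) has to be wrapped in braces so it parses as a const block, which is why the rounding tests read ::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>. A minimal sketch with stand-in constants (0x00 and 0x08 are the assumed encodings of the two flags, not taken from this patch):

    // Stand-ins for _MM_FROUND_TO_NEAREST_INT (assumed 0x00) and
    // _MM_FROUND_NO_EXC (assumed 0x08).
    const TO_NEAREST_INT: i32 = 0x00;
    const NO_EXC: i32 = 0x08;

    // Shaped like the converted intrinsics: the rounding mode is a const
    // generic parameter. No hardware rounding happens here; the mode is
    // returned only so the call shape can be demonstrated.
    fn add_round<const ROUNDING: i32>(x: f64, y: f64) -> (f64, i32) {
        (x + y, ROUNDING)
    }

    fn main() {
        // `add_round::<TO_NEAREST_INT | NO_EXC>(1.0, 2.0)` would not parse;
        // the braces turn the expression into an anonymous const block.
        let (sum, mode) = add_round::<{ TO_NEAREST_INT | NO_EXC }>(1.0, 2.0);
        assert_eq!((sum, mode), (3.0, 8));
    }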