Auto merge of #3674 - TDecking:bmi, r=RalfJung
Implement LLVM x86 bmi intrinsics

This implements the intrinsics for both the bmi1 and bmi2 ISA extensions. As far as LLVM is concerned, all of these intrinsics live in the same `llvm.x86.bmi` namespace, which is why it is arguably better to bundle the implementations of the two extensions.
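For context, these shims are what make the corresponding std::arch intrinsics runnable under Miri. A minimal usage sketch (hypothetical demo function; the values are copied from the test added in this commit, and x86_64 with BMI1/BMI2 support is assumed):

#[cfg(target_arch = "x86_64")]
fn demo() {
    use std::arch::x86_64::{_bextr_u32, _bzhi_u32, _pdep_u32, _pext_u32};
    // The intrinsics are only safe to call when the CPU actually supports BMI1/BMI2.
    if is_x86_feature_detected!("bmi1") && is_x86_feature_detected!("bmi2") {
        unsafe {
            // bextr: extract 4 bits starting at bit index 4.
            assert_eq!(_bextr_u32(0b0101_0000, 4, 4), 0b0101);
            // bzhi: clear all bits at index 5 and above.
            assert_eq!(_bzhi_u32(0b1111_0010, 5), 0b0001_0010);
            // pext: gather the bits selected by the mask into the low bits of the result.
            assert_eq!(_pext_u32(0b1011_1110_1001_0011, 0b0110_0011_1000_0101), 0b0011_0101);
            // pdep: scatter the low bits of the input to the positions selected by the mask.
            assert_eq!(_pdep_u32(0b1011_1110_1001_0011, 0b0110_0011_1000_0101), 0b10_0000_0101);
        }
    }
}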
Showing 3 changed files with 330 additions and 0 deletions.
@@ -0,0 +1,108 @@
use rustc_span::Symbol;
use rustc_target::spec::abi::Abi;

use crate::*;

impl<'tcx> EvalContextExt<'tcx> for crate::MiriInterpCx<'tcx> {}
pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
    fn emulate_x86_bmi_intrinsic(
        &mut self,
        link_name: Symbol,
        abi: Abi,
        args: &[OpTy<'tcx>],
        dest: &MPlaceTy<'tcx>,
    ) -> InterpResult<'tcx, EmulateItemResult> {
        let this = self.eval_context_mut();

        // Prefix should have already been checked.
        let unprefixed_name = link_name.as_str().strip_prefix("llvm.x86.bmi.").unwrap();

        // The intrinsics are suffixed with the bit size of their operands.
        let (is_64_bit, unprefixed_name) = if unprefixed_name.ends_with("64") {
            (true, unprefixed_name.strip_suffix(".64").unwrap_or(""))
        } else {
            (false, unprefixed_name.strip_suffix(".32").unwrap_or(""))
        };

        // All intrinsics of the "bmi" namespace belong to the "bmi2" ISA extension.
        // The exception is "bextr", which belongs to "bmi1".
        let target_feature = if unprefixed_name == "bextr" { "bmi1" } else { "bmi2" };
        this.expect_target_feature_for_intrinsic(link_name, target_feature)?;

        if is_64_bit && this.tcx.sess.target.arch != "x86_64" {
            return Ok(EmulateItemResult::NotSupported);
        }

        let [left, right] = this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
        let left = this.read_scalar(left)?;
        let right = this.read_scalar(right)?;

        let left = if is_64_bit { left.to_u64()? } else { u64::from(left.to_u32()?) };
        let right = if is_64_bit { right.to_u64()? } else { u64::from(right.to_u32()?) };

        let result = match unprefixed_name {
            // Extract a contiguous range of bits from an unsigned integer.
            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_bextr_u32
            "bextr" => {
                let start = u32::try_from(right & 0xff).unwrap();
                let len = u32::try_from((right >> 8) & 0xff).unwrap();
                let shifted = left.checked_shr(start).unwrap_or(0);
                // Keep the `len` lowest bits of `shifted`, or all bits if `len` is too big.
                if len >= 64 { shifted } else { shifted & 1u64.wrapping_shl(len).wrapping_sub(1) }
            }
            // Create a copy of an unsigned integer with bits above a certain index cleared.
            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_bzhi_u32
            "bzhi" => {
                let index = u32::try_from(right & 0xff).unwrap();
                // Keep the `index` lowest bits of `left`, or all bits if `index` is too big.
                if index >= 64 { left } else { left & 1u64.wrapping_shl(index).wrapping_sub(1) }
            }
            // Extract bit values of an unsigned integer at positions marked by a mask.
            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_pext_u32
            "pext" => {
                let mut mask = right;
                let mut i = 0u32;
                let mut result = 0;
                // Iterate over the mask one 1-bit at a time, from
                // the least significant bit to the most significant bit.
                while mask != 0 {
                    // Extract the bit marked by the mask's least significant set bit
                    // and put it at position `i` of the result.
                    result |= u64::from(left & (1 << mask.trailing_zeros()) != 0) << i;
                    i = i.wrapping_add(1);
                    // Clear the least significant set bit.
                    mask &= mask.wrapping_sub(1);
                }
                result
            }
            // Deposit bit values of an unsigned integer to positions marked by a mask.
            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_pdep_u32
            "pdep" => {
                let mut mask = right;
                let mut set = left;
                let mut result = 0;
                // Iterate over the mask one 1-bit at a time, from
                // the least significant bit to the most significant bit.
                while mask != 0 {
                    // Put rightmost bit of `set` at the position of the current `mask` bit.
                    result |= (set & 1) << mask.trailing_zeros();
                    // Go to next bit of `set`.
                    set >>= 1;
                    // Clear the least significant set bit.
                    mask &= mask.wrapping_sub(1);
                }
                result
            }
            _ => return Ok(EmulateItemResult::NotSupported),
        };

        let result = if is_64_bit {
            Scalar::from_u64(result)
        } else {
            Scalar::from_u32(u32::try_from(result).unwrap())
        };
        this.write_scalar(result, dest)?;

        Ok(EmulateItemResult::NeedsReturn)
    }
}
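The pext/pdep loops are the least obvious part of the shim. As a cross-check of the loop logic, here is a standalone sketch of the same two loops (hypothetical helper names, u64 operands only) together with the round-trip identity pdep(pext(x, m), m) == x & m:

// Standalone copies of the two emulation loops above (hypothetical helpers, u64 operands only).
fn pext(value: u64, mut mask: u64) -> u64 {
    let mut i = 0u32;
    let mut result = 0u64;
    while mask != 0 {
        // Take the bit of `value` selected by the mask's lowest set bit
        // and append it to `result` at position `i`.
        result |= u64::from(value & (1 << mask.trailing_zeros()) != 0) << i;
        i += 1;
        // Clear the lowest set bit of the mask.
        mask &= mask.wrapping_sub(1);
    }
    result
}

fn pdep(mut bits: u64, mut mask: u64) -> u64 {
    let mut result = 0u64;
    while mask != 0 {
        // Deposit the lowest bit of `bits` at the position of the mask's lowest set bit.
        result |= (bits & 1) << mask.trailing_zeros();
        bits >>= 1;
        // Clear the lowest set bit of the mask.
        mask &= mask.wrapping_sub(1);
    }
    result
}

fn main() {
    // Depositing the extracted bits back through the same mask restores exactly the masked bits.
    let (x, m) = (0x1234_5678_9abc_def0u64, 0x00ff_00ff_00ff_00ffu64);
    assert_eq!(pdep(pext(x, m), m), x & m);
}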
@@ -0,0 +1,216 @@
// Ignore everything except x86 and x86_64
// Any new targets that are added to CI should be ignored here.
// (We cannot use `cfg`-based tricks here since the `target-feature` flags below only work on x86.)
//@ignore-target-aarch64
//@ignore-target-arm
//@ignore-target-avr
//@ignore-target-s390x
//@ignore-target-thumbv7em
//@ignore-target-wasm32
//@compile-flags: -C target-feature=+bmi1,+bmi2

#[cfg(target_arch = "x86")]
use std::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::*;

fn main() {
    // BMI1 and BMI2 are independent of each other, so both must be checked.
    assert!(is_x86_feature_detected!("bmi1"));
    assert!(is_x86_feature_detected!("bmi2"));

    unsafe {
        test_bmi_32();
        test_bmi_64();
    }
}

/// Test the 32-bit variants of the intrinsics.
unsafe fn test_bmi_32() {
    unsafe fn test_bextr_u32() {
        let r = _bextr_u32(0b0101_0000u32, 4, 4);
        assert_eq!(r, 0b0000_0101u32);

        for i in 0..16 {
            assert_eq!(_bextr_u32(u32::MAX, i, 4), 0b1111);
            assert_eq!(_bextr_u32(u32::MAX, 4, i), (1 << i) - 1);
        }

        // Ensure that indices larger than the bit count are covered.
        // It is important to go above 32 in order to verify the bit selection
        // of the instruction.

        for i in 0..256 {
            // If the index is out of bounds, the original input won't be changed, thus the `min(32)`.
            assert_eq!(_bextr_u32(u32::MAX, 0, i).count_ones(), i.min(32));
        }

        for i in 0..256 {
            assert_eq!(_bextr_u32(u32::MAX, i, 0), 0);
        }

        // Test cases with completely random values. These cases also test
        // that the function works even if upper bits of the control value are set.
        assert_eq!(_bextr2_u32(0x7408a392, 0x54ef705), 0x3a0451c);
        assert_eq!(_bextr2_u32(0xbc5a3494, 0xdd193203), 0x178b4692);
        assert_eq!(_bextr2_u32(0xc0332325, 0xf96e207), 0x1806646);
    }
    test_bextr_u32();

    unsafe fn test_pext_u32() {
        let n = 0b1011_1110_1001_0011u32;

        let m0 = 0b0110_0011_1000_0101u32;
        let s0 = 0b0000_0000_0011_0101u32;

        let m1 = 0b1110_1011_1110_1111u32;
        let s1 = 0b0001_0111_0100_0011u32;

        // Testing of random values.
        assert_eq!(_pext_u32(n, m0), s0);
        assert_eq!(_pext_u32(n, m1), s1);
        assert_eq!(_pext_u32(0x12345678, 0xff00fff0), 0x00012567);

        // Testing of various identities.
        assert_eq!(_pext_u32(u32::MAX, u32::MAX), u32::MAX);
        assert_eq!(_pext_u32(u32::MAX, 0), 0);
        assert_eq!(_pext_u32(0, u32::MAX), 0);
    }
    test_pext_u32();

    unsafe fn test_pdep_u32() {
        let n = 0b1011_1110_1001_0011u32;

        let m0 = 0b0110_0011_1000_0101u32;
        let s0 = 0b0000_0010_0000_0101u32;

        let m1 = 0b1110_1011_1110_1111u32;
        let s1 = 0b1110_1001_0010_0011u32;

        // Testing of random values.
        assert_eq!(_pdep_u32(n, m0), s0);
        assert_eq!(_pdep_u32(n, m1), s1);
        assert_eq!(_pdep_u32(0x00012567, 0xff00fff0), 0x12005670);

        // Testing of various identities.
        assert_eq!(_pdep_u32(u32::MAX, u32::MAX), u32::MAX);
        assert_eq!(_pdep_u32(0, u32::MAX), 0);
        assert_eq!(_pdep_u32(u32::MAX, 0), 0);
    }
    test_pdep_u32();

    unsafe fn test_bzhi_u32() {
        let n = 0b1111_0010u32;
        let s = 0b0001_0010u32;
        assert_eq!(_bzhi_u32(n, 5), s);

        // Ensure that indices larger than the bit count are covered.
        // It is important to go above 32 in order to verify the bit selection
        // of the instruction.
        for i in 0..=512 {
            // The instruction only takes the lowest eight bits to generate the index, hence `i & 0xff`.
            // If the index is out of bounds, the original input won't be changed, thus the `min(32)`.
            let expected = 1u32.checked_shl((i & 0xff).min(32)).unwrap_or(0).wrapping_sub(1);
            let actual = _bzhi_u32(u32::MAX, i);
            assert_eq!(expected, actual);
        }
    }
    test_bzhi_u32();
}

#[cfg(not(target_arch = "x86_64"))]
unsafe fn test_bmi_64() {}

/// Test the 64-bit variants of the intrinsics.
#[cfg(target_arch = "x86_64")]
unsafe fn test_bmi_64() {
    unsafe fn test_bextr_u64() {
        let r = _bextr_u64(0b0101_0000u64, 4, 4);
        assert_eq!(r, 0b0000_0101u64);

        for i in 0..16 {
            assert_eq!(_bextr_u64(u64::MAX, i, 4), 0b1111);
            assert_eq!(_bextr_u64(u64::MAX, 32, i), (1 << i) - 1);
        }

        // Ensure that indices larger than the bit count are covered.
        // It is important to go above 64 in order to verify the bit selection
        // of the instruction.

        for i in 0..256 {
            // If the index is out of bounds, the original input won't be changed, thus the `min(64)`.
            assert_eq!(_bextr_u64(u64::MAX, 0, i).count_ones(), i.min(64));
        }

        for i in 0..256 {
            assert_eq!(_bextr_u64(u64::MAX, i, 0), 0);
        }

        // Test cases with completely random values. These cases also test
        // that the function works even if upper bits of the control value are set.
        assert_eq!(_bextr2_u64(0x4ff6cfbcea75f055, 0x216642e228425719), 0x27fb67de75);
        assert_eq!(_bextr2_u64(0xb05e991e6f6e1b6, 0xc76dd5d7f67dfc14), 0xb05e991e6f);
        assert_eq!(_bextr2_u64(0x5a3a629e323d848f, 0x95ac507d20e7719), 0x2d1d314f19);
    }
    test_bextr_u64();

    unsafe fn test_pext_u64() {
        let n = 0b1011_1110_1001_0011u64;

        let m0 = 0b0110_0011_1000_0101u64;
        let s0 = 0b0000_0000_0011_0101u64;

        let m1 = 0b1110_1011_1110_1111u64;
        let s1 = 0b0001_0111_0100_0011u64;

        // Testing of random values.
        assert_eq!(_pext_u64(n, m0), s0);
        assert_eq!(_pext_u64(n, m1), s1);
        assert_eq!(_pext_u64(0x12345678, 0xff00fff0), 0x00012567);

        // Testing of various identities.
        assert_eq!(_pext_u64(u64::MAX, u64::MAX), u64::MAX);
        assert_eq!(_pext_u64(u64::MAX, 0), 0);
        assert_eq!(_pext_u64(0, u64::MAX), 0);
    }
    test_pext_u64();

    unsafe fn test_pdep_u64() {
        let n = 0b1011_1110_1001_0011u64;

        let m0 = 0b0110_0011_1000_0101u64;
        let s0 = 0b0000_0010_0000_0101u64;

        let m1 = 0b1110_1011_1110_1111u64;
        let s1 = 0b1110_1001_0010_0011u64;

        // Testing of random values.
        assert_eq!(_pdep_u64(n, m0), s0);
        assert_eq!(_pdep_u64(n, m1), s1);
        assert_eq!(_pdep_u64(0x00012567, 0xff00fff0), 0x12005670);

        // Testing of various identities.
        assert_eq!(_pdep_u64(u64::MAX, u64::MAX), u64::MAX);
        assert_eq!(_pdep_u64(0, u64::MAX), 0);
        assert_eq!(_pdep_u64(u64::MAX, 0), 0);
    }
    test_pdep_u64();

    unsafe fn test_bzhi_u64() {
        let n = 0b1111_0010u64;
        let s = 0b0001_0010u64;
        assert_eq!(_bzhi_u64(n, 5), s);

        // Ensure that indices larger than the bit count are covered.
        // It is important to go above 255 in order to verify the bit selection
        // of the instruction.
        for i in 0..=512 {
            // The instruction only takes the lowest eight bits to generate the index, hence `i & 0xff`.
            // If the index is out of bounds, the original input won't be changed, thus the `min(64)`.
            let expected = 1u64.checked_shl((i & 0xff).min(64)).unwrap_or(0).wrapping_sub(1);
            let actual = _bzhi_u64(u64::MAX, i);
            assert_eq!(expected, actual);
        }
    }
    test_bzhi_u64();
}
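The random-value bextr cases above go through the _bextr2_* variants, which take a single packed control word; the shim reads the start index from bits 0..8 and the length from bits 8..16 of its second operand. A small sketch (hypothetical example values) of how the two forms should relate:

#[cfg(target_arch = "x86_64")]
unsafe fn bextr_forms_agree() {
    use std::arch::x86_64::{_bextr2_u64, _bextr_u64};
    // Hypothetical example values; start and length are both well below 64.
    let x = 0x4ff6_cfbc_ea75_f055u64;
    let (start, len) = (8u32, 12u32);
    // Pack `start` into bits 0..8 and `len` into bits 8..16 of the control operand,
    // matching how the shim decodes the second operand of bextr.
    let control = u64::from(start) | (u64::from(len) << 8);
    assert_eq!(_bextr_u64(x, start, len), _bextr2_u64(x, control));
}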