From c792499991f5e2be9b8cfac665096eb2030f755a Mon Sep 17 00:00:00 2001 From: Taiki Endo Date: Fri, 21 Apr 2023 05:23:19 +0900 Subject: [PATCH] x86_64: Add portable_atomic_vmovdqa_atomic cfg --- .github/workflows/ci.yml | 11 ++ src/imp/atomic128/detect/x86_64.rs | 1 + src/imp/atomic128/x86_64.rs | 166 ++++++++++++++++++++--------- tools/build.sh | 4 + 4 files changed, 129 insertions(+), 53 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a55a9fe5..78308734 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -190,6 +190,12 @@ jobs: RUSTDOCFLAGS: ${{ env.RUSTDOCFLAGS }} -C target-feature=+cmpxchg16b RUSTFLAGS: ${{ env.RUSTFLAGS }} -C target-feature=+cmpxchg16b if: (matrix.target == '' || startsWith(matrix.target, 'x86_64')) && !startsWith(matrix.os, 'macos') + # Sandy Bridge (the first Intel chip that introduced AVX) with portable_atomic_vmovdqa_atomic cfg + - run: tools/test.sh -vv $TARGET $DOCTEST_XCOMPILE $BUILD_STD $REPORT_TIME + env: + RUSTDOCFLAGS: ${{ env.RUSTDOCFLAGS }} -C target-cpu=sandybridge -C target-feature=+cmpxchg16b --cfg portable_atomic_vmovdqa_atomic + RUSTFLAGS: ${{ env.RUSTFLAGS }} -C target-cpu=sandybridge -C target-feature=+cmpxchg16b --cfg portable_atomic_vmovdqa_atomic + if: (matrix.target == '' || startsWith(matrix.target, 'x86_64')) # +lse # As of QEMU 7.2, QEMU has not yet implemented FEAT_LSE2: https://linaro.atlassian.net/browse/QEMU-300 # FEAT_LSE2 is tested on Cirrus CI's aarch64 macOS VM. @@ -390,6 +396,11 @@ jobs: env: RUSTDOCFLAGS: ${{ env.RUSTDOCFLAGS }} -C target-feature=+cmpxchg16b RUSTFLAGS: ${{ env.RUSTFLAGS }} -C target-feature=+cmpxchg16b + # Sandy Bridge (the first Intel chip that introduced AVX) with portable_atomic_vmovdqa_atomic cfg + - run: tools/test.sh -vv 2>&1 | ts -i '%.s ' + env: + RUSTDOCFLAGS: ${{ env.RUSTDOCFLAGS }} -C target-cpu=sandybridge -C target-feature=+cmpxchg16b --cfg portable_atomic_vmovdqa_atomic + RUSTFLAGS: ${{ env.RUSTFLAGS }} -C target-cpu=sandybridge -C target-feature=+cmpxchg16b --cfg portable_atomic_vmovdqa_atomic codegen: runs-on: ubuntu-latest diff --git a/src/imp/atomic128/detect/x86_64.rs b/src/imp/atomic128/detect/x86_64.rs index 007e5e46..afa9c35b 100644 --- a/src/imp/atomic128/detect/x86_64.rs +++ b/src/imp/atomic128/detect/x86_64.rs @@ -4,6 +4,7 @@ any( not(target_feature = "sse"), any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b"), + portable_atomic_vmovdqa_atomic, miri, portable_atomic_sanitize_thread, ), diff --git a/src/imp/atomic128/x86_64.rs b/src/imp/atomic128/x86_64.rs index 20a05d1a..8abc07ce 100644 --- a/src/imp/atomic128/x86_64.rs +++ b/src/imp/atomic128/x86_64.rs @@ -9,6 +9,7 @@ // // Generated asm: // - x86_64 (+cmpxchg16b) https://godbolt.org/z/KahrWeW9G +// - x86_64 (+cmpxchg16b,+avx,vmovdqa_atomic) https://godbolt.org/z/KjMr4qWj1 include!("macros.rs"); @@ -37,12 +38,18 @@ macro_rules! debug_assert_cmpxchg16b { } }; } -#[cfg(not(any(portable_atomic_no_outline_atomics, target_env = "sgx")))] +#[cfg(any( + not(any(portable_atomic_no_outline_atomics, target_env = "sgx")), + all(portable_atomic_vmovdqa_atomic, target_feature = "avx"), +))] #[cfg(target_feature = "sse")] macro_rules! 
debug_assert_vmovdqa_atomic { () => {{ debug_assert_cmpxchg16b!(); - debug_assert!(detect::detect().has_vmovdqa_atomic()); + #[cfg(not(all(portable_atomic_vmovdqa_atomic, target_feature = "avx")))] + { + debug_assert!(detect::detect().has_vmovdqa_atomic()); + } }}; } @@ -139,7 +146,10 @@ unsafe fn cmpxchg16b(dst: *mut u128, old: u128, new: u128) -> (u128, bool) { // // Do not use vector registers on targets such as x86_64-unknown-none unless SSE is explicitly enabled. // https://doc.rust-lang.org/nightly/rustc/platform-support/x86_64-unknown-none.html -#[cfg(not(any(portable_atomic_no_outline_atomics, target_env = "sgx")))] +#[cfg(any( + not(any(portable_atomic_no_outline_atomics, target_env = "sgx")), + all(portable_atomic_vmovdqa_atomic, target_feature = "avx"), +))] #[cfg(target_feature = "sse")] #[target_feature(enable = "avx")] #[inline] @@ -161,7 +171,10 @@ unsafe fn atomic_load_vmovdqa(src: *mut u128) -> u128 { core::mem::transmute(out) } } -#[cfg(not(any(portable_atomic_no_outline_atomics, target_env = "sgx")))] +#[cfg(any( + not(any(portable_atomic_no_outline_atomics, target_env = "sgx")), + all(portable_atomic_vmovdqa_atomic, target_feature = "avx"), +))] #[cfg(target_feature = "sse")] #[target_feature(enable = "avx")] #[inline] @@ -198,8 +211,15 @@ unsafe fn atomic_store_vmovdqa(dst: *mut u128, val: u128, order: Ordering) { #[cfg(not(all( any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b"), - any(portable_atomic_no_outline_atomics, target_env = "sgx", not(target_feature = "sse")), + any( + all( + any(portable_atomic_no_outline_atomics, target_env = "sgx"), + not(all(portable_atomic_vmovdqa_atomic, target_feature = "avx")), + ), + not(target_feature = "sse"), + ), )))] +#[cfg(not(all(portable_atomic_vmovdqa_atomic, target_feature = "avx")))] macro_rules! load_store_detect { ( vmovdqa = $vmovdqa:ident @@ -249,28 +269,48 @@ unsafe fn atomic_load(src: *mut u128, _order: Ordering) -> u128 { // SGX doesn't support CPUID. #[cfg(all( any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b"), - any(portable_atomic_no_outline_atomics, target_env = "sgx", not(target_feature = "sse")), + any( + all( + any(portable_atomic_no_outline_atomics, target_env = "sgx"), + not(all(portable_atomic_vmovdqa_atomic, target_feature = "avx")), + ), + not(target_feature = "sse"), + ), ))] // SAFETY: the caller must uphold the safety contract. // cfg guarantees that CMPXCHG16B is available at compile-time. unsafe { // cmpxchg16b is always SeqCst. - atomic_load_cmpxchg16b(src) + _atomic_load_cmpxchg16b(src) } #[cfg(not(all( any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b"), - any(portable_atomic_no_outline_atomics, target_env = "sgx", not(target_feature = "sse")), + any( + all( + any(portable_atomic_no_outline_atomics, target_env = "sgx"), + not(all(portable_atomic_vmovdqa_atomic, target_feature = "avx")), + ), + not(target_feature = "sse"), + ), )))] - // SAFETY: the caller must uphold the safety contract. - unsafe { - ifunc!(unsafe fn(src: *mut u128) -> u128 { - load_store_detect! { - vmovdqa = atomic_load_vmovdqa - cmpxchg16b = atomic_load_cmpxchg16b - // Use SeqCst because cmpxchg16b and atomic load by vmovdqa is always SeqCst. - fallback = atomic_load_seqcst - } - }) + { + #[cfg(all(portable_atomic_vmovdqa_atomic, target_feature = "avx"))] + // SAFETY: the caller must uphold the safety contract. 
+ unsafe { + atomic_load_vmovdqa(src) + } + #[cfg(not(all(portable_atomic_vmovdqa_atomic, target_feature = "avx")))] + // SAFETY: the caller must uphold the safety contract. + unsafe { + ifunc!(unsafe fn(src: *mut u128) -> u128 { + load_store_detect! { + vmovdqa = atomic_load_vmovdqa + cmpxchg16b = _atomic_load_cmpxchg16b + // Use SeqCst because cmpxchg16b and atomic load by vmovdqa is always SeqCst. + fallback = atomic_load_seqcst + } + }) + } } } #[cfg_attr( @@ -278,7 +318,7 @@ unsafe fn atomic_load(src: *mut u128, _order: Ordering) -> u128 { target_feature(enable = "cmpxchg16b") )] #[inline] -unsafe fn atomic_load_cmpxchg16b(src: *mut u128) -> u128 { +unsafe fn _atomic_load_cmpxchg16b(src: *mut u128) -> u128 { debug_assert!(src as usize % 16 == 0); debug_assert_cmpxchg16b!(); @@ -327,52 +367,72 @@ unsafe fn atomic_store(dst: *mut u128, val: u128, order: Ordering) { // SGX doesn't support CPUID. #[cfg(all( any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b"), - any(portable_atomic_no_outline_atomics, target_env = "sgx", not(target_feature = "sse")), + any( + all( + any(portable_atomic_no_outline_atomics, target_env = "sgx"), + not(all(portable_atomic_vmovdqa_atomic, target_feature = "avx")), + ), + not(target_feature = "sse"), + ), ))] // SAFETY: the caller must uphold the safety contract. // cfg guarantees that CMPXCHG16B is available at compile-time. unsafe { // cmpxchg16b is always SeqCst. let _ = order; - atomic_store_cmpxchg16b(dst, val); + _atomic_store_cmpxchg16b(dst, val); } #[cfg(not(all( any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b"), - any(portable_atomic_no_outline_atomics, target_env = "sgx", not(target_feature = "sse")), + any( + all( + any(portable_atomic_no_outline_atomics, target_env = "sgx"), + not(all(portable_atomic_vmovdqa_atomic, target_feature = "avx")), + ), + not(target_feature = "sse"), + ), )))] - // SAFETY: the caller must uphold the safety contract. - unsafe { - #[cfg(target_feature = "sse")] - fn_alias! { - #[target_feature(enable = "avx")] - unsafe fn(dst: *mut u128, val: u128); - // atomic store by vmovdqa has at least release semantics. - atomic_store_vmovdqa_non_seqcst = atomic_store_vmovdqa(Ordering::Release); - atomic_store_vmovdqa_seqcst = atomic_store_vmovdqa(Ordering::SeqCst); + { + #[cfg(all(portable_atomic_vmovdqa_atomic, target_feature = "avx"))] + // SAFETY: the caller must uphold the safety contract. + unsafe { + atomic_store_vmovdqa(dst, val, order); } - match order { - // Relaxed and Release stores are equivalent in all implementations - // that may be called here (vmovdqa, asm-based cmpxchg16b, and fallback). - // core::arch's cmpxchg16b will never called here. - Ordering::Relaxed | Ordering::Release => { - ifunc!(unsafe fn(dst: *mut u128, val: u128) { - load_store_detect! { - vmovdqa = atomic_store_vmovdqa_non_seqcst - cmpxchg16b = atomic_store_cmpxchg16b - fallback = atomic_store_non_seqcst - } - }); + #[cfg(not(all(portable_atomic_vmovdqa_atomic, target_feature = "avx")))] + // SAFETY: the caller must uphold the safety contract. + unsafe { + #[cfg(target_feature = "sse")] + fn_alias! { + #[target_feature(enable = "avx")] + unsafe fn(dst: *mut u128, val: u128); + // atomic store by vmovdqa has at least release semantics. + atomic_store_vmovdqa_non_seqcst = atomic_store_vmovdqa(Ordering::Release); + atomic_store_vmovdqa_seqcst = atomic_store_vmovdqa(Ordering::SeqCst); } - Ordering::SeqCst => { - ifunc!(unsafe fn(dst: *mut u128, val: u128) { - load_store_detect! 
{ - vmovdqa = atomic_store_vmovdqa_seqcst - cmpxchg16b = atomic_store_cmpxchg16b - fallback = atomic_store_seqcst - } - }); + match order { + // Relaxed and Release stores are equivalent in all implementations + // that may be called here (vmovdqa, asm-based cmpxchg16b, and fallback). + // core::arch's cmpxchg16b will never called here. + Ordering::Relaxed | Ordering::Release => { + ifunc!(unsafe fn(dst: *mut u128, val: u128) { + load_store_detect! { + vmovdqa = atomic_store_vmovdqa_non_seqcst + cmpxchg16b = _atomic_store_cmpxchg16b + fallback = atomic_store_non_seqcst + } + }); + } + Ordering::SeqCst => { + ifunc!(unsafe fn(dst: *mut u128, val: u128) { + load_store_detect! { + vmovdqa = atomic_store_vmovdqa_seqcst + cmpxchg16b = _atomic_store_cmpxchg16b + fallback = atomic_store_seqcst + } + }); + } + _ => unreachable!("{:?}", order), } - _ => unreachable!("{:?}", order), } } } @@ -380,7 +440,7 @@ unsafe fn atomic_store(dst: *mut u128, val: u128, order: Ordering) { not(any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b")), target_feature(enable = "cmpxchg16b") )] -unsafe fn atomic_store_cmpxchg16b(dst: *mut u128, val: u128) { +unsafe fn _atomic_store_cmpxchg16b(dst: *mut u128, val: u128) { // SAFETY: the caller must uphold the safety contract. unsafe { // cmpxchg16b is always SeqCst. diff --git a/tools/build.sh b/tools/build.sh index 94999dc0..bc747026 100755 --- a/tools/build.sh +++ b/tools/build.sh @@ -99,6 +99,7 @@ known_cfgs=( portable_atomic_s_mode portable_atomic_disable_fiq portable_atomic_no_outline_atomics + portable_atomic_vmovdqa_atomic # Not public APIs portable_atomic_test_outline_atomics_detect_false @@ -461,6 +462,9 @@ build() { x_cargo "${args[@]}" "$@" ;; esac + # Sandy Bridge (the first Intel chip that introduced AVX) with portable_atomic_vmovdqa_atomic cfg + RUSTFLAGS="${target_rustflags} -C target-cpu=sandybridge -C target-feature=+cmpxchg16b --cfg portable_atomic_vmovdqa_atomic" \ + x_cargo "${args[@]}" --target-dir target/vmovdqa_atomic "$@" ;; aarch64* | arm64*) # macOS is skipped because it is +lse,+lse2 by default
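For reference, this configuration can also be exercised locally with a plain cargo invocation. A minimal sketch (untested; the RUSTFLAGS mirror the ones added to ci.yml and tools/build.sh above, while the bare cargo test call and the x86_64-unknown-linux-gnu target are only illustrative, CI itself goes through tools/test.sh):

    # Statically select the vmovdqa-based 128-bit load/store path.
    # --cfg portable_atomic_vmovdqa_atomic only takes effect when AVX is also
    # enabled at compile time (here via -C target-cpu=sandybridge), and
    # +cmpxchg16b additionally lets the RMW operations avoid runtime detection.
    RUSTFLAGS="-C target-cpu=sandybridge -C target-feature=+cmpxchg16b --cfg portable_atomic_vmovdqa_atomic" \
        cargo test --target x86_64-unknown-linux-gnu

Note that in src/imp/atomic128/x86_64.rs the new cfg is always combined with target_feature = "avx" (all(portable_atomic_vmovdqa_atomic, target_feature = "avx")), so passing the cfg without enabling AVX at compile time leaves the existing ifunc/runtime-detection path in place.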