From 7d802a6628827678d2a3140d06c2a48d68557d59 Mon Sep 17 00:00:00 2001
From: Wesley Maxey
Date: Tue, 13 Oct 2020 13:37:22 -0700
Subject: [PATCH] Implement memory_order_consume operations as acquire, move
 support include to <atomic>

---
 include/cuda/std/detail/__atomic           |  4 ++--
 libcxx/include/atomic                      |  5 +++++
 libcxx/include/support/win32/atomic_msvc.h | 22 ++++++++++++++--------
 3 files changed, 21 insertions(+), 10 deletions(-)

diff --git a/include/cuda/std/detail/__atomic b/include/cuda/std/detail/__atomic
index e5fe05034d..14f49434c2 100644
--- a/include/cuda/std/detail/__atomic
+++ b/include/cuda/std/detail/__atomic
@@ -44,7 +44,6 @@
 #define __ATOMIC_THREAD 10
 #endif //__ATOMIC_BLOCK

-
 _LIBCUDACXX_BEGIN_NAMESPACE_CUDA

 namespace detail {
@@ -102,7 +101,8 @@ namespace detail {

 _LIBCUDACXX_END_NAMESPACE_CUDA

-#ifdef _MSC_VER
+#if defined(_LIBCUDACXX_COMPILER_MSVC)
+// Inject atomic intrinsics built from MSVC compiler intrinsics
 #include "libcxx/include/support/win32/atomic_msvc.h"
 #endif

diff --git a/libcxx/include/atomic b/libcxx/include/atomic
index c34cec008b..be235a64dc 100644
--- a/libcxx/include/atomic
+++ b/libcxx/include/atomic
@@ -555,6 +555,11 @@ void atomic_signal_fence(memory_order m) noexcept;
 #include
 #include
 #include <__pragma_push>
+
+#if defined(_LIBCUDACXX_COMPILER_MSVC)
+#include "support/win32/atomic_msvc.h"
+#endif
+
 #endif //__cuda_std__

 #if defined(_LIBCUDACXX_USE_PRAGMA_GCC_SYSTEM_HEADER)
diff --git a/libcxx/include/support/win32/atomic_msvc.h b/libcxx/include/support/win32/atomic_msvc.h
index 11cb4f6f01..6b1f2da88e 100644
--- a/libcxx/include/support/win32/atomic_msvc.h
+++ b/libcxx/include/support/win32/atomic_msvc.h
@@ -66,7 +66,7 @@ void __atomic_load_relaxed(const volatile _Type *__ptr, _Type *__ret) {
 template <class _Type>
 void __atomic_load(const volatile _Type *__ptr, _Type *__ret, int __memorder) {
     switch (__memorder) {
-    case __ATOMIC_SEQ_CST: _Memory_barrier();
+    case __ATOMIC_SEQ_CST: _Memory_barrier(); _LIBCUDACXX_FALLTHROUGH();
     case __ATOMIC_CONSUME:
     case __ATOMIC_ACQUIRE: __atomic_load_relaxed(__ptr, __ret); _Compiler_or_memory_barrier(); break;
     case __ATOMIC_RELAXED: __atomic_load_relaxed(__ptr, __ret); break;
@@ -98,7 +98,7 @@ template <class _Type>
 void __atomic_store(volatile _Type *__ptr, _Type *__val, int __memorder) {
     switch (__memorder) {
     case __ATOMIC_RELEASE: _Compiler_or_memory_barrier(); __atomic_store_relaxed(__ptr, __val); break;
-    case __ATOMIC_SEQ_CST: _Memory_barrier();
+    case __ATOMIC_SEQ_CST: _Memory_barrier(); _LIBCUDACXX_FALLTHROUGH();
     case __ATOMIC_RELAXED: __atomic_store_relaxed(__ptr, __val); break;
     default: assert(0);
     }
@@ -149,7 +149,8 @@ bool __atomic_compare_exchange(_Type volatile *__ptr, _Type *__expected, const _
     bool success = false;
     switch (detail::__stronger_order_cuda(__success_memorder, __failure_memorder)) {
     case __ATOMIC_RELEASE: _Compiler_or_memory_barrier(); success = __atomic_compare_exchange_relaxed(__ptr, __expected, __desired); break;
-    case __ATOMIC_ACQ_REL: _Compiler_or_memory_barrier();
+    case __ATOMIC_ACQ_REL: _Compiler_or_memory_barrier(); _LIBCUDACXX_FALLTHROUGH();
+    case __ATOMIC_CONSUME:
     case __ATOMIC_ACQUIRE: success = __atomic_compare_exchange_relaxed(__ptr, __expected, __desired); _Compiler_or_memory_barrier(); break;
     case __ATOMIC_SEQ_CST: _Memory_barrier(); success = __atomic_compare_exchange_relaxed(__ptr, __expected, __desired); _Compiler_or_memory_barrier(); break;
     case __ATOMIC_RELAXED: success = __atomic_compare_exchange_relaxed(__ptr, __expected, __desired); break;
@@ -182,7 +183,8 @@ template <class _Type>
 void __atomic_exchange(_Type volatile *__ptr, const _Type *__val, _Type *__ret, int __memorder) {
     switch (__memorder) {
     case __ATOMIC_RELEASE: _Compiler_or_memory_barrier(); __atomic_exchange_relaxed(__ptr, __val, __ret);break;
-    case __ATOMIC_ACQ_REL: _Compiler_or_memory_barrier();
+    case __ATOMIC_ACQ_REL: _Compiler_or_memory_barrier(); _LIBCUDACXX_FALLTHROUGH();
+    case __ATOMIC_CONSUME:
     case __ATOMIC_ACQUIRE: __atomic_exchange_relaxed(__ptr, __val, __ret); _Compiler_or_memory_barrier(); break;
     case __ATOMIC_SEQ_CST: _Memory_barrier(); __atomic_exchange_relaxed(__ptr, __val, __ret); _Compiler_or_memory_barrier(); break;
     case __ATOMIC_RELAXED: __atomic_exchange_relaxed(__ptr, __val, __ret); break;
@@ -217,7 +219,8 @@ _Type __atomic_fetch_add(_Type volatile *__ptr, _Delta __val, int __memorder) {
     switch (__memorder) {
     case __ATOMIC_RELEASE: _Compiler_or_memory_barrier(); __atomic_fetch_add_relaxed(__ptr, &__val, __dest);break;
-    case __ATOMIC_ACQ_REL: _Compiler_or_memory_barrier();
+    case __ATOMIC_ACQ_REL: _Compiler_or_memory_barrier(); _LIBCUDACXX_FALLTHROUGH();
+    case __ATOMIC_CONSUME:
     case __ATOMIC_ACQUIRE: __atomic_fetch_add_relaxed(__ptr, &__val, __dest); _Compiler_or_memory_barrier(); break;
     case __ATOMIC_SEQ_CST: _Memory_barrier(); __atomic_fetch_add_relaxed(__ptr, &__val, __dest); _Compiler_or_memory_barrier(); break;
     case __ATOMIC_RELAXED: __atomic_fetch_add_relaxed(__ptr, &__val, __dest); break;
@@ -258,7 +261,8 @@ _Type __atomic_fetch_and(_Type volatile *__ptr, _Delta __val, int __memorder) {
     switch (__memorder) {
     case __ATOMIC_RELEASE: _Compiler_or_memory_barrier(); __atomic_fetch_and_relaxed(__ptr, &__val, __dest);break;
-    case __ATOMIC_ACQ_REL: _Compiler_or_memory_barrier();
+    case __ATOMIC_ACQ_REL: _Compiler_or_memory_barrier(); _LIBCUDACXX_FALLTHROUGH();
+    case __ATOMIC_CONSUME:
     case __ATOMIC_ACQUIRE: __atomic_fetch_and_relaxed(__ptr, &__val, __dest); _Compiler_or_memory_barrier(); break;
     case __ATOMIC_SEQ_CST: _Memory_barrier(); __atomic_fetch_and_relaxed(__ptr, &__val, __dest); _Compiler_or_memory_barrier(); break;
     case __ATOMIC_RELAXED: __atomic_fetch_and_relaxed(__ptr, &__val, __dest); break;
@@ -294,7 +298,8 @@ _Type __atomic_fetch_xor(_Type volatile *__ptr, _Delta __val, int __memorder) {
     switch (__memorder) {
     case __ATOMIC_RELEASE: _Compiler_or_memory_barrier(); __atomic_fetch_xor_relaxed(__ptr, &__val, __dest);break;
-    case __ATOMIC_ACQ_REL: _Compiler_or_memory_barrier();
+    case __ATOMIC_ACQ_REL: _Compiler_or_memory_barrier(); _LIBCUDACXX_FALLTHROUGH();
+    case __ATOMIC_CONSUME:
     case __ATOMIC_ACQUIRE: __atomic_fetch_xor_relaxed(__ptr, &__val, __dest); _Compiler_or_memory_barrier(); break;
     case __ATOMIC_SEQ_CST: _Memory_barrier(); __atomic_fetch_xor_relaxed(__ptr, &__val, __dest); _Compiler_or_memory_barrier(); break;
     case __ATOMIC_RELAXED: __atomic_fetch_xor_relaxed(__ptr, &__val, __dest); break;
@@ -330,7 +335,8 @@ _Type __atomic_fetch_or(_Type volatile *__ptr, _Delta __val, int __memorder) {
     switch (__memorder) {
     case __ATOMIC_RELEASE: _Compiler_or_memory_barrier(); __atomic_fetch_or_relaxed(__ptr, &__val, __dest);break;
-    case __ATOMIC_ACQ_REL: _Compiler_or_memory_barrier();
+    case __ATOMIC_ACQ_REL: _Compiler_or_memory_barrier(); _LIBCUDACXX_FALLTHROUGH();
+    case __ATOMIC_CONSUME:
     case __ATOMIC_ACQUIRE: __atomic_fetch_or_relaxed(__ptr, &__val, __dest); _Compiler_or_memory_barrier(); break;
     case __ATOMIC_SEQ_CST: _Memory_barrier(); __atomic_fetch_or_relaxed(__ptr, &__val, __dest); _Compiler_or_memory_barrier(); break;
     case __ATOMIC_RELAXED: __atomic_fetch_or_relaxed(__ptr, &__val, __dest); break;
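
Reviewer note, not part of the patch: a minimal caller-side sketch of what the consume-to-acquire promotion means. It uses plain std::atomic rather than the cuda::std layer this patch touches (an assumption made so the snippet stands alone); the point is that a memory_order_consume load now takes the same barrier path as memory_order_acquire in the MSVC backend, which is a conforming strengthening: acquire orders everything the thread does after the load, a superset of the data-dependent ordering consume promises.

#include <atomic>
#include <cassert>
#include <thread>

std::atomic<int*> ptr{nullptr};
int data = 0;

void producer() {
    data = 42;                                   // plain, non-atomic write
    ptr.store(&data, std::memory_order_release); // publish the pointer
}

void consumer() {
    int* p;
    // Spin until the producer publishes the pointer.
    while ((p = ptr.load(std::memory_order_consume)) == nullptr) {}
    // *p is data-dependent on the load; with consume implemented as
    // acquire, this thread is also ordered after the release store,
    // so the producer's write to data is guaranteed visible here.
    assert(*p == 42);
}

int main() {
    std::thread t1(producer);
    std::thread t2(consumer);
    t1.join();
    t2.join();
    return 0;
}

GCC and Clang make the same strengthening in practice, since tracking C++11 dependency chains through arbitrary code has proven impractical; the __ATOMIC_CONSUME labels added above simply give consume the same barrier placement as __ATOMIC_ACQUIRE in each switch.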