Merge pull request #179 from NVIDIA/feature/atomic_refactor

Refactor <atomic> and move implementation to libcxx
NVIDIA · Aug 4, 2021 · ca45a79 · ca45a79
2 parents 48d213a + 6efb206
commit ca45a79
Show file tree

Hide file tree

Showing 21 changed files with 3,104 additions and 2,436 deletions.
diff --git a/.upstream-tests/test/cuda/bad_atomic_alignment.pass.cpp b/.upstream-tests/test/cuda/bad_atomic_alignment.pass.cpp
@@ -0,0 +1,54 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// UNSUPPORTED: libcpp-has-no-threads, pre-sm-60
+// UNSUPPORTED: windows && pre-sm-70
+
+// <cuda/atomic>
+
+// cuda::atomic<key>
+
+// Original test issue:
+// https://github.com/NVIDIA/libcudacxx/issues/160
+
+#include <cuda/atomic>
+
+template <typename T>
+__host__ __device__
+constexpr bool unused(T &&) {return true;}
+
+int main(int argc, char ** argv)
+{
+  // Test default aligned user type
+  {
+    struct key {
+      int32_t a;
+      int32_t b;
+    };
+    static_assert(alignof(key) == 4, "");
+    cuda::atomic<key> k;
+    auto r = k.load();
+    k.store(r);
+    (void)k.exchange(r);
+    unused(r);
+  }
+  // Test forcibly aligned user type
+  {
+    struct alignas(8) key {
+      int32_t a;
+      int32_t b;
+    };
+    static_assert(alignof(key) == 8, "");
+    cuda::atomic<key> k;
+    auto r = k.load();
+    k.store(r);
+    (void)k.exchange(r);
+    unused(r);
+  }
+  return 0;
+}
diff --git a/.upstream-tests/test/cuda/pipeline_arrive_on.pass.cpp b/.upstream-tests/test/cuda/pipeline_arrive_on.pass.cpp
@@ -11,6 +11,8 @@
 
 // Remove after bump to version 4
 #define _LIBCUDACXX_CUDA_ABI_VERSION 3
+// TODO: Remove pointless comparison suppression when compiler fixes short-circuiting
+#pragma nv_diag_suppress 186
 
 #pragma nv_diag_suppress static_var_with_dynamic_init
 #pragma nv_diag_suppress declared_but_not_referenced

diff --git a/.upstream-tests/test/cuda/pipeline_arrive_on_abi_v2.pass.cpp b/.upstream-tests/test/cuda/pipeline_arrive_on_abi_v2.pass.cpp
@@ -11,6 +11,9 @@
 
 #define _LIBCUDACXX_CUDA_ABI_VERSION 2
 
+// TODO: Remove pointless comparison suppression when compiler fixes short-circuiting
+#pragma nv_diag_suppress 186
+
 #pragma nv_diag_suppress static_var_with_dynamic_init
 #pragma nv_diag_suppress declared_but_not_referenced
 

diff --git a/.upstream-tests/test/cuda/pipeline_group_concept.h b/.upstream-tests/test/cuda/pipeline_group_concept.h
@@ -9,6 +9,9 @@
 
 // UNSUPPORTED: pre-sm-70
 
+// TODO: Remove pointless comparison suppression when compiler fixes short-circuiting
+#pragma nv_diag_suppress 186
+
 #include <cuda/pipeline>
 
 template <typename T_size, typename T_thread_rank>

diff --git a/...tomics/atomics.types.operations/atomics.types.operations.req/atomic_is_lock_free.pass.cpp b/...tomics/atomics.types.operations/atomics.types.operations.req/atomic_is_lock_free.pass.cpp
@@ -31,9 +31,9 @@ struct TestFn {
   __host__ __device__
   void operator()() const {
     typedef cuda::std::atomic<T> A;
-    A t;
+    A t{};
     bool b1 = cuda::std::atomic_is_lock_free(static_cast<const A*>(&t));
-    volatile A vt;
+    volatile A vt{};
     bool b2 = cuda::std::atomic_is_lock_free(static_cast<const volatile A*>(&vt));
     assert(b1 == b2);
   }

diff --git a/.upstream-tests/test/std/utilities/time/time.cal/euclidian.h b/.upstream-tests/test/std/utilities/time/time.cal/euclidian.h
@@ -6,6 +6,8 @@
 //
 //===----------------------------------------------------------------------===//
 
+#pragma nv_diag_suppress 186
+
 #include <type_traits>
 
 

diff --git a/codegen/codegen.cpp b/codegen/codegen.cpp
diff --git a/include/cuda/std/atomic b/include/cuda/std/atomic
@@ -37,6 +37,7 @@
     #undef ATOMIC_VAR_INIT
 #endif //__CUDACC_RTC__
 
+#include "cassert"
 #include "cstddef"
 #include "cstdint"
 #include "type_traits"
@@ -46,16 +47,26 @@
 
 #include "detail/__pragma_push"
 
-#include "detail/__atomic"
 #include "detail/__threading_support"
 
-#undef _LIBCUDACXX_HAS_GCC_ATOMIC_IMP
-#undef _LIBCUDACXX_HAS_C_ATOMIC_IMP
-
 #include "detail/libcxx/include/atomic"
 
 _LIBCUDACXX_BEGIN_NAMESPACE_CUDA
 
+using std::__detail::thread_scope;
+using std::__detail::thread_scope_system;
+using std::__detail::thread_scope_device;
+using std::__detail::thread_scope_block;
+using std::__detail::thread_scope_thread;
+
+namespace __detail {
+using std::__detail::__thread_scope_block_tag;
+using std::__detail::__thread_scope_device_tag;
+using std::__detail::__thread_scope_system_tag;
+using std::__detail::__atomic_signal_fence_cuda;
+using std::__detail::__atomic_thread_fence_cuda;
+}
+
 using memory_order = std::memory_order;
 
 constexpr memory_order memory_order_relaxed = std::memory_order_relaxed;
@@ -67,7 +78,7 @@ constexpr memory_order memory_order_seq_cst = std::memory_order_seq_cst;
 
 // atomic<T>
 
-template <class _Tp, thread_scope _Sco = thread_scope_system>
+template <class _Tp, thread_scope _Sco = thread_scope::thread_scope_system>
 struct atomic
     : public std::__atomic_base<_Tp, _Sco>
 {
@@ -87,15 +98,15 @@ struct atomic
     __host__ __device__
     _Tp fetch_max(const _Tp & __op, memory_order __m = memory_order_seq_cst) volatile noexcept
     {
-        return detail::__atomic_fetch_max_cuda(&this->__a_.__a_value, __op,
-                                              __m, detail::__scope_tag<_Sco>());
+        return std::__detail::__atomic_fetch_max_cuda(&this->__a_.__a_value, __op,
+                                              __m, std::__detail::__scope_tag<_Sco>());
     }
 
     __host__ __device__
     _Tp fetch_min(const _Tp & __op, memory_order __m = memory_order_seq_cst) volatile noexcept
     {
-        return detail::__atomic_fetch_min_cuda(&this->__a_.__a_value, __op,
-                                              __m, detail::__scope_tag<_Sco>());
+        return std::__detail::__atomic_fetch_min_cuda(&this->__a_.__a_value, __op,
+                                              __m, std::__detail::__scope_tag<_Sco>());
     }
 };
 
@@ -159,31 +170,37 @@ struct atomic<_Tp*, _Sco>
     _Tp* operator-=(ptrdiff_t __op) noexcept          {return fetch_sub(__op) - __op;}
 };
 
-inline __host__ __device__ void atomic_thread_fence(memory_order __m, thread_scope _Scope = thread_scope_system) {
-#ifdef __CUDA_ARCH__
-    switch(_Scope) {
-    case thread_scope_system:
-        detail::__atomic_thread_fence_cuda((int)__m, detail::__thread_scope_system_tag());
-        break;
-    case thread_scope_device:
-        detail::__atomic_thread_fence_cuda((int)__m, detail::__thread_scope_device_tag());
-        break;
-    case thread_scope_block:
-        detail::__atomic_thread_fence_cuda((int)__m, detail::__thread_scope_block_tag());
-        break;
-    }
-#else
-    (void) _Scope;
-    ::std::atomic_thread_fence((::std::memory_order)__m);
-#endif
+inline __host__ __device__ void atomic_thread_fence(memory_order __m, thread_scope _Scope = thread_scope::thread_scope_system) {
+    NV_DISPATCH_TARGET(
+        NV_IS_DEVICE, (
+            switch(_Scope) {
+            case thread_scope::thread_scope_system:
+                __detail::__atomic_thread_fence_cuda((int)__m, __detail::__thread_scope_system_tag());
+                break;
+            case thread_scope::thread_scope_device:
+                __detail::__atomic_thread_fence_cuda((int)__m, __detail::__thread_scope_device_tag());
+                break;
+            case thread_scope::thread_scope_block:
+                __detail::__atomic_thread_fence_cuda((int)__m, __detail::__thread_scope_block_tag());
+                break;
+            }
+        ),
+        NV_IS_HOST, (
+            (void) _Scope;
+            ::std::atomic_thread_fence((::std::memory_order)__m);
+        )
+    )
 }
 
 inline __host__ __device__ void atomic_signal_fence(memory_order __m) {
-#ifdef __CUDA_ARCH__
-    detail::__atomic_signal_fence_cuda((int)__m);
-#else
-    ::std::atomic_signal_fence((::std::memory_order)__m);
-#endif
+    NV_DISPATCH_TARGET(
+        NV_IS_DEVICE, (
+            __detail::__atomic_signal_fence_cuda((int)__m);
+        ),
+        NV_IS_HOST, (
+            ::std::atomic_signal_fence((::std::memory_order)__m);
+        )
+    )
 }
 
 _LIBCUDACXX_END_NAMESPACE_CUDA
-Original file line number
+Diff line change
@@ Expand Up / @@ -6,6 +6,8 @@ @@
     //
     //===----------------------------------------------------------------------===//
+    #pragma nv_diag_suppress 186
     #include <type_traits>
@@ Expand Down @@