Skip to content
This repository has been archived by the owner on Mar 21, 2024. It is now read-only.

Commit

Permalink
Merge pull request #179 from NVIDIA/feature/atomic_refactor
Browse files Browse the repository at this point in the history
Refactor <atomic> and move implementation to libcxx
  • Loading branch information
wmaxey authored Aug 4, 2021
2 parents 48d213a + 6efb206 commit ca45a79
Show file tree
Hide file tree
Showing 21 changed files with 3,104 additions and 2,436 deletions.
54 changes: 54 additions & 0 deletions .upstream-tests/test/cuda/bad_atomic_alignment.pass.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// UNSUPPORTED: libcpp-has-no-threads, pre-sm-60
// UNSUPPORTED: windows && pre-sm-70

// <cuda/atomic>

// cuda::atomic<key>

// Original test issue:
// https://github.com/NVIDIA/libcudacxx/issues/160

#include <cuda/atomic>

template <typename T>
__host__ __device__
constexpr bool unused(T &&) {return true;}

int main(int argc, char ** argv)
{
// Test default aligned user type
{
struct key {
int32_t a;
int32_t b;
};
static_assert(alignof(key) == 4, "");
cuda::atomic<key> k;
auto r = k.load();
k.store(r);
(void)k.exchange(r);
unused(r);
}
// Test forcibly aligned user type
{
struct alignas(8) key {
int32_t a;
int32_t b;
};
static_assert(alignof(key) == 8, "");
cuda::atomic<key> k;
auto r = k.load();
k.store(r);
(void)k.exchange(r);
unused(r);
}
return 0;
}
2 changes: 2 additions & 0 deletions .upstream-tests/test/cuda/pipeline_arrive_on.pass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@

// Remove after bump to version 4
#define _LIBCUDACXX_CUDA_ABI_VERSION 3
// TODO: Remove pointless comparison suppression when compiler fixes short-circuiting
#pragma nv_diag_suppress 186

#pragma nv_diag_suppress static_var_with_dynamic_init
#pragma nv_diag_suppress declared_but_not_referenced
Expand Down
3 changes: 3 additions & 0 deletions .upstream-tests/test/cuda/pipeline_arrive_on_abi_v2.pass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@

#define _LIBCUDACXX_CUDA_ABI_VERSION 2

// TODO: Remove pointless comparison suppression when compiler fixes short-circuiting
#pragma nv_diag_suppress 186

#pragma nv_diag_suppress static_var_with_dynamic_init
#pragma nv_diag_suppress declared_but_not_referenced

Expand Down
3 changes: 3 additions & 0 deletions .upstream-tests/test/cuda/pipeline_group_concept.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@

// UNSUPPORTED: pre-sm-70

// TODO: Remove pointless comparison suppression when compiler fixes short-circuiting
#pragma nv_diag_suppress 186

#include <cuda/pipeline>

template <typename T_size, typename T_thread_rank>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,9 @@ struct TestFn {
__host__ __device__
void operator()() const {
typedef cuda::std::atomic<T> A;
A t;
A t{};
bool b1 = cuda::std::atomic_is_lock_free(static_cast<const A*>(&t));
volatile A vt;
volatile A vt{};
bool b2 = cuda::std::atomic_is_lock_free(static_cast<const volatile A*>(&vt));
assert(b1 == b2);
}
Expand Down
2 changes: 2 additions & 0 deletions .upstream-tests/test/std/utilities/time/time.cal/euclidian.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
//
//===----------------------------------------------------------------------===//

#pragma nv_diag_suppress 186

#include <type_traits>


Expand Down
238 changes: 133 additions & 105 deletions codegen/codegen.cpp

Large diffs are not rendered by default.

79 changes: 48 additions & 31 deletions include/cuda/std/atomic
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
#undef ATOMIC_VAR_INIT
#endif //__CUDACC_RTC__

#include "cassert"
#include "cstddef"
#include "cstdint"
#include "type_traits"
Expand All @@ -46,16 +47,26 @@

#include "detail/__pragma_push"

#include "detail/__atomic"
#include "detail/__threading_support"

#undef _LIBCUDACXX_HAS_GCC_ATOMIC_IMP
#undef _LIBCUDACXX_HAS_C_ATOMIC_IMP

#include "detail/libcxx/include/atomic"

_LIBCUDACXX_BEGIN_NAMESPACE_CUDA

using std::__detail::thread_scope;
using std::__detail::thread_scope_system;
using std::__detail::thread_scope_device;
using std::__detail::thread_scope_block;
using std::__detail::thread_scope_thread;

namespace __detail {
using std::__detail::__thread_scope_block_tag;
using std::__detail::__thread_scope_device_tag;
using std::__detail::__thread_scope_system_tag;
using std::__detail::__atomic_signal_fence_cuda;
using std::__detail::__atomic_thread_fence_cuda;
}

using memory_order = std::memory_order;

constexpr memory_order memory_order_relaxed = std::memory_order_relaxed;
Expand All @@ -67,7 +78,7 @@ constexpr memory_order memory_order_seq_cst = std::memory_order_seq_cst;

// atomic<T>

template <class _Tp, thread_scope _Sco = thread_scope_system>
template <class _Tp, thread_scope _Sco = thread_scope::thread_scope_system>
struct atomic
: public std::__atomic_base<_Tp, _Sco>
{
Expand All @@ -87,15 +98,15 @@ struct atomic
__host__ __device__
_Tp fetch_max(const _Tp & __op, memory_order __m = memory_order_seq_cst) volatile noexcept
{
return detail::__atomic_fetch_max_cuda(&this->__a_.__a_value, __op,
__m, detail::__scope_tag<_Sco>());
return std::__detail::__atomic_fetch_max_cuda(&this->__a_.__a_value, __op,
__m, std::__detail::__scope_tag<_Sco>());
}

__host__ __device__
_Tp fetch_min(const _Tp & __op, memory_order __m = memory_order_seq_cst) volatile noexcept
{
return detail::__atomic_fetch_min_cuda(&this->__a_.__a_value, __op,
__m, detail::__scope_tag<_Sco>());
return std::__detail::__atomic_fetch_min_cuda(&this->__a_.__a_value, __op,
__m, std::__detail::__scope_tag<_Sco>());
}
};

Expand Down Expand Up @@ -159,31 +170,37 @@ struct atomic<_Tp*, _Sco>
_Tp* operator-=(ptrdiff_t __op) noexcept {return fetch_sub(__op) - __op;}
};

inline __host__ __device__ void atomic_thread_fence(memory_order __m, thread_scope _Scope = thread_scope_system) {
#ifdef __CUDA_ARCH__
switch(_Scope) {
case thread_scope_system:
detail::__atomic_thread_fence_cuda((int)__m, detail::__thread_scope_system_tag());
break;
case thread_scope_device:
detail::__atomic_thread_fence_cuda((int)__m, detail::__thread_scope_device_tag());
break;
case thread_scope_block:
detail::__atomic_thread_fence_cuda((int)__m, detail::__thread_scope_block_tag());
break;
}
#else
(void) _Scope;
::std::atomic_thread_fence((::std::memory_order)__m);
#endif
inline __host__ __device__ void atomic_thread_fence(memory_order __m, thread_scope _Scope = thread_scope::thread_scope_system) {
NV_DISPATCH_TARGET(
NV_IS_DEVICE, (
switch(_Scope) {
case thread_scope::thread_scope_system:
__detail::__atomic_thread_fence_cuda((int)__m, __detail::__thread_scope_system_tag());
break;
case thread_scope::thread_scope_device:
__detail::__atomic_thread_fence_cuda((int)__m, __detail::__thread_scope_device_tag());
break;
case thread_scope::thread_scope_block:
__detail::__atomic_thread_fence_cuda((int)__m, __detail::__thread_scope_block_tag());
break;
}
),
NV_IS_HOST, (
(void) _Scope;
::std::atomic_thread_fence((::std::memory_order)__m);
)
)
}

inline __host__ __device__ void atomic_signal_fence(memory_order __m) {
#ifdef __CUDA_ARCH__
detail::__atomic_signal_fence_cuda((int)__m);
#else
::std::atomic_signal_fence((::std::memory_order)__m);
#endif
NV_DISPATCH_TARGET(
NV_IS_DEVICE, (
__detail::__atomic_signal_fence_cuda((int)__m);
),
NV_IS_HOST, (
::std::atomic_signal_fence((::std::memory_order)__m);
)
)
}

_LIBCUDACXX_END_NAMESPACE_CUDA
Expand Down
Loading

0 comments on commit ca45a79

Please sign in to comment.