This repository has been archived by the owner on Mar 21, 2024. It is now read-only.

Commit 83cd4e7
Merge pull request #207 from NVIDIA/bugfix/atomic_gcc
Fix GCC/Clang only compilation of <cuda/std/atomic>
wmaxey authored Sep 28, 2021
2 parents 41c94bd + 21563c7 commit 83cd4e7
Showing 6 changed files with 88 additions and 77 deletions.
20 changes: 6 additions & 14 deletions include/cuda/std/atomic
@@ -37,6 +37,7 @@
 #undef ATOMIC_VAR_INIT
 #endif //__CUDACC_RTC__
 
+
 #include "cassert"
 #include "cstddef"
 #include "cstdint"
@@ -63,8 +64,6 @@ namespace __detail {
 using std::__detail::__thread_scope_block_tag;
 using std::__detail::__thread_scope_device_tag;
 using std::__detail::__thread_scope_system_tag;
-using std::__detail::__atomic_signal_fence_cuda;
-using std::__detail::__atomic_thread_fence_cuda;
 }
 
 using memory_order = std::memory_order;
@@ -173,32 +172,25 @@ inline __host__ __device__ void atomic_thread_fence(memory_order __m, thread_sco
     NV_IS_DEVICE, (
         switch(_Scope) {
         case thread_scope::thread_scope_system:
-            __detail::__atomic_thread_fence_cuda((int)__m, __detail::__thread_scope_system_tag());
+            std::__detail::__atomic_thread_fence_cuda((int)__m, __detail::__thread_scope_system_tag());
             break;
         case thread_scope::thread_scope_device:
-            __detail::__atomic_thread_fence_cuda((int)__m, __detail::__thread_scope_device_tag());
+            std::__detail::__atomic_thread_fence_cuda((int)__m, __detail::__thread_scope_device_tag());
             break;
         case thread_scope::thread_scope_block:
-            __detail::__atomic_thread_fence_cuda((int)__m, __detail::__thread_scope_block_tag());
+            std::__detail::__atomic_thread_fence_cuda((int)__m, __detail::__thread_scope_block_tag());
             break;
         }
     ),
     NV_IS_HOST, (
         (void) _Scope;
-        ::std::atomic_thread_fence((::std::memory_order)__m);
+        std::atomic_thread_fence(__m);
     )
   )
 }
 
 inline __host__ __device__ void atomic_signal_fence(memory_order __m) {
-  NV_DISPATCH_TARGET(
-    NV_IS_DEVICE, (
-      __detail::__atomic_signal_fence_cuda((int)__m);
-    ),
-    NV_IS_HOST, (
-      ::std::atomic_signal_fence((::std::memory_order)__m);
-    )
-  )
+  std::atomic_signal_fence(__m);
 }
 
 _LIBCUDACXX_END_NAMESPACE_CUDA
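
Note on this change: the two fence helpers were previously re-exported from std::__detail into cuda::__detail with using-declarations, and the call sites reached them through the unqualified __detail::. This PR drops the re-exports and fully qualifies the calls as std::__detail::, which sidesteps the name-lookup trouble the PR title refers to; the host signal fence now simply forwards to std::atomic_signal_fence. A minimal standalone sketch of the qualification pattern (all names are hypothetical stand-ins):

#include <cstdio>

namespace impl { namespace detail {
    // stands in for std::__detail::__atomic_thread_fence_cuda
    inline void thread_fence_impl(int order) { std::printf("fence(%d)\n", order); }
}}

namespace wrapper {
    namespace detail {
        // previously: using impl::detail::thread_fence_impl;  // re-export, removed
    }
    inline void thread_fence(int order) {
        impl::detail::thread_fence_impl(order);  // fully qualified, as in the new code
    }
}

int main() { wrapper::thread_fence(5); }  // prints "fence(5)"
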
2 changes: 2 additions & 0 deletions include/cuda/std/detail/__config
@@ -71,6 +71,8 @@
 #define __ELF__
 #endif
 
+#define _LIBCUDACXX_HAS_CUDA_ATOMIC_EXT
+
 #include "libcxx/include/__config"
 
 #if defined(__CUDA_ARCH__)
4 changes: 4 additions & 0 deletions libcxx/include/atomic
@@ -680,6 +680,10 @@ __cxx_atomic_assign_volatile(_Tp volatile& __a_value, _Tv volatile const& __val)
 
 // Headers are wrapped like so: (cuda::std::|std::)detail
 namespace __detail {
+#if defined(_LIBCUDACXX_HAS_CUDA_ATOMIC_EXT)
+# include "support/atomic/atomic_scopes.h"
+#endif
+
 #if defined(_LIBCUDACXX_HAS_CUDA_ATOMIC_IMPL)
 # include "support/atomic/atomic_cuda.h"
 #elif defined(_LIBCUDACXX_HAS_MSVC_ATOMIC_IMPL)
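
The scope machinery consumed here now lives in support/atomic/atomic_scopes.h, pulled in only when _LIBCUDACXX_HAS_CUDA_ATOMIC_EXT is defined (which the cuda/std/detail/__config change above does before this header is parsed). Because the #include sits inside namespace __detail, the header's declarations are stamped into whichever detail namespace is open at that point, matching the (cuda::std::|std::)detail wrapping noted in the comment. A hedged single-file simulation of that stamping, with a macro standing in for the included header text and all names illustrative:

// SCOPES_BODY stands in for the contents of atomic_scopes.h
#define SCOPES_BODY \
    enum thread_scope { thread_scope_system = 0, thread_scope_device = 1 };

namespace cuda_std { namespace detail {
SCOPES_BODY  // as if: #include "support/atomic/atomic_scopes.h" with the namespace open
}}

int main() {
    return cuda_std::detail::thread_scope_device == 1 ? 0 : 1;  // exits 0
}
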
28 changes: 28 additions & 0 deletions libcxx/include/support/atomic/atomic_base.h
@@ -163,6 +163,34 @@ inline auto __cxx_atomic_fetch_xor(_Tp* __a, _Td __pattern,
                                    __cxx_atomic_order_to_int(__order));
 }
 
+template <typename _Tp, typename _Td>
+inline auto __cxx_atomic_fetch_max(_Tp* __a, _Td __val,
+                                   memory_order __order) -> __cxx_atomic_underlying_t<_Tp> {
+  auto __expected = __cxx_atomic_load(__a, memory_order_relaxed);
+  auto __desired = __expected > __val ? __expected : __val;
+
+  while(__desired == __val &&
+        !__cxx_atomic_compare_exchange_strong(__a, &__expected, __desired, __order, __order)) {
+    __desired = __expected > __val ? __expected : __val;
+  }
+
+  return __expected;
+}
+
+template <typename _Tp, typename _Td>
+inline auto __cxx_atomic_fetch_min(_Tp* __a, _Td __val,
+                                   memory_order __order) -> __cxx_atomic_underlying_t<_Tp> {
+  auto __expected = __cxx_atomic_load(__a, memory_order_relaxed);
+  auto __desired = __expected < __val ? __expected : __val;
+
+  while(__desired == __val &&
+        !__cxx_atomic_compare_exchange_strong(__a, &__expected, __desired, __order, __order)) {
+    __desired = __expected < __val ? __expected : __val;
+  }
+
+  return __expected;
+}
+
 inline constexpr
 bool __cxx_atomic_is_lock_free(size_t __x) {
 #if defined(_LIBCUDACXX_NO_RUNTIME_LOCK_FREE)
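
These generic fallbacks emulate fetch_max and fetch_min with a compare-exchange loop. The guard is the subtle part: a store is attempted only while __desired == __val, i.e. while __val is still the winning maximum (or minimum); once the observed value already beats __val, the object needs no update and the last observed value is returned, as fetch_* conventions require. A standalone sketch of the same control flow against plain std::atomic (fetch_max_emulated is an illustrative name, not a library API):

#include <atomic>
#include <cassert>

template <class T>
T fetch_max_emulated(std::atomic<T>& a, T val,
                     std::memory_order order = std::memory_order_seq_cst) {
    T expected = a.load(std::memory_order_relaxed);
    T desired = expected > val ? expected : val;
    // Retry only while val is the new maximum; once the stored value grows
    // past val, the maximum is already in place and no store is needed.
    while (desired == val &&
           !a.compare_exchange_strong(expected, desired, order, order)) {
        desired = expected > val ? expected : val;
    }
    return expected;  // the last observed value, like any fetch_* operation
}

int main() {
    std::atomic<int> a{3};
    assert(fetch_max_emulated(a, 7) == 3 && a.load() == 7);  // 7 wins, stored
    assert(fetch_max_emulated(a, 5) == 7 && a.load() == 7);  // 7 >= 5, no store
}
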
65 changes: 2 additions & 63 deletions libcxx/include/support/atomic/atomic_cuda.h
@@ -33,13 +33,6 @@
 #define __ATOMIC_SEQ_CST 5
 #endif //__ATOMIC_RELAXED
 
-#ifndef __ATOMIC_BLOCK
-#define __ATOMIC_SYSTEM 0 // 0 indicates default
-#define __ATOMIC_DEVICE 1
-#define __ATOMIC_BLOCK 2
-#define __ATOMIC_THREAD 10
-#endif //__ATOMIC_BLOCK
-
 inline __host__ __device__ int __stronger_order_cuda(int __a, int __b) {
   int const __max = __a > __b ? __a : __b;
   if(__max != __ATOMIC_RELEASE)
@@ -52,42 +45,6 @@ inline __host__ __device__ int __stronger_order_cuda(int __a, int __b) {
   return __xform[__a < __b ? __a : __b];
 }
 
-enum thread_scope {
-    thread_scope_system = __ATOMIC_SYSTEM,
-    thread_scope_device = __ATOMIC_DEVICE,
-    thread_scope_block = __ATOMIC_BLOCK,
-    thread_scope_thread = __ATOMIC_THREAD
-};
-
-#define _LIBCUDACXX_ATOMIC_SCOPE_TYPE ::cuda::thread_scope
-#define _LIBCUDACXX_ATOMIC_SCOPE_DEFAULT ::cuda::thread_scope::system
-
-struct __thread_scope_thread_tag { };
-struct __thread_scope_block_tag { };
-struct __thread_scope_device_tag { };
-struct __thread_scope_system_tag { };
-
-template<int _Scope> struct __scope_enum_to_tag { };
-/* This would be the implementation once an actual thread-scope backend exists.
-template<> struct __scope_enum_to_tag<(int)thread_scope_thread> {
-    using type = __thread_scope_thread_tag; };
-Until then: */
-template<> struct __scope_enum_to_tag<(int)thread_scope_thread> {
-    using type = __thread_scope_block_tag; };
-template<> struct __scope_enum_to_tag<(int)thread_scope_block> {
-    using type = __thread_scope_block_tag; };
-template<> struct __scope_enum_to_tag<(int)thread_scope_device> {
-    using type = __thread_scope_device_tag; };
-template<> struct __scope_enum_to_tag<(int)thread_scope_system> {
-    using type = __thread_scope_system_tag; };
-
-template <int _Scope>
-_LIBCUDACXX_INLINE_VISIBILITY auto constexpr __scope_tag() ->
-    typename __scope_enum_to_tag<_Scope>::type {
-    return typename __scope_enum_to_tag<_Scope>::type();
-}
-// END TODO
-
 // Wrap host atomic implementations into a sub-namespace
 namespace __host {
 #if defined(_LIBCUDACXX_COMPILER_MSVC)
@@ -385,16 +342,7 @@ __host__ __device__
     NV_IS_DEVICE, (
       return __atomic_fetch_max_cuda(__cxx_get_underlying_device_atomic(__a), __val, __order, __scope_tag<_Sco>());
     ), (
-      // IS_HOST
-      _Tp __expected = __cxx_atomic_load(__a, memory_order_relaxed);
-      _Tp __desired = __expected > __val ? __expected : __val;
-
-      while(__desired == __val &&
-            !__cxx_atomic_compare_exchange_strong(__a, &__expected, __desired, __order, __order)) {
-        __desired = __expected > __val ? __expected : __val;
-      }
-
-      return __expected;
+      return __host::__cxx_atomic_fetch_max(&__a->__a_value, __val, __order);
     )
   )
 }
@@ -406,16 +354,7 @@ __host__ __device__
     NV_IS_DEVICE, (
       return __atomic_fetch_min_cuda(__cxx_get_underlying_device_atomic(__a), __val, __order, __scope_tag<_Sco>());
     ), (
-      // IS_HOST
-      _Tp __expected = __cxx_atomic_load(__a, memory_order_relaxed);
-      _Tp __desired = __expected < __val ? __expected : __val;
-
-      while(__desired == __val &&
-            !__cxx_atomic_compare_exchange_strong(__a, &__expected, __desired, __order, __order)) {
-        __desired = __expected < __val ? __expected : __val;
-      }
-
-      return __expected;
+      return __host::__cxx_atomic_fetch_min(&__a->__a_value, __val, __order);
     )
   )
 }
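
With the open-coded host loops gone, the host halves of __cxx_atomic_fetch_max and __cxx_atomic_fetch_min forward to the shared __host::__cxx_atomic_fetch_max/__cxx_atomic_fetch_min fallbacks added to atomic_base.h above, so a single implementation serves plain host builds and the host side of CUDA builds, while device code still dispatches to __atomic_fetch_max_cuda/__atomic_fetch_min_cuda via the __scope_tag machinery (now provided by atomic_scopes.h below). A hedged, compile-level sketch of the user-facing extension this backs (kernel shape and names are illustrative):

#include <cuda/atomic>

__global__ void reduce_max(cuda::atomic<int, cuda::thread_scope_device>* best,
                           const int* data, int n) {
    int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i < n) {
        // On the device path this lowers to __atomic_fetch_max_cuda; the same
        // call from host code would take the shared compare-exchange fallback.
        best->fetch_max(data[i], cuda::std::memory_order_relaxed);
    }
}
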
46 changes: 46 additions & 0 deletions libcxx/include/support/atomic/atomic_scopes.h
@@ -0,0 +1,46 @@
+#ifndef __LIBCUDACXX_ATOMIC_SCOPES_H
+#define __LIBCUDACXX_ATOMIC_SCOPES_H
+
+#ifndef __ATOMIC_BLOCK
+#define __ATOMIC_SYSTEM 0 // 0 indicates default
+#define __ATOMIC_DEVICE 1
+#define __ATOMIC_BLOCK 2
+#define __ATOMIC_THREAD 10
+#endif //__ATOMIC_BLOCK
+
+enum thread_scope {
+    thread_scope_system = __ATOMIC_SYSTEM,
+    thread_scope_device = __ATOMIC_DEVICE,
+    thread_scope_block = __ATOMIC_BLOCK,
+    thread_scope_thread = __ATOMIC_THREAD
+};
+
+#define _LIBCUDACXX_ATOMIC_SCOPE_TYPE ::cuda::thread_scope
+#define _LIBCUDACXX_ATOMIC_SCOPE_DEFAULT ::cuda::thread_scope::system
+
+struct __thread_scope_thread_tag { };
+struct __thread_scope_block_tag { };
+struct __thread_scope_device_tag { };
+struct __thread_scope_system_tag { };
+
+template<int _Scope> struct __scope_enum_to_tag { };
+/* This would be the implementation once an actual thread-scope backend exists.
+template<> struct __scope_enum_to_tag<(int)thread_scope_thread> {
+    using type = __thread_scope_thread_tag; };
+Until then: */
+template<> struct __scope_enum_to_tag<(int)thread_scope_thread> {
+    using type = __thread_scope_block_tag; };
+template<> struct __scope_enum_to_tag<(int)thread_scope_block> {
+    using type = __thread_scope_block_tag; };
+template<> struct __scope_enum_to_tag<(int)thread_scope_device> {
+    using type = __thread_scope_device_tag; };
+template<> struct __scope_enum_to_tag<(int)thread_scope_system> {
+    using type = __thread_scope_system_tag; };
+
+template <int _Scope>
+_LIBCUDACXX_INLINE_VISIBILITY auto constexpr __scope_tag() ->
+    typename __scope_enum_to_tag<_Scope>::type {
+    return typename __scope_enum_to_tag<_Scope>::type();
+}
+
+#endif // __LIBCUDACXX_ATOMIC_SCOPES_H
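
The new header collects the scope definitions previously embedded in atomic_cuda.h. The enum-to-tag mapping is the dispatch hook: a compile-time scope constant selects a tag type, and overloads on the tag pick the implementation (with thread_scope_thread deliberately aliased to the block tag until a real thread-scope backend exists, per the in-source comment). An illustrative standalone reimplementation of that dispatch idea, with stand-in names:

#include <cstdio>

enum scope { scope_system = 0, scope_device = 1, scope_block = 2 };

struct system_tag { };
struct device_tag { };
struct block_tag { };

template <int S> struct scope_to_tag;
template <> struct scope_to_tag<scope_system> { using type = system_tag; };
template <> struct scope_to_tag<scope_device> { using type = device_tag; };
template <> struct scope_to_tag<scope_block>  { using type = block_tag; };

inline void fence_impl(system_tag) { std::puts("system-wide fence"); }
inline void fence_impl(device_tag) { std::puts("device-wide fence"); }
inline void fence_impl(block_tag)  { std::puts("block-wide fence"); }

template <int S>
void fence() { fence_impl(typename scope_to_tag<S>::type{}); }

int main() { fence<scope_device>(); }  // prints "device-wide fence"
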
