This repository has been archived by the owner on Mar 21, 2024. It is now read-only.

Commit

Initial version
ogiroux authored and wmaxey committed Oct 12, 2020
1 parent a6ee80b commit 37b23e1
Showing 3 changed files with 406 additions and 38 deletions.
42 changes: 18 additions & 24 deletions include/cuda/std/detail/__atomic
@@ -102,6 +102,10 @@ namespace detail {

_LIBCUDACXX_END_NAMESPACE_CUDA

#ifdef _MSC_VER
#include "__atomic_msvc"
#endif

#include "__atomic_generated"
#include "__atomic_derived"

@@ -123,14 +127,14 @@ __host__ __device__ inline void __cxx_atomic_thread_fence(int __order) {
#ifdef __CUDA_ARCH__
detail::__atomic_thread_fence_cuda(__order, detail::__thread_scope_system_tag());
#else
::std::atomic_thread_fence((::std::memory_order)__order);
__atomic_thread_fence(__order);
#endif
}
__host__ __device__ inline void __cxx_atomic_signal_fence(int __order) {
#ifdef __CUDA_ARCH__
detail::__atomic_signal_fence_cuda(__order);
#else
::std::atomic_signal_fence((::std::memory_order)__order);
__atomic_signal_fence(__order);
#endif
}
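
The two fences above now pass the raw integer __order straight to the compiler builtins instead of casting it to ::std::memory_order. This relies on the __ATOMIC_* constants and std::memory_order sharing the same integer values, which holds for GCC and Clang host compilers; a quick self-contained check of that assumption (not part of the diff):

#include <atomic>

// With GCC/Clang, the builtin ordering constants equal the std::memory_order values,
// so an int holding either can be forwarded to __atomic_thread_fence unchanged.
static_assert(__ATOMIC_RELAXED == static_cast<int>(std::memory_order_relaxed), "");
static_assert(__ATOMIC_CONSUME == static_cast<int>(std::memory_order_consume), "");
static_assert(__ATOMIC_ACQUIRE == static_cast<int>(std::memory_order_acquire), "");
static_assert(__ATOMIC_RELEASE == static_cast<int>(std::memory_order_release), "");
static_assert(__ATOMIC_ACQ_REL == static_cast<int>(std::memory_order_acq_rel), "");
static_assert(__ATOMIC_SEQ_CST == static_cast<int>(std::memory_order_seq_cst), "");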

@@ -190,46 +194,36 @@ struct __cxx_atomic_base_impl_default {
constexpr __cxx_atomic_base_impl_default() noexcept = default;
__host__ __device__ constexpr explicit __cxx_atomic_base_impl_default(_Tp __value) noexcept : __a_value(__value) {
}
#ifdef __CUDA_ARCH__
__cxx_atomic_alignment_wrapper_t<_Tp> __a_value;
#else
::std::atomic<__cxx_atomic_alignment_wrapper_t<_Tp>> __a_value;
#endif
};

template<class _Tp, int _Sco>
__host__ __device__ inline void __cxx_atomic_init(__cxx_atomic_base_impl_default<_Tp, _Sco> volatile* __a, _Tp __val) {
#if defined(__CUDA_ARCH__)
auto __tmp = __cxx_atomic_alignment_wrap(__val);
__cxx_atomic_assign_volatile(__a->__a_value, __tmp);
#elif defined(_LIBCUDACXX_CUDA_HAS_NO_HOST_STD_ATOMIC_INIT)
__a->__a_value.store(__val, ::std::memory_order_relaxed);
#else
::std::atomic_init(&__a->__a_value, __cxx_atomic_alignment_wrap(__val));
#endif
}
template<class _Tp, int _Sco>
__host__ __device__ inline void __cxx_atomic_store(__cxx_atomic_base_impl_default<_Tp, _Sco> volatile* __a, _Tp __val, int __order) {
#ifdef __CUDA_ARCH__
detail::__atomic_store_n_cuda(&__a->__a_value, __cxx_atomic_alignment_wrap(__val), __order, detail::__scope_tag<_Sco>());
#else
::std::atomic_store_explicit(&__a->__a_value, __cxx_atomic_alignment_wrap(__val), (::std::memory_order)__order);
__atomic_store_n(&__a->__a_value, __cxx_atomic_alignment_wrap(__val), __order);
#endif
}
template<class _Tp, int _Sco>
__host__ __device__ inline _Tp __cxx_atomic_load(__cxx_atomic_base_impl_default<_Tp, _Sco> const volatile* __a, int __order) {
#ifdef __CUDA_ARCH__
return __cxx_atomic_alignment_unwrap(detail::__atomic_load_n_cuda(&__a->__a_value, __order, detail::__scope_tag<_Sco>()));
#else
return __cxx_atomic_alignment_unwrap(::std::atomic_load_explicit(&__a->__a_value, (::std::memory_order)__order));
return __cxx_atomic_alignment_unwrap(__atomic_load_n(&__a->__a_value, __order));
#endif
}
template<class _Tp, int _Sco>
__host__ __device__ inline _Tp __cxx_atomic_exchange(__cxx_atomic_base_impl_default<_Tp, _Sco> volatile* __a, _Tp __val, int __order) {
#ifdef __CUDA_ARCH__
return __cxx_atomic_alignment_unwrap(detail::__atomic_exchange_n_cuda(&__a->__a_value, __cxx_atomic_alignment_wrap(__val), __order, detail::__scope_tag<_Sco>()));
#else
return __cxx_atomic_alignment_unwrap(::std::atomic_exchange_explicit(&__a->__a_value, __cxx_atomic_alignment_wrap(__val), (::std::memory_order)__order));
return __cxx_atomic_alignment_unwrap(__atomic_exchange_n(&__a->__a_value, __cxx_atomic_alignment_wrap(__val), __order));
#endif
}
template<class _Tp, int _Sco>
@@ -238,7 +232,7 @@ __host__ __device__ inline bool __cxx_atomic_compare_exchange_strong(__cxx_atomi
#ifdef __CUDA_ARCH__
bool __result = detail::__atomic_compare_exchange_n_cuda(&__a->__a_value, &__tmp, __cxx_atomic_alignment_wrap(__val), false, __success, __failure, detail::__scope_tag<_Sco>());
#else
bool __result = ::std::atomic_compare_exchange_strong_explicit(&__a->__a_value, &__tmp, __cxx_atomic_alignment_wrap(__val), (::std::memory_order)__success, (::std::memory_order)__failure);
bool __result = __atomic_compare_exchange_n(&__a->__a_value, &__tmp, __cxx_atomic_alignment_wrap(__val), false, __success, __failure);
#endif
*__expected = __cxx_atomic_alignment_unwrap(__tmp);
return __result;
@@ -249,7 +243,7 @@ __host__ __device__ inline bool __cxx_atomic_compare_exchange_weak(__cxx_atomic_
#ifdef __CUDA_ARCH__
bool __result = detail::__atomic_compare_exchange_n_cuda(&__a->__a_value, &__tmp, __cxx_atomic_alignment_wrap(__val), true, __success, __failure, detail::__scope_tag<_Sco>());
#else
bool __result = ::std::atomic_compare_exchange_strong_explicit(&__a->__a_value, &__tmp, __cxx_atomic_alignment_wrap(__val), (::std::memory_order)__success, (::std::memory_order)__failure);
bool __result = __atomic_compare_exchange_n(&__a->__a_value, &__tmp, __cxx_atomic_alignment_wrap(__val), true, __success, __failure);
#endif
*__expected = __cxx_atomic_alignment_unwrap(__tmp);
return __result;
@@ -259,55 +253,55 @@ __host__ __device__ inline _Tp __cxx_atomic_fetch_add(__cxx_atomic_base_impl_def
#ifdef __CUDA_ARCH__
return detail::__atomic_fetch_add_cuda(&__a->__a_value, __delta, __order, detail::__scope_tag<_Sco>());
#else
return ::std::atomic_fetch_add_explicit(&__a->__a_value, __delta, (::std::memory_order)__order);
return __atomic_fetch_add(&__a->__a_value, __delta, __order);
#endif
}
template<class _Tp, int _Sco>
__host__ __device__ inline _Tp* __cxx_atomic_fetch_add(__cxx_atomic_base_impl_default<_Tp*, _Sco> volatile* __a, ptrdiff_t __delta, int __order) {
#ifdef __CUDA_ARCH__
return detail::__atomic_fetch_add_cuda(&__a->__a_value, __delta, __order, detail::__scope_tag<_Sco>());
#else
return ::std::atomic_fetch_add_explicit(&__a->__a_value, __delta, (::std::memory_order)__order);
return __atomic_fetch_add(&__a->__a_value, __delta, __order);
#endif
}
template<class _Tp, int _Sco>
__host__ __device__ inline _Tp __cxx_atomic_fetch_sub(__cxx_atomic_base_impl_default<_Tp, _Sco> volatile* __a, _Tp __delta, int __order) {
#ifdef __CUDA_ARCH__
return detail::__atomic_fetch_sub_cuda(&__a->__a_value, __delta, __order, detail::__scope_tag<_Sco>());
#else
return ::std::atomic_fetch_sub_explicit(&__a->__a_value, __delta, (::std::memory_order)__order);
return __atomic_fetch_sub(&__a->__a_value, __delta, __order);
#endif
}
template<class _Tp, int _Sco>
__host__ __device__ inline _Tp* __cxx_atomic_fetch_sub(__cxx_atomic_base_impl_default<_Tp*, _Sco> volatile* __a, ptrdiff_t __delta, int __order) {
#ifdef __CUDA_ARCH__
return detail::__atomic_fetch_sub_cuda(&__a->__a_value, __delta, __order, detail::__scope_tag<_Sco>());
#else
return ::std::atomic_fetch_sub_explicit(&__a->__a_value, __delta, (::std::memory_order)__order);
return __atomic_fetch_sub(&__a->__a_value, __delta, __order);
#endif
}
template<class _Tp, int _Sco>
__host__ __device__ inline _Tp __cxx_atomic_fetch_and(__cxx_atomic_base_impl_default<_Tp, _Sco> volatile* __a, _Tp __pattern, int __order) {
#ifdef __CUDA_ARCH__
return detail::__atomic_fetch_and_cuda(&__a->__a_value, __pattern, __order, detail::__scope_tag<_Sco>());
#else
return ::std::atomic_fetch_and_explicit(&__a->__a_value, __pattern, (::std::memory_order)__order);
return __atomic_fetch_and(&__a->__a_value, __pattern, __order);
#endif
}
template<class _Tp, int _Sco>
__host__ __device__ inline _Tp __cxx_atomic_fetch_or(__cxx_atomic_base_impl_default<_Tp, _Sco> volatile* __a, _Tp __pattern, int __order) {
#ifdef __CUDA_ARCH__
return detail::__atomic_fetch_or_cuda(&__a->__a_value, __pattern, __order, detail::__scope_tag<_Sco>());
#else
return ::std::atomic_fetch_or_explicit(&__a->__a_value, __pattern, (::std::memory_order)__order);
return __atomic_fetch_or(&__a->__a_value, __pattern, __order);
#endif
}
template<class _Tp, int _Sco>
__host__ __device__ inline _Tp __cxx_atomic_fetch_xor(__cxx_atomic_base_impl_default<_Tp, _Sco> volatile* __a, _Tp __pattern, int __order) {
#ifdef __CUDA_ARCH__
return detail::__atomic_fetch_xor_cuda(&__a->__a_value, __pattern, __order, detail::__scope_tag<_Sco>());
#else
return ::std::atomic_fetch_xor_explicit(&__a->__a_value, __pattern, (::std::memory_order)__order);
return __atomic_fetch_xor(&__a->__a_value, __pattern, __order);
#endif
}
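
On the host side, every #else branch in this file now lowers to the same family of GCC/Clang __atomic_* builtins. A small standalone illustration of those builtins (not part of the diff), compilable with GCC or Clang:

#include <cstdio>

int main() {
    int v = 0;
    __atomic_store_n(&v, 41, __ATOMIC_RELEASE);
    int old = __atomic_fetch_add(&v, 1, __ATOMIC_RELAXED);           // old == 41
    int expected = 42;
    bool ok = __atomic_compare_exchange_n(&v, &expected, 100, /*weak=*/false,
                                          __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE);
    std::printf("old=%d ok=%d now=%d\n",
                old, (int)ok, __atomic_load_n(&v, __ATOMIC_ACQUIRE));
    return 0;
}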

28 changes: 14 additions & 14 deletions include/cuda/std/detail/__atomic_derived
@@ -43,23 +43,23 @@ template<class _Type, class _Delta, class _Scope, typename _CUDA_VSTD::enable_if
_Type __device__ __atomic_fetch_add_cuda(_Type volatile *__ptr, _Delta __val, int __memorder, _Scope __s) {

_Type __expected = __atomic_load_n_cuda(__ptr, __ATOMIC_RELAXED, __s);
_Type const __desired = __expected + __val;
_Type __desired = __expected + __val;
while(!__atomic_compare_exchange_cuda(__ptr, &__expected, &__desired, true, __memorder, __memorder, __s))
;
__desired = __expected + __val;
return __expected;
}

template<class _Type, class _Delta, class _Scope, typename _CUDA_VSTD::enable_if<sizeof(_Type)<=2, int>::type = 0>
_Type __host__ __device__ __atomic_fetch_max_cuda(_Type volatile *__ptr, _Delta __val, int __memorder, _Scope __s) {

_Type __expected = __atomic_load_n_cuda(__ptr, __ATOMIC_RELAXED, __s);
_Type const __desired = __expected > __val ? __expected : __val;
_Type __desired = __expected > __val ? __expected : __val;
#ifdef __CUDA_ARCH__
while(__desired == __val &&
!__atomic_compare_exchange_cuda(__ptr, &__expected, &__desired, true, __memorder, __memorder, __s)) {
#else
while(__desired == __val &&
!::std::atomic_compare_exchange_strong_explicit(__ptr, &__expected, __desired, __memorder, __memorder)) {
!__atomic_compare_exchange(__ptr, &__expected, &__desired, true, __memorder, __memorder)) {
#endif
__desired = __expected > __val ? __expected : __val;
}
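
In the old code __desired was const and the fetch_add retry loop had an empty body, so every retry reused a stale __desired; the new code recomputes it from the refreshed __expected after each failed exchange (the same change is applied to the sub/and/xor/or variants later in this file). The same loop shape, written against std::atomic purely for illustration:

#include <atomic>

// CAS-loop emulation of fetch_add. On failure, compare_exchange_weak writes the
// currently stored value back into `expected`, so `desired` must be rebuilt from
// it before the next attempt -- otherwise the loop would keep proposing a stale sum.
template <class T>
T cas_fetch_add(std::atomic<T>& a, T delta) {
    T expected = a.load(std::memory_order_relaxed);
    T desired  = expected + delta;
    while (!a.compare_exchange_weak(expected, desired)) {
        desired = expected + delta;   // recompute from the refreshed expected
    }
    return expected;
}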
@@ -70,13 +70,13 @@ template<class _Type, class _Delta, class _Scope, typename _CUDA_VSTD::enable_if
_Type __host__ __device__ __atomic_fetch_min_cuda(_Type volatile *__ptr, _Delta __val, int __memorder, _Scope __s) {

_Type __expected = __atomic_load_n_cuda(__ptr, __ATOMIC_RELAXED, __s);
_Type const __desired = __expected < __val ? __expected : __val;
_Type __desired = __expected < __val ? __expected : __val;
#ifdef __CUDA_ARCH__
while(__desired == __val &&
!__atomic_compare_exchange_cuda(__ptr, &__expected, &__desired, true, __memorder, __memorder, __s)) {
#else
while(__desired == __val &&
!::std::atomic_compare_exchange_strong_explicit(__ptr, &__expected, __desired, __memorder, __memorder)) {
!__atomic_compare_exchange(__ptr, &__expected, &__desired, true, __memorder, __memorder)) {
#endif
__desired = __expected < __val ? __expected : __val;
}
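
The max and min emulations above add one more guard: the store is only attempted while the incoming value would actually change the stored extreme (__desired == __val); once __expected already dominates, the loop exits without writing and simply reports the observed value. The same shape against std::atomic, shown for max (min is identical with the comparison flipped); illustration only, not the library's code path:

#include <atomic>

// CAS-loop fetch_max: only CAS while `val` would raise the stored maximum.
template <class T>
T cas_fetch_max(std::atomic<T>& a, T val) {
    T expected = a.load(std::memory_order_relaxed);
    T desired  = expected > val ? expected : val;
    while (desired == val &&                        // a store is still needed
           !a.compare_exchange_weak(expected, desired)) {
        desired = expected > val ? expected : val;  // refresh from the new expected
    }
    return expected;                                // value observed before the max
}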
@@ -87,39 +87,39 @@ template<class _Type, class _Delta, class _Scope, typename _CUDA_VSTD::enable_if
_Type __device__ __atomic_fetch_sub_cuda(_Type volatile *__ptr, _Delta __val, int __memorder, _Scope __s) {

_Type __expected = __atomic_load_n_cuda(__ptr, __ATOMIC_RELAXED, __s);
_Type const __desired = __expected - __val;
_Type __desired = __expected - __val;
while(!__atomic_compare_exchange_cuda(__ptr, &__expected, &__desired, true, __memorder, __memorder, __s))
;
__desired = __expected - __val;
return __expected;
}

template<class _Type, class _Delta, class _Scope, typename _CUDA_VSTD::enable_if<sizeof(_Type)<=2, int>::type = 0>
_Type __device__ __atomic_fetch_and_cuda(_Type volatile *__ptr, _Delta __val, int __memorder, _Scope __s) {

_Type __expected = __atomic_load_n_cuda(__ptr, __ATOMIC_RELAXED, __s);
_Type const __desired = __expected & __val;
_Type __desired = __expected & __val;
while(!__atomic_compare_exchange_cuda(__ptr, &__expected, &__desired, true, __memorder, __memorder, __s))
;
__desired = __expected & __val;
return __expected;
}

template<class _Type, class _Delta, class _Scope, typename _CUDA_VSTD::enable_if<sizeof(_Type)<=2, int>::type = 0>
_Type __device__ __atomic_fetch_xor_cuda(_Type volatile *__ptr, _Delta __val, int __memorder, _Scope __s) {

_Type __expected = __atomic_load_n_cuda(__ptr, __ATOMIC_RELAXED, __s);
_Type const __desired = __expected ^ __val;
_Type __desired = __expected ^ __val;
while(!__atomic_compare_exchange_cuda(__ptr, &__expected, &__desired, true, __memorder, __memorder, __s))
;
__desired = __expected ^ __val;
return __expected;
}

template<class _Type, class _Delta, class _Scope, typename _CUDA_VSTD::enable_if<sizeof(_Type)<=2, int>::type = 0>
_Type __device__ __atomic_fetch_or_cuda(_Type volatile *__ptr, _Delta __val, int __memorder, _Scope __s) {

_Type __expected = __atomic_load_n_cuda(__ptr, __ATOMIC_RELAXED, __s);
_Type const __desired = __expected | __val;
_Type __desired = __expected | __val;
while(!__atomic_compare_exchange_cuda(__ptr, &__expected, &__desired, true, __memorder, __memorder, __s))
;
__desired = __expected | __val;
return __expected;
}


