From a4b54a78e1620293fcd0c8f66ddae86db1d9e2e3 Mon Sep 17 00:00:00 2001 From: Wesley Maxey Date: Tue, 31 Aug 2021 17:12:17 -0700 Subject: [PATCH 1/2] Implement atomic_ref, tests, and heterogeneous cuda::tests --- .../test/cuda/bad_atomic_alignment.pass.cpp | 4 +- .../test/heterogeneous/atomic_ref.pass.cpp | 219 +++++++++++ .../heterogeneous/cuda_atomic_ref.pass.cpp | 224 +++++++++++ .../address_ref.pass.cpp | 156 ++++++++ .../address_ref_constness.pass.cpp | 156 ++++++++ .../integral_ref.pass.cpp | 216 +++++++++++ .../integral_ref_constness.pass.cpp | 216 +++++++++++ .../trivially_copyable.fail.cpp | 11 +- .../trivially_copyable.pass.cpp | 5 +- .../trivially_copyable_ref.fail.cpp | 75 ++++ .../atomic_helpers.h | 41 ++ include/cuda/std/atomic | 100 ++++- include/cuda/std/detail/libcxx/include/atomic | 361 +++++++++++++++--- .../include/support/atomic/atomic_base.h | 50 ++- .../include/support/atomic/atomic_cuda.h | 32 +- .../include/support/atomic/cxx_atomic.h | 82 +++- .../address_ref.pass.cpp | 134 +++++++ .../integral_ref.pass.cpp | 189 +++++++++ 18 files changed, 2162 insertions(+), 109 deletions(-) create mode 100644 .upstream-tests/test/heterogeneous/atomic_ref.pass.cpp create mode 100644 .upstream-tests/test/heterogeneous/cuda_atomic_ref.pass.cpp create mode 100644 .upstream-tests/test/std/atomics/atomics.types.generic/address_ref.pass.cpp create mode 100644 .upstream-tests/test/std/atomics/atomics.types.generic/address_ref_constness.pass.cpp create mode 100644 .upstream-tests/test/std/atomics/atomics.types.generic/integral_ref.pass.cpp create mode 100644 .upstream-tests/test/std/atomics/atomics.types.generic/integral_ref_constness.pass.cpp create mode 100644 .upstream-tests/test/std/atomics/atomics.types.generic/trivially_copyable_ref.fail.cpp create mode 100644 libcxx/test/std/atomics/atomics.types.generic/address_ref.pass.cpp create mode 100644 libcxx/test/std/atomics/atomics.types.generic/integral_ref.pass.cpp diff --git a/.upstream-tests/test/cuda/bad_atomic_alignment.pass.cpp b/.upstream-tests/test/cuda/bad_atomic_alignment.pass.cpp index 71ccb2d0b8..c42bd41722 100644 --- a/.upstream-tests/test/cuda/bad_atomic_alignment.pass.cpp +++ b/.upstream-tests/test/cuda/bad_atomic_alignment.pass.cpp @@ -31,7 +31,7 @@ int main(int argc, char ** argv) int32_t b; }; static_assert(alignof(key) == 4, ""); - cuda::atomic k; + cuda::atomic k(key{}); auto r = k.load(); k.store(r); (void)k.exchange(r); @@ -44,7 +44,7 @@ int main(int argc, char ** argv) int32_t b; }; static_assert(alignof(key) == 8, ""); - cuda::atomic k; + cuda::atomic k(key{}); auto r = k.load(); k.store(r); (void)k.exchange(r); diff --git a/.upstream-tests/test/heterogeneous/atomic_ref.pass.cpp b/.upstream-tests/test/heterogeneous/atomic_ref.pass.cpp new file mode 100644 index 0000000000..fde89dacd4 --- /dev/null +++ b/.upstream-tests/test/heterogeneous/atomic_ref.pass.cpp @@ -0,0 +1,219 @@ +//===----------------------------------------------------------------------===// +// +// Part of the libcu++ Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: nvrtc, pre-sm-60 +// UNSUPPORTED: windows && pre-sm-70 + +#include "helpers.h" + +#include + +template +struct store_tester +{ + template + __host__ __device__ + static void initialize(A & v) + { + cuda::std::atomic_ref a(v); + using T = decltype(a.load()); + a.store(static_cast(Operand)); + } + + template + __host__ __device__ + static void validate(A & v) + { + cuda::std::atomic_ref a(v); + using T = decltype(a.load()); + assert(a.load() == static_cast(Operand)); + } +}; + +template +struct exchange_tester +{ + template + __host__ __device__ + static void initialize(A & v) + { + cuda::std::atomic_ref a(v); + using T = decltype(a.load()); + assert(a.exchange(static_cast(Operand)) == static_cast(PreviousValue)); + } + + template + __host__ __device__ + static void validate(A & v) + { + cuda::std::atomic_ref a(v); + using T = decltype(a.load()); + assert(a.load() == static_cast(Operand)); + } +}; + +template +struct strong_cas_tester +{ + enum { ShouldSucceed = (Expected == PreviousValue) }; + template + __host__ __device__ + static void initialize(A & v) + { + cuda::std::atomic_ref a(v); + using T = decltype(a.load()); + T expected = static_cast(Expected); + assert(a.compare_exchange_strong(expected, static_cast(Desired)) == ShouldSucceed); + assert(expected == static_cast(PreviousValue)); + } + + template + __host__ __device__ + static void validate(A & v) + { + cuda::std::atomic_ref a(v); + using T = decltype(a.load()); + assert(a.load() == static_cast(Result)); + } +}; + +template +struct weak_cas_tester +{ + enum { ShouldSucceed = (Expected == PreviousValue) }; + template + __host__ __device__ + static void initialize(A & v) + { + cuda::std::atomic_ref a(v); + using T = decltype(a.load()); + T expected = static_cast(Expected); + if (!ShouldSucceed) + { + assert(a.compare_exchange_weak(expected, static_cast(Desired)) == false); + } + else + { + while (a.compare_exchange_weak(expected, static_cast(Desired)) != ShouldSucceed) ; + } + assert(expected == static_cast(PreviousValue)); + } + + template + __host__ __device__ + static void validate(A & v) + { + cuda::std::atomic_ref a(v); + using T = decltype(a.load()); + assert(a.load() == static_cast(Result)); + } +}; + +#define ATOMIC_TESTER(operation) \ + template \ + struct operation ## _tester \ + { \ + template \ + __host__ __device__ \ + static void initialize(A & v) \ + { \ + cuda::std::atomic_ref a(v); \ + using T = decltype(a.load()); \ + assert(a.operation(Operand) == static_cast(PreviousValue)); \ + } \ + \ + template \ + __host__ __device__ \ + static void validate(A & v) \ + { \ + cuda::std::atomic_ref a(v); \ + using T = decltype(a.load()); \ + assert(a.load() == static_cast(ExpectedValue)); \ + } \ + }; + +ATOMIC_TESTER(fetch_add); +ATOMIC_TESTER(fetch_sub); + +ATOMIC_TESTER(fetch_and); +ATOMIC_TESTER(fetch_or); +ATOMIC_TESTER(fetch_xor); + +using basic_testers = tester_list< + store_tester<0>, + store_tester<-1>, + store_tester<17>, + exchange_tester<17, 31>, + /* *_cas_tester */ + weak_cas_tester<31, 12, 13, 31>, + weak_cas_tester<31, 31, -6, -6>, + strong_cas_tester<-6, -6, -12, -12>, + strong_cas_tester<-12, 31, 17, -12>, + exchange_tester<-12, 17> +>; + +using arithmetic_atomic_testers = extend_tester_list< + basic_testers, + fetch_add_tester<17, 13, 30>, + fetch_sub_tester<30, 21, 9>, + fetch_sub_tester<9, 17, -8> +>; + +using bitwise_atomic_testers = 
extend_tester_list< + arithmetic_atomic_testers, + fetch_add_tester<-8, 10, 2>, + fetch_or_tester<2, 13, 15>, + fetch_and_tester<15, 8, 8>, + fetch_and_tester<8, 13, 8>, + fetch_xor_tester<8, 12, 4> +>; + +void kernel_invoker() +{ + // todo + #ifdef _LIBCUDACXX_ATOMIC_REF_SUPPORTS_SMALL_INTEGRAL + validate_not_movable(); + validate_not_movable(); + #endif + validate_not_movable(); + validate_not_movable(); + validate_not_movable(); + + #ifdef _LIBCUDACXX_ATOMIC_REF_SUPPORTS_SMALL_INTEGRAL + validate_not_movable(); + validate_not_movable(); + #endif + validate_not_movable(); + validate_not_movable(); + validate_not_movable(); + + #ifdef _LIBCUDACXX_ATOMIC_REF_SUPPORTS_SMALL_INTEGRAL + validate_not_movable(); + validate_not_movable(); + #endif + validate_not_movable(); + validate_not_movable(); + validate_not_movable(); + + #ifdef _LIBCUDACXX_ATOMIC_REF_SUPPORTS_SMALL_INTEGRAL + validate_not_movable(); + validate_not_movable(); + #endif + validate_not_movable(); + validate_not_movable(); + validate_not_movable(); +} + +int main(int arg, char ** argv) +{ +#ifndef __CUDA_ARCH__ + kernel_invoker(); +#endif + + return 0; +} diff --git a/.upstream-tests/test/heterogeneous/cuda_atomic_ref.pass.cpp b/.upstream-tests/test/heterogeneous/cuda_atomic_ref.pass.cpp new file mode 100644 index 0000000000..b91485355a --- /dev/null +++ b/.upstream-tests/test/heterogeneous/cuda_atomic_ref.pass.cpp @@ -0,0 +1,224 @@ +//===----------------------------------------------------------------------===// +// +// Part of the libcu++ Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: nvrtc, pre-sm-60 +// UNSUPPORTED: windows && pre-sm-70 + +#include "helpers.h" + +#include + +template +struct store_tester +{ + template + __host__ __device__ + static void initialize(A & v) + { + cuda::atomic_ref a(v); + using T = decltype(a.load()); + a.store(static_cast(Operand)); + } + + template + __host__ __device__ + static void validate(A & v) + { + cuda::atomic_ref a(v); + using T = decltype(a.load()); + assert(a.load() == static_cast(Operand)); + } +}; + +template +struct exchange_tester +{ + template + __host__ __device__ + static void initialize(A & v) + { + cuda::atomic_ref a(v); + using T = decltype(a.load()); + assert(a.exchange(static_cast(Operand)) == static_cast(PreviousValue)); + } + + template + __host__ __device__ + static void validate(A & v) + { + cuda::atomic_ref a(v); + using T = decltype(a.load()); + assert(a.load() == static_cast(Operand)); + } +}; + +template +struct strong_cas_tester +{ + enum { ShouldSucceed = (Expected == PreviousValue) }; + template + __host__ __device__ + static void initialize(A & v) + { + cuda::atomic_ref a(v); + using T = decltype(a.load()); + T expected = static_cast(Expected); + assert(a.compare_exchange_strong(expected, static_cast(Desired)) == ShouldSucceed); + assert(expected == static_cast(PreviousValue)); + } + + template + __host__ __device__ + static void validate(A & v) + { + cuda::atomic_ref a(v); + using T = decltype(a.load()); + assert(a.load() == static_cast(Result)); + } +}; + +template +struct weak_cas_tester +{ + enum { ShouldSucceed = (Expected == PreviousValue) }; + template + __host__ __device__ + static void initialize(A & v) + { + cuda::atomic_ref a(v); + using T = decltype(a.load()); + T expected = static_cast(Expected); + if 
(!ShouldSucceed) + { + assert(a.compare_exchange_weak(expected, static_cast(Desired)) == false); + } + else + { + while (a.compare_exchange_weak(expected, static_cast(Desired)) != ShouldSucceed) ; + } + assert(expected == static_cast(PreviousValue)); + } + + template + __host__ __device__ + static void validate(A & v) + { + cuda::atomic_ref a(v); + using T = decltype(a.load()); + assert(a.load() == static_cast(Result)); + } +}; + +#define ATOMIC_TESTER(operation) \ + template \ + struct operation ## _tester \ + { \ + template \ + __host__ __device__ \ + static void initialize(A & v) \ + { \ + cuda::atomic_ref a(v); \ + using T = decltype(a.load()); \ + assert(a.operation(Operand) == static_cast(PreviousValue)); \ + } \ + \ + template \ + __host__ __device__ \ + static void validate(A & v) \ + { \ + cuda::atomic_ref a(v); \ + using T = decltype(a.load()); \ + assert(a.load() == static_cast(ExpectedValue)); \ + } \ + }; + +ATOMIC_TESTER(fetch_add); +ATOMIC_TESTER(fetch_sub); + +ATOMIC_TESTER(fetch_and); +ATOMIC_TESTER(fetch_or); +ATOMIC_TESTER(fetch_xor); + +ATOMIC_TESTER(fetch_min); +ATOMIC_TESTER(fetch_max); + +using basic_testers = tester_list< + store_tester<0>, + store_tester<-1>, + store_tester<17>, + exchange_tester<17, 31>, + /* *_cas_tester */ + weak_cas_tester<31, 12, 13, 31>, + weak_cas_tester<31, 31, -6, -6>, + strong_cas_tester<-6, -6, -12, -12>, + strong_cas_tester<-12, 31, 17, -12>, + exchange_tester<-12, 17> +>; + +using arithmetic_atomic_testers = extend_tester_list< + basic_testers, + fetch_add_tester<17, 13, 30>, + fetch_sub_tester<30, 21, 9>, + fetch_min_tester<9, 5, 5>, + fetch_max_tester<5, 9, 9>, + fetch_sub_tester<9, 17, -8>, +>; + +using bitwise_atomic_testers = extend_tester_list< + arithmetic_atomic_testers, + fetch_add_tester<-8, 10, 2>, + fetch_or_tester<2, 13, 15>, + fetch_and_tester<15, 8, 8>, + fetch_and_tester<8, 13, 8>, + fetch_xor_tester<8, 12, 4> +>; + +void kernel_invoker() +{ + // todo + #ifdef _LIBCUDACXX_ATOMIC_REF_SUPPORTS_SMALL_INTEGRAL + validate_not_movable(); + validate_not_movable(); + #endif + validate_not_movable(); + validate_not_movable(); + validate_not_movable(); + + #ifdef _LIBCUDACXX_ATOMIC_REF_SUPPORTS_SMALL_INTEGRAL + validate_not_movable(); + validate_not_movable(); + #endif + validate_not_movable(); + validate_not_movable(); + validate_not_movable(); + + #ifdef _LIBCUDACXX_ATOMIC_REF_SUPPORTS_SMALL_INTEGRAL + validate_not_movable(); + validate_not_movable(); + #endif + validate_not_movable(); + validate_not_movable(); + validate_not_movable(); + + #ifdef _LIBCUDACXX_ATOMIC_REF_SUPPORTS_SMALL_INTEGRAL + validate_not_movable(); + validate_not_movable(); + #endif + validate_not_movable(); + validate_not_movable(); + validate_not_movable(); +} + +int main(int arg, char ** argv) +{ +#ifndef __CUDA_ARCH__ + kernel_invoker(); +#endif + + return 0; +} diff --git a/.upstream-tests/test/std/atomics/atomics.types.generic/address_ref.pass.cpp b/.upstream-tests/test/std/atomics/atomics.types.generic/address_ref.pass.cpp new file mode 100644 index 0000000000..40b501dc8b --- /dev/null +++ b/.upstream-tests/test/std/atomics/atomics.types.generic/address_ref.pass.cpp @@ -0,0 +1,156 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// UNSUPPORTED: libcpp-has-no-threads, pre-sm-60 +// UNSUPPORTED: windows && pre-sm-70 +// ... test case crashes clang. + +// + +// template +// struct atomic +// { +// bool is_lock_free() const volatile; +// bool is_lock_free() const; +// void store(T* desr, memory_order m = memory_order_seq_cst) volatile; +// void store(T* desr, memory_order m = memory_order_seq_cst); +// T* load(memory_order m = memory_order_seq_cst) const volatile; +// T* load(memory_order m = memory_order_seq_cst) const; +// operator T*() const volatile; +// operator T*() const; +// T* exchange(T* desr, memory_order m = memory_order_seq_cst) volatile; +// T* exchange(T* desr, memory_order m = memory_order_seq_cst); +// bool compare_exchange_weak(T*& expc, T* desr, +// memory_order s, memory_order f) volatile; +// bool compare_exchange_weak(T*& expc, T* desr, +// memory_order s, memory_order f); +// bool compare_exchange_strong(T*& expc, T* desr, +// memory_order s, memory_order f) volatile; +// bool compare_exchange_strong(T*& expc, T* desr, +// memory_order s, memory_order f); +// bool compare_exchange_weak(T*& expc, T* desr, +// memory_order m = memory_order_seq_cst) volatile; +// bool compare_exchange_weak(T*& expc, T* desr, +// memory_order m = memory_order_seq_cst); +// bool compare_exchange_strong(T*& expc, T* desr, +// memory_order m = memory_order_seq_cst) volatile; +// bool compare_exchange_strong(T*& expc, T* desr, +// memory_order m = memory_order_seq_cst); +// T* fetch_add(ptrdiff_t op, memory_order m = memory_order_seq_cst) volatile; +// T* fetch_add(ptrdiff_t op, memory_order m = memory_order_seq_cst); +// T* fetch_sub(ptrdiff_t op, memory_order m = memory_order_seq_cst) volatile; +// T* fetch_sub(ptrdiff_t op, memory_order m = memory_order_seq_cst); +// +// atomic() = default; +// constexpr atomic(T* desr); +// atomic(const atomic&) = delete; +// atomic& operator=(const atomic&) = delete; +// atomic& operator=(const atomic&) volatile = delete; +// +// T* operator=(T*) volatile; +// T* operator=(T*); +// T* operator++(int) volatile; +// T* operator++(int); +// T* operator--(int) volatile; +// T* operator--(int); +// T* operator++() volatile; +// T* operator++(); +// T* operator--() volatile; +// T* operator--(); +// T* operator+=(ptrdiff_t op) volatile; +// T* operator+=(ptrdiff_t op); +// T* operator-=(ptrdiff_t op) volatile; +// T* operator-=(ptrdiff_t op); +// }; + +#include +#include +#include + +#include + +#include "test_macros.h" +#if !defined(TEST_COMPILER_C1XX) + #include "placement_new.h" +#endif +#include "cuda_space_selector.h" + +template class Selector> +__host__ __device__ +void +do_test() +{ + typedef typename cuda::std::remove_pointer::type X; + Selector sel; + T & val = *sel.construct(T(0)); + A obj(val); + bool b0 = obj.is_lock_free(); + ((void)b0); // mark as unused + assert(obj == T(0)); + obj.store(T(0)); + assert(obj == T(0)); + obj.store(T(1), cuda::std::memory_order_release); + assert(obj == T(1)); + assert(obj.load() == T(1)); + assert(obj.load(cuda::std::memory_order_acquire) == T(1)); + assert(obj.exchange(T(2)) == T(1)); + assert(obj == T(2)); + assert(obj.exchange(T(3), cuda::std::memory_order_relaxed) == T(2)); + assert(obj == T(3)); + T x = obj; + assert(cmpxchg_weak_loop(obj, x, T(2)) == true); + assert(obj == T(2)); + assert(x == T(3)); + assert(obj.compare_exchange_weak(x, T(1)) == false); + assert(obj == T(2)); + 
assert(x == T(2)); + x = T(2); + assert(obj.compare_exchange_strong(x, T(1)) == true); + assert(obj == T(1)); + assert(x == T(2)); + assert(obj.compare_exchange_strong(x, T(0)) == false); + assert(obj == T(1)); + assert(x == T(1)); + assert((obj = T(0)) == T(0)); + assert(obj == T(0)); + obj = T(2*sizeof(X)); + assert((obj += cuda::std::ptrdiff_t(3)) == T(5*sizeof(X))); + assert(obj == T(5*sizeof(X))); + assert((obj -= cuda::std::ptrdiff_t(3)) == T(2*sizeof(X))); + assert(obj == T(2*sizeof(X))); +} + +template class Selector> +__host__ __device__ +void test() +{ + do_test(); + do_test(); +} + +int main(int, char**) +{ +#if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 700 + test, int*, local_memory_selector>(); + test, int*, local_memory_selector>(); + test, int*, local_memory_selector>(); + test, int*, local_memory_selector>(); +#endif +#ifdef __CUDA_ARCH__ + test, int*, shared_memory_selector>(); + test, int*, shared_memory_selector>(); + test, int*, shared_memory_selector>(); + test, int*, shared_memory_selector>(); + + test, int*, global_memory_selector>(); + test, int*, global_memory_selector>(); + test, int*, global_memory_selector>(); + test, int*, global_memory_selector>(); +#endif + return 0; +} diff --git a/.upstream-tests/test/std/atomics/atomics.types.generic/address_ref_constness.pass.cpp b/.upstream-tests/test/std/atomics/atomics.types.generic/address_ref_constness.pass.cpp new file mode 100644 index 0000000000..8c99cb192f --- /dev/null +++ b/.upstream-tests/test/std/atomics/atomics.types.generic/address_ref_constness.pass.cpp @@ -0,0 +1,156 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// UNSUPPORTED: libcpp-has-no-threads, pre-sm-60 +// UNSUPPORTED: windows && pre-sm-70 +// ... test case crashes clang. 
+ +// + +// template +// struct atomic +// { +// bool is_lock_free() const volatile; +// bool is_lock_free() const; +// void store(T* desr, memory_order m = memory_order_seq_cst) volatile; +// void store(T* desr, memory_order m = memory_order_seq_cst); +// T* load(memory_order m = memory_order_seq_cst) const volatile; +// T* load(memory_order m = memory_order_seq_cst) const; +// operator T*() const volatile; +// operator T*() const; +// T* exchange(T* desr, memory_order m = memory_order_seq_cst) volatile; +// T* exchange(T* desr, memory_order m = memory_order_seq_cst); +// bool compare_exchange_weak(T*& expc, T* desr, +// memory_order s, memory_order f) volatile; +// bool compare_exchange_weak(T*& expc, T* desr, +// memory_order s, memory_order f); +// bool compare_exchange_strong(T*& expc, T* desr, +// memory_order s, memory_order f) volatile; +// bool compare_exchange_strong(T*& expc, T* desr, +// memory_order s, memory_order f); +// bool compare_exchange_weak(T*& expc, T* desr, +// memory_order m = memory_order_seq_cst) volatile; +// bool compare_exchange_weak(T*& expc, T* desr, +// memory_order m = memory_order_seq_cst); +// bool compare_exchange_strong(T*& expc, T* desr, +// memory_order m = memory_order_seq_cst) volatile; +// bool compare_exchange_strong(T*& expc, T* desr, +// memory_order m = memory_order_seq_cst); +// T* fetch_add(ptrdiff_t op, memory_order m = memory_order_seq_cst) volatile; +// T* fetch_add(ptrdiff_t op, memory_order m = memory_order_seq_cst); +// T* fetch_sub(ptrdiff_t op, memory_order m = memory_order_seq_cst) volatile; +// T* fetch_sub(ptrdiff_t op, memory_order m = memory_order_seq_cst); +// +// atomic() = default; +// constexpr atomic(T* desr); +// atomic(const atomic&) = delete; +// atomic& operator=(const atomic&) = delete; +// atomic& operator=(const atomic&) volatile = delete; +// +// T* operator=(T*) volatile; +// T* operator=(T*); +// T* operator++(int) volatile; +// T* operator++(int); +// T* operator--(int) volatile; +// T* operator--(int); +// T* operator++() volatile; +// T* operator++(); +// T* operator--() volatile; +// T* operator--(); +// T* operator+=(ptrdiff_t op) volatile; +// T* operator+=(ptrdiff_t op); +// T* operator-=(ptrdiff_t op) volatile; +// T* operator-=(ptrdiff_t op); +// }; + +#include +#include +#include + +#include + +#include "test_macros.h" +#if !defined(TEST_COMPILER_C1XX) + #include "placement_new.h" +#endif +#include "cuda_space_selector.h" + +template class Selector> +__host__ __device__ +void +do_test() +{ + typedef typename cuda::std::remove_pointer::type X; + Selector sel; + T & val = *sel.construct(T(0)); + A obj(val); + bool b0 = obj.is_lock_free(); + ((void)b0); // mark as unused + assert(obj == T(0)); + obj.store(T(0)); + assert(obj == T(0)); + obj.store(T(1), cuda::std::memory_order_release); + assert(obj == T(1)); + assert(obj.load() == T(1)); + assert(obj.load(cuda::std::memory_order_acquire) == T(1)); + assert(obj.exchange(T(2)) == T(1)); + assert(obj == T(2)); + assert(obj.exchange(T(3), cuda::std::memory_order_relaxed) == T(2)); + assert(obj == T(3)); + T x = obj; + assert(cmpxchg_weak_loop(obj, x, T(2)) == true); + assert(obj == T(2)); + assert(x == T(3)); + assert(obj.compare_exchange_weak(x, T(1)) == false); + assert(obj == T(2)); + assert(x == T(2)); + x = T(2); + assert(obj.compare_exchange_strong(x, T(1)) == true); + assert(obj == T(1)); + assert(x == T(2)); + assert(obj.compare_exchange_strong(x, T(0)) == false); + assert(obj == T(1)); + assert(x == T(1)); + assert((obj = T(0)) == T(0)); + assert(obj 
== T(0)); + obj = T(2*sizeof(X)); + assert((obj += cuda::std::ptrdiff_t(3)) == T(5*sizeof(X))); + assert(obj == T(5*sizeof(X))); + assert((obj -= cuda::std::ptrdiff_t(3)) == T(2*sizeof(X))); + assert(obj == T(2*sizeof(X))); +} + +template class Selector> +__host__ __device__ +void test() +{ + do_test(); + do_test(); +} + +int main(int, char**) +{ +#if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 700 + test, int*, local_memory_selector>(); + test, int*, local_memory_selector>(); + test, int*, local_memory_selector>(); + test, int*, local_memory_selector>(); +#endif +#ifdef __CUDA_ARCH__ + test, int*, shared_memory_selector>(); + test, int*, shared_memory_selector>(); + test, int*, shared_memory_selector>(); + test, int*, shared_memory_selector>(); + + test, int*, global_memory_selector>(); + test, int*, global_memory_selector>(); + test, int*, global_memory_selector>(); + test, int*, global_memory_selector>(); +#endif + return 0; +} diff --git a/.upstream-tests/test/std/atomics/atomics.types.generic/integral_ref.pass.cpp b/.upstream-tests/test/std/atomics/atomics.types.generic/integral_ref.pass.cpp new file mode 100644 index 0000000000..09d678b64f --- /dev/null +++ b/.upstream-tests/test/std/atomics/atomics.types.generic/integral_ref.pass.cpp @@ -0,0 +1,216 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// UNSUPPORTED: libcpp-has-no-threads, pre-sm-60 +// UNSUPPORTED: windows && pre-sm-70 + +// + +// template <> +// struct atomic_ref +// { +// bool is_lock_free() const volatile; +// bool is_lock_free() const; +// void store(integral desr, memory_order m = memory_order_seq_cst) volatile; +// void store(integral desr, memory_order m = memory_order_seq_cst); +// integral load(memory_order m = memory_order_seq_cst) const volatile; +// integral load(memory_order m = memory_order_seq_cst) const; +// operator integral() const volatile; +// operator integral() const; +// integral exchange(integral desr, +// memory_order m = memory_order_seq_cst) volatile; +// integral exchange(integral desr, memory_order m = memory_order_seq_cst); +// bool compare_exchange_weak(integral& expc, integral desr, +// memory_order s, memory_order f) volatile; +// bool compare_exchange_weak(integral& expc, integral desr, +// memory_order s, memory_order f); +// bool compare_exchange_strong(integral& expc, integral desr, +// memory_order s, memory_order f) volatile; +// bool compare_exchange_strong(integral& expc, integral desr, +// memory_order s, memory_order f); +// bool compare_exchange_weak(integral& expc, integral desr, +// memory_order m = memory_order_seq_cst) volatile; +// bool compare_exchange_weak(integral& expc, integral desr, +// memory_order m = memory_order_seq_cst); +// bool compare_exchange_strong(integral& expc, integral desr, +// memory_order m = memory_order_seq_cst) volatile; +// bool compare_exchange_strong(integral& expc, integral desr, +// memory_order m = memory_order_seq_cst); +// +// integral +// fetch_add(integral op, memory_order m = memory_order_seq_cst) volatile; +// integral fetch_add(integral op, memory_order m = memory_order_seq_cst); +// integral +// fetch_sub(integral op, memory_order m = memory_order_seq_cst) volatile; +// integral fetch_sub(integral op, 
memory_order m = memory_order_seq_cst); +// integral +// fetch_and(integral op, memory_order m = memory_order_seq_cst) volatile; +// integral fetch_and(integral op, memory_order m = memory_order_seq_cst); +// integral +// fetch_or(integral op, memory_order m = memory_order_seq_cst) volatile; +// integral fetch_or(integral op, memory_order m = memory_order_seq_cst); +// integral +// fetch_xor(integral op, memory_order m = memory_order_seq_cst) volatile; +// integral fetch_xor(integral op, memory_order m = memory_order_seq_cst); +// +// atomic_ref() = delete; +// constexpr atomic_ref(integral& desr); +// atomic_ref(const atomic_ref&) = default; +// atomic_ref& operator=(const atomic_ref&) = delete; +// atomic_ref& operator=(const atomic_ref&) volatile = delete; +// integral operator=(integral desr) volatile; +// integral operator=(integral desr); +// +// integral operator++(int) volatile; +// integral operator++(int); +// integral operator--(int) volatile; +// integral operator--(int); +// integral operator++() volatile; +// integral operator++(); +// integral operator--() volatile; +// integral operator--(); +// integral operator+=(integral op) volatile; +// integral operator+=(integral op); +// integral operator-=(integral op) volatile; +// integral operator-=(integral op); +// integral operator&=(integral op) volatile; +// integral operator&=(integral op); +// integral operator|=(integral op) volatile; +// integral operator|=(integral op); +// integral operator^=(integral op) volatile; +// integral operator^=(integral op); +// }; + +#include +#include + +#include + +#include "test_macros.h" +#if !defined(TEST_COMPILER_C1XX) + #include "placement_new.h" +#endif +#include "cuda_space_selector.h" + +template class Selector> +__host__ __device__ __noinline__ +void do_test() { + Selector sel; + T & val = *sel.construct(T(0)); + assert(&val); + assert(val == T(0)); + A obj(val); + assert(obj.load() == T(0)); + bool b0 = obj.is_lock_free(); + ((void)b0); // mark as unused + obj.store(T(0)); + assert(obj.load() == T(0)); + assert(obj == T(0)); + obj.store(T(1), cuda::std::memory_order_release); + assert(obj == T(1)); + assert(obj.load() == T(1)); + assert(obj.load(cuda::std::memory_order_acquire) == T(1)); + assert(obj.exchange(T(2)) == T(1)); + assert(obj == T(2)); + assert(obj.exchange(T(3), cuda::std::memory_order_relaxed) == T(2)); + assert(obj == T(3)); + T x = obj; + assert(cmpxchg_weak_loop(obj, x, T(2)) == true); + assert(obj == T(2)); + assert(x == T(3)); + assert(obj.compare_exchange_weak(x, T(1)) == false); + assert(obj == T(2)); + assert(x == T(2)); + x = T(2); + assert(obj.compare_exchange_strong(x, T(1)) == true); + assert(obj == T(1)); + assert(x == T(2)); + assert(obj.compare_exchange_strong(x, T(0)) == false); + assert(obj == T(1)); + assert(x == T(1)); + assert((obj = T(0)) == T(0)); + assert(obj == T(0)); + assert(obj++ == T(0)); + assert(obj == T(1)); + assert(++obj == T(2)); + assert(obj == T(2)); + assert(--obj == T(1)); + assert(obj == T(1)); + assert(obj-- == T(1)); + assert(obj == T(0)); + obj = T(2); + assert((obj += T(3)) == T(5)); + assert(obj == T(5)); + assert((obj -= T(3)) == T(2)); + assert(obj == T(2)); + assert((obj |= T(5)) == T(7)); + assert(obj == T(7)); + assert((obj &= T(0xF)) == T(7)); + assert(obj == T(7)); + assert((obj ^= T(0xF)) == T(8)); + assert(obj == T(8)); +} + +template class Selector> +__host__ __device__ __noinline__ +void test() +{ + do_test(); + do_test(); +} + +template typename Atomic, cuda::thread_scope Scope, template class Selector> 
+__host__ __device__ +void test_for_all_types() +{ + test, int, Selector>(); + test, unsigned int, Selector>(); + test, long, Selector>(); + test, unsigned long, Selector>(); + test, long long, Selector>(); + test, unsigned long long, Selector>(); +#ifndef _LIBCUDACXX_HAS_NO_UNICODE_CHARS + test, char32_t, Selector>(); +#endif // _LIBCUDACXX_HAS_NO_UNICODE_CHARS + test, int32_t, Selector>(); + test, uint32_t, Selector>(); + test, int64_t, Selector>(); + test, uint64_t, Selector>(); +} + +template +using cuda_std_atomic_ref = cuda::std::atomic_ref; + +template +using cuda_atomic_ref = cuda::atomic_ref; + +int main(int, char**) +{ + // this test would instantiate more cases than just the ones below + // but ptxas already consumes 5 GB of RAM while translating these + // so in the interest of not eating all memory, it's limited to the current set + // + // the per-function tests *should* cover the other codegen aspects of the + // code, and the cross between scopes and memory locations below should provide + // a *reasonable* subset of all the possible combinations to provide enough + // confidence that this all actually works + +#if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 700 + test_for_all_types(); + test_for_all_types(); +#endif +#ifdef __CUDA_ARCH__ + test_for_all_types(); + test_for_all_types(); + + test_for_all_types(); + test_for_all_types(); +#endif + + return 0; +} diff --git a/.upstream-tests/test/std/atomics/atomics.types.generic/integral_ref_constness.pass.cpp b/.upstream-tests/test/std/atomics/atomics.types.generic/integral_ref_constness.pass.cpp new file mode 100644 index 0000000000..cf0f98312e --- /dev/null +++ b/.upstream-tests/test/std/atomics/atomics.types.generic/integral_ref_constness.pass.cpp @@ -0,0 +1,216 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// UNSUPPORTED: libcpp-has-no-threads, pre-sm-60 +// UNSUPPORTED: windows && pre-sm-70 + +// + +// template <> +// struct atomic_ref +// { +// bool is_lock_free() const volatile; +// bool is_lock_free() const; +// void store(integral desr, memory_order m = memory_order_seq_cst) volatile; +// void store(integral desr, memory_order m = memory_order_seq_cst); +// integral load(memory_order m = memory_order_seq_cst) const volatile; +// integral load(memory_order m = memory_order_seq_cst) const; +// operator integral() const volatile; +// operator integral() const; +// integral exchange(integral desr, +// memory_order m = memory_order_seq_cst) volatile; +// integral exchange(integral desr, memory_order m = memory_order_seq_cst); +// bool compare_exchange_weak(integral& expc, integral desr, +// memory_order s, memory_order f) volatile; +// bool compare_exchange_weak(integral& expc, integral desr, +// memory_order s, memory_order f); +// bool compare_exchange_strong(integral& expc, integral desr, +// memory_order s, memory_order f) volatile; +// bool compare_exchange_strong(integral& expc, integral desr, +// memory_order s, memory_order f); +// bool compare_exchange_weak(integral& expc, integral desr, +// memory_order m = memory_order_seq_cst) volatile; +// bool compare_exchange_weak(integral& expc, integral desr, +// memory_order m = memory_order_seq_cst); +// bool compare_exchange_strong(integral& expc, integral desr, +// memory_order m = memory_order_seq_cst) volatile; +// bool compare_exchange_strong(integral& expc, integral desr, +// memory_order m = memory_order_seq_cst); +// +// integral +// fetch_add(integral op, memory_order m = memory_order_seq_cst) volatile; +// integral fetch_add(integral op, memory_order m = memory_order_seq_cst); +// integral +// fetch_sub(integral op, memory_order m = memory_order_seq_cst) volatile; +// integral fetch_sub(integral op, memory_order m = memory_order_seq_cst); +// integral +// fetch_and(integral op, memory_order m = memory_order_seq_cst) volatile; +// integral fetch_and(integral op, memory_order m = memory_order_seq_cst); +// integral +// fetch_or(integral op, memory_order m = memory_order_seq_cst) volatile; +// integral fetch_or(integral op, memory_order m = memory_order_seq_cst); +// integral +// fetch_xor(integral op, memory_order m = memory_order_seq_cst) volatile; +// integral fetch_xor(integral op, memory_order m = memory_order_seq_cst); +// +// atomic_ref() = delete; +// constexpr atomic_ref(integral& desr); +// atomic_ref(const atomic_ref&) = default; +// atomic_ref& operator=(const atomic_ref&) = delete; +// atomic_ref& operator=(const atomic_ref&) volatile = delete; +// integral operator=(integral desr) volatile; +// integral operator=(integral desr); +// +// integral operator++(int) volatile; +// integral operator++(int); +// integral operator--(int) volatile; +// integral operator--(int); +// integral operator++() volatile; +// integral operator++(); +// integral operator--() volatile; +// integral operator--(); +// integral operator+=(integral op) volatile; +// integral operator+=(integral op); +// integral operator-=(integral op) volatile; +// integral operator-=(integral op); +// integral operator&=(integral op) volatile; +// integral operator&=(integral op); +// integral operator|=(integral op) volatile; +// integral operator|=(integral op); +// integral operator^=(integral op) volatile; 
+// integral operator^=(integral op); +// }; + +#include +#include + +#include + +#include "test_macros.h" +#if !defined(TEST_COMPILER_C1XX) + #include "placement_new.h" +#endif +#include "cuda_space_selector.h" + +template class Selector> +__host__ __device__ __noinline__ +void do_test() { + Selector sel; + T & val = *sel.construct(T(0)); + assert(&val); + assert(val == T(0)); + A obj(val); + assert(obj.load() == T(0)); + bool b0 = obj.is_lock_free(); + ((void)b0); // mark as unused + obj.store(T(0)); + assert(obj.load() == T(0)); + assert(obj == T(0)); + obj.store(T(1), cuda::std::memory_order_release); + assert(obj == T(1)); + assert(obj.load() == T(1)); + assert(obj.load(cuda::std::memory_order_acquire) == T(1)); + assert(obj.exchange(T(2)) == T(1)); + assert(obj == T(2)); + assert(obj.exchange(T(3), cuda::std::memory_order_relaxed) == T(2)); + assert(obj == T(3)); + T x = obj; + assert(cmpxchg_weak_loop(obj, x, T(2)) == true); + assert(obj == T(2)); + assert(x == T(3)); + assert(obj.compare_exchange_weak(x, T(1)) == false); + assert(obj == T(2)); + assert(x == T(2)); + x = T(2); + assert(obj.compare_exchange_strong(x, T(1)) == true); + assert(obj == T(1)); + assert(x == T(2)); + assert(obj.compare_exchange_strong(x, T(0)) == false); + assert(obj == T(1)); + assert(x == T(1)); + assert((obj = T(0)) == T(0)); + assert(obj == T(0)); + assert(obj++ == T(0)); + assert(obj == T(1)); + assert(++obj == T(2)); + assert(obj == T(2)); + assert(--obj == T(1)); + assert(obj == T(1)); + assert(obj-- == T(1)); + assert(obj == T(0)); + obj = T(2); + assert((obj += T(3)) == T(5)); + assert(obj == T(5)); + assert((obj -= T(3)) == T(2)); + assert(obj == T(2)); + assert((obj |= T(5)) == T(7)); + assert(obj == T(7)); + assert((obj &= T(0xF)) == T(7)); + assert(obj == T(7)); + assert((obj ^= T(0xF)) == T(8)); + assert(obj == T(8)); +} + +template class Selector> +__host__ __device__ __noinline__ +void test() +{ + do_test(); + do_test(); +} + +template typename Atomic, cuda::thread_scope Scope, template class Selector> +__host__ __device__ +void test_for_all_types() +{ + test, int, Selector>(); + test, unsigned int, Selector>(); + test, long, Selector>(); + test, unsigned long, Selector>(); + test, long long, Selector>(); + test, unsigned long long, Selector>(); +#ifndef _LIBCUDACXX_HAS_NO_UNICODE_CHARS + test, char32_t, Selector>(); +#endif // _LIBCUDACXX_HAS_NO_UNICODE_CHARS + test, int32_t, Selector>(); + test, uint32_t, Selector>(); + test, int64_t, Selector>(); + test, uint64_t, Selector>(); +} + +template +using cuda_std_atomic_ref = const cuda::std::atomic_ref; + +template +using cuda_atomic_ref = const cuda::atomic_ref; + +int main(int, char**) +{ + // this test would instantiate more cases than just the ones below + // but ptxas already consumes 5 GB of RAM while translating these + // so in the interest of not eating all memory, it's limited to the current set + // + // the per-function tests *should* cover the other codegen aspects of the + // code, and the cross between scopes and memory locations below should provide + // a *reasonable* subset of all the possible combinations to provide enough + // confidence that this all actually works + +#if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 700 + test_for_all_types(); + test_for_all_types(); +#endif +#ifdef __CUDA_ARCH__ + test_for_all_types(); + test_for_all_types(); + + test_for_all_types(); + test_for_all_types(); +#endif + + return 0; +} diff --git a/.upstream-tests/test/std/atomics/atomics.types.generic/trivially_copyable.fail.cpp 
b/.upstream-tests/test/std/atomics/atomics.types.generic/trivially_copyable.fail.cpp index 96178d95bd..a114ece627 100644 --- a/.upstream-tests/test/std/atomics/atomics.types.generic/trivially_copyable.fail.cpp +++ b/.upstream-tests/test/std/atomics/atomics.types.generic/trivially_copyable.fail.cpp @@ -9,6 +9,9 @@ // .fail. expects compilation to fail, but this would only fail at runtime with NVRTC // UNSUPPORTED: nvrtc +// trivially_copyable not supported on gcc4.8 +// UNSUPPORTED: gcc-4.8 + // // template @@ -53,16 +56,18 @@ #include struct NotTriviallyCopyable { - NotTriviallyCopyable ( int i ) : i_(i) {} - NotTriviallyCopyable ( const NotTriviallyCopyable &rhs) : i_(rhs.i_) {} + __host__ __device__ NotTriviallyCopyable ( int i ) : i_(i) {} + __host__ __device__ NotTriviallyCopyable ( const NotTriviallyCopyable &rhs) : i_(rhs.i_) {} int i_; }; -template +template +__host__ __device__ void test ( T t ) { cuda::std::atomic t0(t); } + int main(int, char**) { test(NotTriviallyCopyable(42)); diff --git a/.upstream-tests/test/std/atomics/atomics.types.generic/trivially_copyable.pass.cpp b/.upstream-tests/test/std/atomics/atomics.types.generic/trivially_copyable.pass.cpp index 76fbe41d23..3757fa1a4b 100644 --- a/.upstream-tests/test/std/atomics/atomics.types.generic/trivially_copyable.pass.cpp +++ b/.upstream-tests/test/std/atomics/atomics.types.generic/trivially_copyable.pass.cpp @@ -64,13 +64,14 @@ struct TriviallyCopyable { __host__ __device__ TriviallyCopyable ( int i ) : i_(i) {} int i_; - }; +}; template __host__ __device__ void test ( T t ) { cuda::std::atomic t0(t); - } + cuda::std::atomic_ref t1(t); +} int main(int, char**) { diff --git a/.upstream-tests/test/std/atomics/atomics.types.generic/trivially_copyable_ref.fail.cpp b/.upstream-tests/test/std/atomics/atomics.types.generic/trivially_copyable_ref.fail.cpp new file mode 100644 index 0000000000..f86e3c3e13 --- /dev/null +++ b/.upstream-tests/test/std/atomics/atomics.types.generic/trivially_copyable_ref.fail.cpp @@ -0,0 +1,75 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// .fail. 
expects compilation to fail, but this would only fail at runtime with NVRTC +// UNSUPPORTED: nvrtc + +// trivially_copyable not supported on gcc4.8 +// UNSUPPORTED: gcc-4.8 + +// + +// template +// struct atomic +// { +// bool is_lock_free() const volatile noexcept; +// bool is_lock_free() const noexcept; +// void store(T desr, memory_order m = memory_order_seq_cst) volatile noexcept; +// void store(T desr, memory_order m = memory_order_seq_cst) noexcept; +// T load(memory_order m = memory_order_seq_cst) const volatile noexcept; +// T load(memory_order m = memory_order_seq_cst) const noexcept; +// operator T() const volatile noexcept; +// operator T() const noexcept; +// T exchange(T desr, memory_order m = memory_order_seq_cst) volatile noexcept; +// T exchange(T desr, memory_order m = memory_order_seq_cst) noexcept; +// bool compare_exchange_weak(T& expc, T desr, +// memory_order s, memory_order f) volatile noexcept; +// bool compare_exchange_weak(T& expc, T desr, memory_order s, memory_order f) noexcept; +// bool compare_exchange_strong(T& expc, T desr, +// memory_order s, memory_order f) volatile noexcept; +// bool compare_exchange_strong(T& expc, T desr, +// memory_order s, memory_order f) noexcept; +// bool compare_exchange_weak(T& expc, T desr, +// memory_order m = memory_order_seq_cst) volatile noexcept; +// bool compare_exchange_weak(T& expc, T desr, +// memory_order m = memory_order_seq_cst) noexcept; +// bool compare_exchange_strong(T& expc, T desr, +// memory_order m = memory_order_seq_cst) volatile noexcept; +// bool compare_exchange_strong(T& expc, T desr, +// memory_order m = memory_order_seq_cst) noexcept; +// +// atomic() noexcept = default; +// constexpr atomic(T desr) noexcept; +// atomic(const atomic&) = delete; +// atomic& operator=(const atomic&) = delete; +// atomic& operator=(const atomic&) volatile = delete; +// T operator=(T) volatile noexcept; +// T operator=(T) noexcept; +// }; + +#include +#include + +struct NotTriviallyCopyable { + __host__ __device__ NotTriviallyCopyable ( int i ) : i_(i) {} + __host__ __device__ NotTriviallyCopyable ( const NotTriviallyCopyable &rhs) : i_(rhs.i_) {} + int i_; +}; + +template +__host__ __device__ +void test ( T t ) { + cuda::std::atomic_ref t0(t); +} + +int main(int, char**) +{ + test(NotTriviallyCopyable(42)); + + return 0; +} diff --git a/.upstream-tests/test/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_helpers.h b/.upstream-tests/test/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_helpers.h index 9e211a849d..2e73644884 100644 --- a/.upstream-tests/test/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_helpers.h +++ b/.upstream-tests/test/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_helpers.h @@ -78,5 +78,46 @@ struct TestEachAtomicType { } }; +template < template class, cuda::thread_scope> class TestFunctor, + template class Selector, cuda::thread_scope Scope +#if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 600 + = cuda::thread_scope_system +#endif +> +struct TestEachIntegralRefType { + __host__ __device__ + void operator()() const { + TestFunctor()(); + TestFunctor()(); + TestFunctor()(); + TestFunctor()(); + TestFunctor()(); + TestFunctor()(); +#ifndef _LIBCUDACXX_HAS_NO_UNICODE_CHARS + TestFunctor()(); +#endif + TestFunctor< int32_t, Selector, Scope>()(); + TestFunctor()(); + TestFunctor< int64_t, Selector, Scope>()(); + TestFunctor()(); + } +}; + +template < template class, cuda::thread_scope> class TestFunctor, + 
template class Selector, cuda::thread_scope Scope +#if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 600 + = cuda::thread_scope_system +#endif +> +struct TestEachAtomicRefType { + __host__ __device__ + void operator()() const { + TestEachIntegralRefType()(); + TestFunctor()(); + TestFunctor()(); + TestFunctor()(); + } +}; + #endif // ATOMIC_HELPER_H diff --git a/include/cuda/std/atomic b/include/cuda/std/atomic index fa277dd829..11d4a528ce 100644 --- a/include/cuda/std/atomic +++ b/include/cuda/std/atomic @@ -82,8 +82,8 @@ struct atomic : public std::__atomic_base<_Tp, _Sco> { typedef std::__atomic_base<_Tp, _Sco> __base; - __host__ __device__ - atomic() noexcept : __base() {} + + constexpr atomic() noexcept = default; __host__ __device__ constexpr atomic(_Tp __d) noexcept : __base(__d) {} @@ -114,8 +114,8 @@ struct atomic<_Tp*, _Sco> : public std::__atomic_base<_Tp*, _Sco> { typedef std::__atomic_base<_Tp*, _Sco> __base; - __host__ __device__ - atomic() noexcept : __base() {} + + constexpr atomic() noexcept = default; __host__ __device__ constexpr atomic(_Tp* __d) noexcept : __base(__d) {} @@ -167,6 +167,98 @@ struct atomic<_Tp*, _Sco> _Tp* operator-=(ptrdiff_t __op) noexcept {return fetch_sub(__op) - __op;} }; +// atomic + +template +struct atomic_ref + : public std::__atomic_base_ref<_Tp, _Sco> +{ + typedef std::__atomic_base_ref<_Tp, _Sco> __base; + + __host__ __device__ + constexpr atomic_ref(_Tp& __d) noexcept : __base(__d) {} + + __host__ __device__ + _Tp operator=(_Tp __d) const volatile noexcept + {__base::store(__d); return __d;} + __host__ __device__ + _Tp operator=(_Tp __d) const noexcept + {__base::store(__d); return __d;} + + __host__ __device__ + _Tp fetch_max(const _Tp & __op, memory_order __m = memory_order_seq_cst) const volatile noexcept + { + return std::__detail::__cxx_atomic_fetch_max(&this->__a_, __op, __m); + } + + __host__ __device__ + _Tp fetch_min(const _Tp & __op, memory_order __m = memory_order_seq_cst) const volatile noexcept + { + return std::__detail::__cxx_atomic_fetch_min(&this->__a_, __op, __m); + } +}; + +// atomic + +template +struct atomic_ref<_Tp*, _Sco> + : public std::__atomic_base_ref<_Tp*, _Sco> +{ + typedef std::__atomic_base_ref<_Tp*, _Sco> __base; + + __host__ __device__ + constexpr atomic_ref(_Tp*& __d) noexcept : __base(__d) {} + + __host__ __device__ + _Tp* operator=(_Tp* __d) const volatile noexcept + {__base::store(__d); return __d;} + __host__ __device__ + _Tp* operator=(_Tp* __d) const noexcept + {__base::store(__d); return __d;} + + __host__ __device__ + _Tp* fetch_add(ptrdiff_t __op, + memory_order __m = memory_order_seq_cst) const volatile noexcept + {return __cxx_atomic_fetch_add(&this->__a_, __op, __m);} + __host__ __device__ + _Tp* fetch_add(ptrdiff_t __op, + memory_order __m = memory_order_seq_cst) const noexcept + {return __cxx_atomic_fetch_add(&this->__a_, __op, __m);} + __host__ __device__ + _Tp* fetch_sub(ptrdiff_t __op, + memory_order __m = memory_order_seq_cst) const volatile noexcept + {return __cxx_atomic_fetch_sub(&this->__a_, __op, __m);} + __host__ __device__ + _Tp* fetch_sub(ptrdiff_t __op, + memory_order __m = memory_order_seq_cst) const noexcept + {return __cxx_atomic_fetch_sub(&this->__a_, __op, __m);} + + __host__ __device__ + _Tp* operator++(int) const volatile noexcept {return fetch_add(1);} + __host__ __device__ + _Tp* operator++(int) const noexcept {return fetch_add(1);} + __host__ __device__ + _Tp* operator--(int) const volatile noexcept {return fetch_sub(1);} + __host__ __device__ + _Tp* operator--(int) const 
noexcept {return fetch_sub(1);} + __host__ __device__ + _Tp* operator++() const volatile noexcept {return fetch_add(1) + 1;} + __host__ __device__ + _Tp* operator++() const noexcept {return fetch_add(1) + 1;} + __host__ __device__ + _Tp* operator--() const volatile noexcept {return fetch_sub(1) - 1;} + __host__ __device__ + _Tp* operator--() const noexcept {return fetch_sub(1) - 1;} + __host__ __device__ + _Tp* operator+=(ptrdiff_t __op) const volatile noexcept {return fetch_add(__op) + __op;} + __host__ __device__ + _Tp* operator+=(ptrdiff_t __op) const noexcept {return fetch_add(__op) + __op;} + __host__ __device__ + _Tp* operator-=(ptrdiff_t __op) const volatile noexcept {return fetch_sub(__op) - __op;} + __host__ __device__ + _Tp* operator-=(ptrdiff_t __op) const noexcept {return fetch_sub(__op) - __op;} +}; + inline __host__ __device__ void atomic_thread_fence(memory_order __m, thread_scope _Scope = thread_scope::thread_scope_system) { NV_DISPATCH_TARGET( NV_IS_DEVICE, ( diff --git a/include/cuda/std/detail/libcxx/include/atomic b/include/cuda/std/detail/libcxx/include/atomic index 7ca9731aaa..615fa8b66e 100644 --- a/include/cuda/std/detail/libcxx/include/atomic +++ b/include/cuda/std/detail/libcxx/include/atomic @@ -697,6 +697,7 @@ namespace __detail { } using __detail::__cxx_atomic_base_impl; +using __detail::__cxx_atomic_ref_base_impl; using __detail::__cxx_atomic_thread_fence; using __detail::__cxx_atomic_signal_fence; using __detail::__cxx_atomic_load; @@ -754,6 +755,8 @@ struct __cxx_atomic_lock_impl { __cxx_atomic_lock_impl(_Tp value) _NOEXCEPT : __a_value(value), __a_lock(0) {} + __cxx_atomic_lock_impl(const __cxx_atomic_lock_impl&) _NOEXCEPT = default; + _Tp __a_value; mutable __cxx_atomic_base_impl<_LIBCUDACXX_ATOMIC_FLAG_TYPE, _Sco> __a_lock; @@ -1054,20 +1057,12 @@ template > #endif //_LIBCUDACXX_ATOMIC_ONLY_USE_BUILTINS struct __cxx_atomic_impl : public _Base { - -#if _GNUC_VER >= 501 - static_assert(is_trivially_copyable<_Tp>::value, - "std::atomic requires that 'Tp' be a trivially copyable type"); -#endif - -#ifdef _LIBCUDACXX_CXX03_LANG - _LIBCUDACXX_INLINE_VISIBILITY -#endif __cxx_atomic_impl() _NOEXCEPT _LIBCUDACXX_DEFAULT _LIBCUDACXX_INLINE_VISIBILITY _LIBCUDACXX_CONSTEXPR explicit __cxx_atomic_impl(_Tp value) _NOEXCEPT : _Base(value) {} }; + template _LIBCUDACXX_INLINE_VISIBILITY __cxx_atomic_impl<_Tp, _Sco>* __cxx_atomic_rebind(_Tp* __inst) { @@ -1076,6 +1071,9 @@ __cxx_atomic_impl<_Tp, _Sco>* __cxx_atomic_rebind(_Tp* __inst) { return (__cxx_atomic_impl<_Tp, _Sco>*)__inst; } +template +using __cxx_atomic_ref_impl = __cxx_atomic_ref_base_impl<_Tp, _Sco>; + #ifdef _LIBCUDACXX_HAS_NO_THREAD_CONTENTION_TABLE template @@ -1241,13 +1239,15 @@ _LIBCUDACXX_INLINE_VISIBILITY void __cxx_atomic_wait(__cxx_atomic_impl<_Tp, _Sco __cxx_atomic_try_wait_slow(__a, __val, __order); } -// general atomic - -template ::value && !is_same<_Tp, bool>::value> -struct __atomic_base // false -{ +// general atomic/atomic_ref +template () && !is_same<_Tp, bool>()> +struct __atomic_base { mutable __cxx_atomic_impl<_Tp, _Sco> __a_; + __atomic_base() = default; + _LIBCUDACXX_INLINE_VISIBILITY _LIBCUDACXX_CONSTEXPR + __atomic_base(const _Tp& __a) _NOEXCEPT : __a_(__a) {} + #if defined(_LIBCUDACXX_ATOMIC_ALWAYS_LOCK_FREE) static _LIBCUDACXX_CONSTEXPR bool is_always_lock_free = _LIBCUDACXX_ATOMIC_ALWAYS_LOCK_FREE(sizeof(decltype(__a_)), 0); #endif // defined(_LIBCUDACXX_ATOMIC_ALWAYS_LOCK_FREE) @@ -1357,48 +1357,135 @@ struct __atomic_base // false {__cxx_atomic_notify_all(&__a_);} 
_LIBCUDACXX_INLINE_VISIBILITY void notify_all() _NOEXCEPT {__cxx_atomic_notify_all(&__a_);} +}; -#ifdef _LIBCUDACXX_CXX03_LANG - _LIBCUDACXX_INLINE_VISIBILITY -#endif - __atomic_base() _NOEXCEPT _LIBCUDACXX_DEFAULT +template () && !is_same<_Tp, bool>()> +struct __atomic_base_ref { + mutable __cxx_atomic_ref_impl<_Tp, _Sco> __a_; + __atomic_base_ref() = default; _LIBCUDACXX_INLINE_VISIBILITY _LIBCUDACXX_CONSTEXPR - __atomic_base(_Tp __d) _NOEXCEPT : __a_(__d) {} + __atomic_base_ref(_Tp& __a) _NOEXCEPT : __a_(__a) {} + +#if defined(_LIBCUDACXX_ATOMIC_ALWAYS_LOCK_FREE) + static _LIBCUDACXX_CONSTEXPR bool is_always_lock_free = _LIBCUDACXX_ATOMIC_ALWAYS_LOCK_FREE(sizeof(decltype(__a_)), 0); +#endif // defined(_LIBCUDACXX_ATOMIC_ALWAYS_LOCK_FREE) + -#ifndef _LIBCUDACXX_CXX03_LANG - __atomic_base(const __atomic_base&) = delete; - __atomic_base& operator=(const __atomic_base&) = delete; - __atomic_base& operator=(const __atomic_base&) volatile = delete; -#else -private: _LIBCUDACXX_INLINE_VISIBILITY - __atomic_base(const __atomic_base&); + bool is_lock_free() const volatile _NOEXCEPT + {return __cxx_atomic_is_lock_free(sizeof(_Tp));} _LIBCUDACXX_INLINE_VISIBILITY - __atomic_base& operator=(const __atomic_base&); + bool is_lock_free() const _NOEXCEPT + {return static_cast<__atomic_base_ref const volatile*>(this)->is_lock_free();} _LIBCUDACXX_INLINE_VISIBILITY - __atomic_base& operator=(const __atomic_base&) volatile; -#endif -}; - -#if defined(_LIBCUDACXX_ATOMIC_ALWAYS_LOCK_FREE) -template -_LIBCUDACXX_CONSTEXPR bool __atomic_base<_Tp, _Sco, __b>::is_always_lock_free; -#endif + void store(_Tp __d, memory_order __m = memory_order_seq_cst) const volatile _NOEXCEPT + _LIBCUDACXX_CHECK_STORE_MEMORY_ORDER(__m) + {__cxx_atomic_store(&__a_, __d, __m);} + _LIBCUDACXX_INLINE_VISIBILITY + void store(_Tp __d, memory_order __m = memory_order_seq_cst) const _NOEXCEPT + _LIBCUDACXX_CHECK_STORE_MEMORY_ORDER(__m) + {__cxx_atomic_store(&__a_, __d, __m);} + _LIBCUDACXX_INLINE_VISIBILITY + _Tp load(memory_order __m = memory_order_seq_cst) const volatile _NOEXCEPT + _LIBCUDACXX_CHECK_LOAD_MEMORY_ORDER(__m) + {return __cxx_atomic_load(&__a_, __m);} + _LIBCUDACXX_INLINE_VISIBILITY + _Tp load(memory_order __m = memory_order_seq_cst) const _NOEXCEPT + _LIBCUDACXX_CHECK_LOAD_MEMORY_ORDER(__m) + {return __cxx_atomic_load(&__a_, __m);} + _LIBCUDACXX_INLINE_VISIBILITY + operator _Tp() const volatile _NOEXCEPT {return load();} + _LIBCUDACXX_INLINE_VISIBILITY + operator _Tp() const _NOEXCEPT {return load();} + _LIBCUDACXX_INLINE_VISIBILITY + _Tp exchange(_Tp __d, memory_order __m = memory_order_seq_cst) const volatile _NOEXCEPT + {return __cxx_atomic_exchange(&__a_, __d, __m);} + _LIBCUDACXX_INLINE_VISIBILITY + _Tp exchange(_Tp __d, memory_order __m = memory_order_seq_cst) const _NOEXCEPT + {return __cxx_atomic_exchange(&__a_, __d, __m);} + _LIBCUDACXX_INLINE_VISIBILITY + bool compare_exchange_weak(_Tp& __e, _Tp __d, + memory_order __s, memory_order __f) const volatile _NOEXCEPT + _LIBCUDACXX_CHECK_EXCHANGE_MEMORY_ORDER(__s, __f) + {return __cxx_atomic_compare_exchange_weak(&__a_, &__e, __d, __s, __f);} + _LIBCUDACXX_INLINE_VISIBILITY + bool compare_exchange_weak(_Tp& __e, _Tp __d, + memory_order __s, memory_order __f) const _NOEXCEPT + _LIBCUDACXX_CHECK_EXCHANGE_MEMORY_ORDER(__s, __f) + {return __cxx_atomic_compare_exchange_weak(&__a_, &__e, __d, __s, __f);} + _LIBCUDACXX_INLINE_VISIBILITY + bool compare_exchange_strong(_Tp& __e, _Tp __d, + memory_order __s, memory_order __f) const volatile _NOEXCEPT + 
_LIBCUDACXX_CHECK_EXCHANGE_MEMORY_ORDER(__s, __f) + {return __cxx_atomic_compare_exchange_strong(&__a_, &__e, __d, __s, __f);} + _LIBCUDACXX_INLINE_VISIBILITY + bool compare_exchange_strong(_Tp& __e, _Tp __d, + memory_order __s, memory_order __f) const _NOEXCEPT + _LIBCUDACXX_CHECK_EXCHANGE_MEMORY_ORDER(__s, __f) + {return __cxx_atomic_compare_exchange_strong(&__a_, &__e, __d, __s, __f);} + _LIBCUDACXX_INLINE_VISIBILITY + bool compare_exchange_weak(_Tp& __e, _Tp __d, + memory_order __m = memory_order_seq_cst) const volatile _NOEXCEPT { + if (memory_order_acq_rel == __m) + return __cxx_atomic_compare_exchange_weak(&__a_, &__e, __d, __m, memory_order_acquire); + else if (memory_order_release == __m) + return __cxx_atomic_compare_exchange_weak(&__a_, &__e, __d, __m, memory_order_relaxed); + else + return __cxx_atomic_compare_exchange_weak(&__a_, &__e, __d, __m, __m); + } + _LIBCUDACXX_INLINE_VISIBILITY + bool compare_exchange_weak(_Tp& __e, _Tp __d, + memory_order __m = memory_order_seq_cst) const _NOEXCEPT { + if(memory_order_acq_rel == __m) + return __cxx_atomic_compare_exchange_weak(&__a_, &__e, __d, __m, memory_order_acquire); + else if(memory_order_release == __m) + return __cxx_atomic_compare_exchange_weak(&__a_, &__e, __d, __m, memory_order_relaxed); + else + return __cxx_atomic_compare_exchange_weak(&__a_, &__e, __d, __m, __m); + } + _LIBCUDACXX_INLINE_VISIBILITY + bool compare_exchange_strong(_Tp& __e, _Tp __d, + memory_order __m = memory_order_seq_cst) const volatile _NOEXCEPT { + if (memory_order_acq_rel == __m) + return __cxx_atomic_compare_exchange_strong(&__a_, &__e, __d, __m, memory_order_acquire); + else if (memory_order_release == __m) + return __cxx_atomic_compare_exchange_strong(&__a_, &__e, __d, __m, memory_order_relaxed); + else + return __cxx_atomic_compare_exchange_strong(&__a_, &__e, __d, __m, __m); + } + _LIBCUDACXX_INLINE_VISIBILITY + bool compare_exchange_strong(_Tp& __e, _Tp __d, + memory_order __m = memory_order_seq_cst) const _NOEXCEPT { + if (memory_order_acq_rel == __m) + return __cxx_atomic_compare_exchange_strong(&__a_, &__e, __d, __m, memory_order_acquire); + else if (memory_order_release == __m) + return __cxx_atomic_compare_exchange_strong(&__a_, &__e, __d, __m, memory_order_relaxed); + else + return __cxx_atomic_compare_exchange_strong(&__a_, &__e, __d, __m, __m); + } -// atomic + _LIBCUDACXX_INLINE_VISIBILITY void wait(_Tp __v, memory_order __m = memory_order_seq_cst) const volatile _NOEXCEPT + {__cxx_atomic_wait(&__a_, __v, __m);} + _LIBCUDACXX_INLINE_VISIBILITY void wait(_Tp __v, memory_order __m = memory_order_seq_cst) const _NOEXCEPT + {__cxx_atomic_wait(&__a_, __v, __m);} + _LIBCUDACXX_INLINE_VISIBILITY void notify_one() const volatile _NOEXCEPT + {__cxx_atomic_notify_one(&__a_);} + _LIBCUDACXX_INLINE_VISIBILITY void notify_one() const _NOEXCEPT + {__cxx_atomic_notify_one(&__a_);} + _LIBCUDACXX_INLINE_VISIBILITY void notify_all() const volatile _NOEXCEPT + {__cxx_atomic_notify_all(&__a_);} + _LIBCUDACXX_INLINE_VISIBILITY void notify_all() const _NOEXCEPT + {__cxx_atomic_notify_all(&__a_);} +}; +// atomic/atomic_ref template -struct __atomic_base<_Tp, _Sco, true> - : public __atomic_base<_Tp, _Sco, false> -{ - typedef __atomic_base<_Tp, _Sco, false> __base; -#ifdef _LIBCUDACXX_CXX03_LANG - _LIBCUDACXX_INLINE_VISIBILITY -#endif - __atomic_base() _NOEXCEPT _LIBCUDACXX_DEFAULT - _LIBCUDACXX_INLINE_VISIBILITY - _LIBCUDACXX_CONSTEXPR __atomic_base(_Tp __d) _NOEXCEPT : __base(__d) {} +struct __atomic_base<_Tp, _Sco, true> : public __atomic_base<_Tp, _Sco, 
false> { + __atomic_base() = default; + + _LIBCUDACXX_INLINE_VISIBILITY _LIBCUDACXX_CONSTEXPR + __atomic_base(const _Tp& __a) _NOEXCEPT : __atomic_base<_Tp, _Sco, false>(__a) {} _LIBCUDACXX_INLINE_VISIBILITY _Tp fetch_add(_Tp __op, memory_order __m = memory_order_seq_cst) volatile _NOEXCEPT @@ -1469,6 +1556,90 @@ struct __atomic_base<_Tp, _Sco, true> _Tp operator^=(_Tp __op) _NOEXCEPT {return fetch_xor(__op) ^ __op;} }; +template +struct __atomic_base_ref<_Tp, _Sco, true> : public __atomic_base_ref<_Tp, _Sco, false> { + __atomic_base_ref() = default; + + _LIBCUDACXX_INLINE_VISIBILITY _LIBCUDACXX_CONSTEXPR + __atomic_base_ref(_Tp& __a) _NOEXCEPT : __atomic_base_ref<_Tp, _Sco, false>(__a) {} + + _LIBCUDACXX_INLINE_VISIBILITY + _Tp fetch_add(_Tp __op, memory_order __m = memory_order_seq_cst) const volatile _NOEXCEPT + {return __cxx_atomic_fetch_add(&this->__a_, __op, __m);} + _LIBCUDACXX_INLINE_VISIBILITY + _Tp fetch_add(_Tp __op, memory_order __m = memory_order_seq_cst) const _NOEXCEPT + {return __cxx_atomic_fetch_add(&this->__a_, __op, __m);} + _LIBCUDACXX_INLINE_VISIBILITY + _Tp fetch_sub(_Tp __op, memory_order __m = memory_order_seq_cst) const volatile _NOEXCEPT + {return __cxx_atomic_fetch_sub(&this->__a_, __op, __m);} + _LIBCUDACXX_INLINE_VISIBILITY + _Tp fetch_sub(_Tp __op, memory_order __m = memory_order_seq_cst) const _NOEXCEPT + {return __cxx_atomic_fetch_sub(&this->__a_, __op, __m);} + _LIBCUDACXX_INLINE_VISIBILITY + _Tp fetch_and(_Tp __op, memory_order __m = memory_order_seq_cst) const volatile _NOEXCEPT + {return __cxx_atomic_fetch_and(&this->__a_, __op, __m);} + _LIBCUDACXX_INLINE_VISIBILITY + _Tp fetch_and(_Tp __op, memory_order __m = memory_order_seq_cst) const _NOEXCEPT + {return __cxx_atomic_fetch_and(&this->__a_, __op, __m);} + _LIBCUDACXX_INLINE_VISIBILITY + _Tp fetch_or(_Tp __op, memory_order __m = memory_order_seq_cst) const volatile _NOEXCEPT + {return __cxx_atomic_fetch_or(&this->__a_, __op, __m);} + _LIBCUDACXX_INLINE_VISIBILITY + _Tp fetch_or(_Tp __op, memory_order __m = memory_order_seq_cst) const _NOEXCEPT + {return __cxx_atomic_fetch_or(&this->__a_, __op, __m);} + _LIBCUDACXX_INLINE_VISIBILITY + _Tp fetch_xor(_Tp __op, memory_order __m = memory_order_seq_cst) const volatile _NOEXCEPT + {return __cxx_atomic_fetch_xor(&this->__a_, __op, __m);} + _LIBCUDACXX_INLINE_VISIBILITY + _Tp fetch_xor(_Tp __op, memory_order __m = memory_order_seq_cst) const _NOEXCEPT + {return __cxx_atomic_fetch_xor(&this->__a_, __op, __m);} + + _LIBCUDACXX_INLINE_VISIBILITY + _Tp operator++(int) const volatile _NOEXCEPT {return fetch_add(_Tp(1));} + _LIBCUDACXX_INLINE_VISIBILITY + _Tp operator++(int) const _NOEXCEPT {return fetch_add(_Tp(1));} + _LIBCUDACXX_INLINE_VISIBILITY + _Tp operator--(int) const volatile _NOEXCEPT {return fetch_sub(_Tp(1));} + _LIBCUDACXX_INLINE_VISIBILITY + _Tp operator--(int) const _NOEXCEPT {return fetch_sub(_Tp(1));} + _LIBCUDACXX_INLINE_VISIBILITY + _Tp operator++() const volatile _NOEXCEPT {return fetch_add(_Tp(1)) + _Tp(1);} + _LIBCUDACXX_INLINE_VISIBILITY + _Tp operator++() const _NOEXCEPT {return fetch_add(_Tp(1)) + _Tp(1);} + _LIBCUDACXX_INLINE_VISIBILITY + _Tp operator--() const volatile _NOEXCEPT {return fetch_sub(_Tp(1)) - _Tp(1);} + _LIBCUDACXX_INLINE_VISIBILITY + _Tp operator--() const _NOEXCEPT {return fetch_sub(_Tp(1)) - _Tp(1);} + _LIBCUDACXX_INLINE_VISIBILITY + _Tp operator+=(_Tp __op) const volatile _NOEXCEPT {return fetch_add(__op) + __op;} + _LIBCUDACXX_INLINE_VISIBILITY + _Tp operator+=(_Tp __op) const _NOEXCEPT {return fetch_add(__op) + __op;} + 
_LIBCUDACXX_INLINE_VISIBILITY + _Tp operator-=(_Tp __op) const volatile _NOEXCEPT {return fetch_sub(__op) - __op;} + _LIBCUDACXX_INLINE_VISIBILITY + _Tp operator-=(_Tp __op) const _NOEXCEPT {return fetch_sub(__op) - __op;} + _LIBCUDACXX_INLINE_VISIBILITY + _Tp operator&=(_Tp __op) const volatile _NOEXCEPT {return fetch_and(__op) & __op;} + _LIBCUDACXX_INLINE_VISIBILITY + _Tp operator&=(_Tp __op) const _NOEXCEPT {return fetch_and(__op) & __op;} + _LIBCUDACXX_INLINE_VISIBILITY + _Tp operator|=(_Tp __op) const volatile _NOEXCEPT {return fetch_or(__op) | __op;} + _LIBCUDACXX_INLINE_VISIBILITY + _Tp operator|=(_Tp __op) const _NOEXCEPT {return fetch_or(__op) | __op;} + _LIBCUDACXX_INLINE_VISIBILITY + _Tp operator^=(_Tp __op) const volatile _NOEXCEPT {return fetch_xor(__op) ^ __op;} + _LIBCUDACXX_INLINE_VISIBILITY + _Tp operator^=(_Tp __op) const _NOEXCEPT {return fetch_xor(__op) ^ __op;} +}; + +#if defined(_LIBCUDACXX_ATOMIC_ALWAYS_LOCK_FREE) +template +_LIBCUDACXX_CONSTEXPR bool __atomic_base<_Tp, _Sco, _Integral>::is_always_lock_free; + +template +_LIBCUDACXX_CONSTEXPR bool __atomic_base_ref<_Tp, _Sco, _Integral>::is_always_lock_free; +#endif + // atomic template @@ -1519,14 +1690,16 @@ struct atomic<_Tp*> volatile _NOEXCEPT {return __cxx_atomic_fetch_add(&this->__a_, __op, __m);} _LIBCUDACXX_INLINE_VISIBILITY - _Tp* fetch_add(ptrdiff_t __op, memory_order __m = memory_order_seq_cst) _NOEXCEPT + _Tp* fetch_add(ptrdiff_t __op, memory_order __m = memory_order_seq_cst) + _NOEXCEPT {return __cxx_atomic_fetch_add(&this->__a_, __op, __m);} _LIBCUDACXX_INLINE_VISIBILITY _Tp* fetch_sub(ptrdiff_t __op, memory_order __m = memory_order_seq_cst) volatile _NOEXCEPT {return __cxx_atomic_fetch_sub(&this->__a_, __op, __m);} _LIBCUDACXX_INLINE_VISIBILITY - _Tp* fetch_sub(ptrdiff_t __op, memory_order __m = memory_order_seq_cst) _NOEXCEPT + _Tp* fetch_sub(ptrdiff_t __op, memory_order __m = memory_order_seq_cst) + _NOEXCEPT {return __cxx_atomic_fetch_sub(&this->__a_, __op, __m);} _LIBCUDACXX_INLINE_VISIBILITY @@ -1555,6 +1728,98 @@ struct atomic<_Tp*> _Tp* operator-=(ptrdiff_t __op) _NOEXCEPT {return fetch_sub(__op) - __op;} }; +// atomic_ref + +template + struct atomic_ref + : public __atomic_base_ref<_Tp> +{ + typedef __atomic_base_ref<_Tp> __base; + using value_type = _Tp; + + static constexpr size_t required_alignment = sizeof(_Tp); + + static constexpr bool is_always_lock_free = sizeof(_Tp) <= 8; + + _LIBCUDACXX_INLINE_VISIBILITY + explicit atomic_ref(_Tp& __ref) : __base(__ref) {} + + atomic_ref(const atomic_ref&) noexcept = default; + atomic_ref& operator=(const atomic_ref&) = delete; + + _LIBCUDACXX_INLINE_VISIBILITY + _Tp operator=(_Tp __v) const noexcept {__base::store(__v); return __v;} + _LIBCUDACXX_INLINE_VISIBILITY + _Tp operator=(_Tp __v) const volatile noexcept {__base::store(__v); return __v;} +}; + +// atomic_ref + +template + struct atomic_ref<_Tp*> + : public __atomic_base_ref<_Tp*> +{ + typedef __atomic_base_ref<_Tp*> __base; + using value_type = _Tp*; + + static constexpr size_t required_alignment = sizeof(_Tp*); + + static constexpr bool is_always_lock_free = sizeof(_Tp*) <= 8; + + _LIBCUDACXX_INLINE_VISIBILITY + explicit atomic_ref(_Tp*& __ref) : __base(__ref) {} + + atomic_ref(const atomic_ref&) noexcept = default; + atomic_ref& operator=(const atomic_ref&) = delete; + + _LIBCUDACXX_INLINE_VISIBILITY + _Tp* operator=(_Tp* __v) const noexcept {__base::store(__v); return __v;} + _LIBCUDACXX_INLINE_VISIBILITY + _Tp* operator=(_Tp* __v) const volatile noexcept {__base::store(__v); return 
__v;} + + _LIBCUDACXX_INLINE_VISIBILITY + _Tp* fetch_add(ptrdiff_t __op, memory_order __m = memory_order_seq_cst) + const volatile _NOEXCEPT + {return __cxx_atomic_fetch_add(&this->__a_, __op, __m);} + _LIBCUDACXX_INLINE_VISIBILITY + _Tp* fetch_add(ptrdiff_t __op, memory_order __m = memory_order_seq_cst) + const _NOEXCEPT + {return __cxx_atomic_fetch_add(&this->__a_, __op, __m);} + _LIBCUDACXX_INLINE_VISIBILITY + _Tp* fetch_sub(ptrdiff_t __op, memory_order __m = memory_order_seq_cst) + const volatile _NOEXCEPT + {return __cxx_atomic_fetch_sub(&this->__a_, __op, __m);} + _LIBCUDACXX_INLINE_VISIBILITY + _Tp* fetch_sub(ptrdiff_t __op, memory_order __m = memory_order_seq_cst) + const _NOEXCEPT + {return __cxx_atomic_fetch_sub(&this->__a_, __op, __m);} + + _LIBCUDACXX_INLINE_VISIBILITY + _Tp* operator++(int) const volatile _NOEXCEPT {return fetch_add(1);} + _LIBCUDACXX_INLINE_VISIBILITY + _Tp* operator++(int) const _NOEXCEPT {return fetch_add(1);} + _LIBCUDACXX_INLINE_VISIBILITY + _Tp* operator--(int) const volatile _NOEXCEPT {return fetch_sub(1);} + _LIBCUDACXX_INLINE_VISIBILITY + _Tp* operator--(int) const _NOEXCEPT {return fetch_sub(1);} + _LIBCUDACXX_INLINE_VISIBILITY + _Tp* operator++() const volatile _NOEXCEPT {return fetch_add(1) + 1;} + _LIBCUDACXX_INLINE_VISIBILITY + _Tp* operator++() const _NOEXCEPT {return fetch_add(1) + 1;} + _LIBCUDACXX_INLINE_VISIBILITY + _Tp* operator--() const volatile _NOEXCEPT {return fetch_sub(1) - 1;} + _LIBCUDACXX_INLINE_VISIBILITY + _Tp* operator--() const _NOEXCEPT {return fetch_sub(1) - 1;} + _LIBCUDACXX_INLINE_VISIBILITY + _Tp* operator+=(ptrdiff_t __op) const volatile _NOEXCEPT {return fetch_add(__op) + __op;} + _LIBCUDACXX_INLINE_VISIBILITY + _Tp* operator+=(ptrdiff_t __op) const _NOEXCEPT {return fetch_add(__op) + __op;} + _LIBCUDACXX_INLINE_VISIBILITY + _Tp* operator-=(ptrdiff_t __op) const volatile _NOEXCEPT {return fetch_sub(__op) - __op;} + _LIBCUDACXX_INLINE_VISIBILITY + _Tp* operator-=(ptrdiff_t __op) const _NOEXCEPT {return fetch_sub(__op) - __op;} +}; + // atomic_is_lock_free template diff --git a/include/cuda/std/detail/libcxx/include/support/atomic/atomic_base.h b/include/cuda/std/detail/libcxx/include/support/atomic/atomic_base.h index d03da59805..2c1a9cbf63 100644 --- a/include/cuda/std/detail/libcxx/include/support/atomic/atomic_base.h +++ b/include/cuda/std/detail/libcxx/include/support/atomic/atomic_base.h @@ -12,6 +12,7 @@ #define _LIBCUDACXX_ATOMIC_BASE_H #include "cxx_atomic.h" +#include _LIBCUDACXX_INLINE_VISIBILITY inline _LIBCUDACXX_CONSTEXPR int __cxx_atomic_order_to_int(memory_order __order) { // Avoid switch statement to make this a constexpr. 
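
The hunks below swap the old single-step __cxx_atomic_base_unwrap helper for a two-step __cxx_atomic_unwrap / __cxx_get_underlying_atomic lookup, so the same __atomic_* builtin code paths can serve both the owning base storage and the new reference storage. A stand-alone sketch of the intended dispatch, using simplified stand-in types rather than the library's actual declarations:

    #include <cassert>

    // Simplified stand-ins for the two storage flavours touched by this patch.
    template <class T>
    struct owning_impl { T value; };   // models __cxx_atomic_base_impl
    template <class T>
    struct ref_impl { T* value; };     // models __cxx_atomic_ref_base_impl

    // "unwrap" yields what the __atomic_* builtins are pointed at: the whole
    // wrapper for the owning case, the referenced object for the ref case.
    template <class T> owning_impl<T>* unwrap(owning_impl<T>* a) { return a; }
    template <class T> T*              unwrap(ref_impl<T>* a)    { return a->value; }

    // "get_underlying" then digs out a plain T* in either case.
    template <class T> T* get_underlying(owning_impl<T>* a) { return &a->value; }
    template <class T> T* get_underlying(T* a)              { return a; }

    int main() {
      int plain = 41;
      owning_impl<int> owned{7};
      ref_impl<int> ref{&plain};

      *get_underlying(unwrap(&owned)) += 1;  // updates the owned storage
      *get_underlying(unwrap(&ref))   += 1;  // updates the referenced int

      assert(owned.value == 8);
      assert(plain == 42);
      return 0;
    }
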
@@ -35,13 +36,13 @@ _LIBCUDACXX_INLINE_VISIBILITY inline _LIBCUDACXX_CONSTEXPR int __cxx_atomic_fail template inline void __cxx_atomic_init(volatile _Tp* __a, _Up __val) { - auto __a_tmp = __cxx_atomic_base_unwrap(__a); + auto __a_tmp = __cxx_get_underlying_atomic(__cxx_atomic_unwrap(__a)); __cxx_atomic_assign_volatile(*__a_tmp, __val); } template inline void __cxx_atomic_init(_Tp* __a, _Up __val) { - auto __a_tmp = __cxx_atomic_base_unwrap(__a); + auto __a_tmp = __cxx_get_underlying_atomic(__cxx_atomic_unwrap(__a)); *__a_tmp = __val; } @@ -58,39 +59,34 @@ void __cxx_atomic_signal_fence(memory_order __order) { template inline void __cxx_atomic_store(_Tp* __a, _Up __val, memory_order __order) { - typename _CUDA_VSTD::remove_cv<_Tp>::type __v_temp(__val); - (void)__a; - __atomic_store(__a, &__v_temp, __cxx_atomic_order_to_int(__order)); + auto __v_temp = __cxx_atomic_wrap_to_base(__a, __val); + __atomic_store(__cxx_atomic_unwrap(__a), &__v_temp, __cxx_atomic_order_to_int(__order)); } template inline auto __cxx_atomic_load(const _Tp* __a, memory_order __order) -> __cxx_atomic_underlying_t<_Tp> { - typename _CUDA_VSTD::remove_cv<_Tp>::type __ret; - (void)__a; - __atomic_load(__a, &__ret, __cxx_atomic_order_to_int(__order)); - return __ret.__a_value; + auto __ret = __cxx_atomic_base_temporary(__a); + __atomic_load(__cxx_atomic_unwrap(__a), &__ret, __cxx_atomic_order_to_int(__order)); + return *__cxx_get_underlying_atomic(&__ret); } template -inline auto __cxx_atomic_exchange(_Tp* __a, _Up __value, +inline auto __cxx_atomic_exchange(_Tp* __a, _Up __val, memory_order __order) -> __cxx_atomic_underlying_t<_Tp> { - typename _CUDA_VSTD::remove_cv<_Tp>::type __v_temp(__value); - typename _CUDA_VSTD::remove_cv<_Tp>::type __ret; - (void)__a; - __atomic_exchange(__a, &__v_temp, &__ret, __cxx_atomic_order_to_int(__order)); - return __ret.__a_value; + auto __v_temp = __cxx_atomic_wrap_to_base(__a, __val); + auto __ret = __cxx_atomic_base_temporary(__a); + __atomic_exchange(__cxx_atomic_unwrap(__a), &__v_temp, &__ret, __cxx_atomic_order_to_int(__order)); + return *__cxx_get_underlying_atomic(&__ret); } template inline bool __cxx_atomic_compare_exchange_strong( _Tp* __a, _Up* __expected, _Up __value, memory_order __success, memory_order __failure) { - auto __a_tmp = __cxx_atomic_base_unwrap(__a); - (void)__a_tmp; (void)__expected; - return __atomic_compare_exchange(__a_tmp, __expected, &__value, - false, + return __atomic_compare_exchange(__cxx_get_underlying_atomic(__cxx_atomic_unwrap(__a)), + __expected, &__value, false, __cxx_atomic_order_to_int(__success), __cxx_atomic_failure_order_to_int(__failure)); } @@ -99,11 +95,9 @@ template inline bool __cxx_atomic_compare_exchange_weak( _Tp* __a, _Up* __expected, _Up __value, memory_order __success, memory_order __failure) { - auto __a_tmp = __cxx_atomic_base_unwrap(__a); - (void)__a_tmp; (void)__expected; - return __atomic_compare_exchange(__a_tmp, __expected, &__value, - true, + return __atomic_compare_exchange(__cxx_get_underlying_atomic(__cxx_atomic_unwrap(__a)), + __expected, &__value, true, __cxx_atomic_order_to_int(__success), __cxx_atomic_failure_order_to_int(__failure)); } @@ -125,7 +119,7 @@ template inline auto __cxx_atomic_fetch_add(_Tp* __a, _Td __delta, memory_order __order) -> __cxx_atomic_underlying_t<_Tp> { constexpr auto __skip_v = __atomic_ptr_inc<__cxx_atomic_underlying_t<_Tp>>::value; - auto __a_tmp = __cxx_atomic_base_unwrap(__a); + auto __a_tmp = __cxx_get_underlying_atomic(__cxx_atomic_unwrap(__a)); return __atomic_fetch_add(__a_tmp, 
__delta * __skip_v, __cxx_atomic_order_to_int(__order)); } @@ -134,7 +128,7 @@ template inline auto __cxx_atomic_fetch_sub(_Tp* __a, _Td __delta, memory_order __order) -> __cxx_atomic_underlying_t<_Tp> { constexpr auto __skip_v = __atomic_ptr_inc<__cxx_atomic_underlying_t<_Tp>>::value; - auto __a_tmp = __cxx_atomic_base_unwrap(__a); + auto __a_tmp = __cxx_get_underlying_atomic(__cxx_atomic_unwrap(__a)); return __atomic_fetch_sub(__a_tmp, __delta * __skip_v, __cxx_atomic_order_to_int(__order)); } @@ -142,7 +136,7 @@ inline auto __cxx_atomic_fetch_sub(_Tp* __a, _Td __delta, template inline auto __cxx_atomic_fetch_and(_Tp* __a, _Td __pattern, memory_order __order) -> __cxx_atomic_underlying_t<_Tp> { - auto __a_tmp = __cxx_atomic_base_unwrap(__a); + auto __a_tmp = __cxx_get_underlying_atomic(__cxx_atomic_unwrap(__a)); return __atomic_fetch_and(__a_tmp, __pattern, __cxx_atomic_order_to_int(__order)); } @@ -150,7 +144,7 @@ inline auto __cxx_atomic_fetch_and(_Tp* __a, _Td __pattern, template inline auto __cxx_atomic_fetch_or(_Tp* __a, _Td __pattern, memory_order __order) -> __cxx_atomic_underlying_t<_Tp> { - auto __a_tmp = __cxx_atomic_base_unwrap(__a); + auto __a_tmp = __cxx_get_underlying_atomic(__cxx_atomic_unwrap(__a)); return __atomic_fetch_or(__a_tmp, __pattern, __cxx_atomic_order_to_int(__order)); } @@ -158,7 +152,7 @@ inline auto __cxx_atomic_fetch_or(_Tp* __a, _Td __pattern, template inline auto __cxx_atomic_fetch_xor(_Tp* __a, _Td __pattern, memory_order __order) -> __cxx_atomic_underlying_t<_Tp> { - auto __a_tmp = __cxx_atomic_base_unwrap(__a); + auto __a_tmp = __cxx_get_underlying_atomic(__cxx_atomic_unwrap(__a)); return __atomic_fetch_xor(__a_tmp, __pattern, __cxx_atomic_order_to_int(__order)); } diff --git a/include/cuda/std/detail/libcxx/include/support/atomic/atomic_cuda.h b/include/cuda/std/detail/libcxx/include/support/atomic/atomic_cuda.h index 4ecc096b41..1c2803e5fe 100644 --- a/include/cuda/std/detail/libcxx/include/support/atomic/atomic_cuda.h +++ b/include/cuda/std/detail/libcxx/include/support/atomic/atomic_cuda.h @@ -93,45 +93,53 @@ inline ) } -template -using __cxx_atomic_base_heterogeneous_storage - = typename conditional<_Ref, - __host::__cxx_atomic_ref_base_impl<_Tp, _Sco>, - __host::__cxx_atomic_base_impl<_Tp, _Sco> >::type; - - template struct __cxx_atomic_base_heterogeneous_impl { __cxx_atomic_base_heterogeneous_impl() noexcept = default; + _LIBCUDACXX_INLINE_VISIBILITY _LIBCUDACXX_CONSTEXPR explicit __cxx_atomic_base_heterogeneous_impl(_Tp __value) : __a_value(__value) { } - __cxx_atomic_base_heterogeneous_storage<_Tp, _Sco, _Ref> __a_value; + __host::__cxx_atomic_base_impl<_Tp, _Sco> __a_value; +}; + +template +struct __cxx_atomic_base_heterogeneous_impl<_Tp, _Sco, true> { + __cxx_atomic_base_heterogeneous_impl() noexcept = default; + + static_assert(sizeof(_Tp) >= 4, "atomic_ref does not support 1 or 2 byte types"); + static_assert(sizeof(_Tp) <= 8, "atomic_ref does not support types larger than 8 bytes"); + + _LIBCUDACXX_INLINE_VISIBILITY _LIBCUDACXX_CONSTEXPR explicit + __cxx_atomic_base_heterogeneous_impl(_Tp& __value) : __a_value(__value) { + } + + __host::__cxx_atomic_ref_base_impl<_Tp, _Sco> __a_value; }; template _LIBCUDACXX_INLINE_VISIBILITY _LIBCUDACXX_CONSTEXPR _Tp* __cxx_get_underlying_device_atomic(__cxx_atomic_base_heterogeneous_impl<_Tp, _Sco, _Ref> * __a) _NOEXCEPT { - return __cxx_atomic_base_unwrap(&__a->__a_value); + return __cxx_get_underlying_atomic(&__a->__a_value); } template _LIBCUDACXX_INLINE_VISIBILITY _LIBCUDACXX_CONSTEXPR volatile _Tp* 
__cxx_get_underlying_device_atomic(__cxx_atomic_base_heterogeneous_impl<_Tp, _Sco, _Ref> volatile* __a) _NOEXCEPT { - return __cxx_atomic_base_unwrap(&__a->__a_value); + return __cxx_get_underlying_atomic(&__a->__a_value); } template _LIBCUDACXX_INLINE_VISIBILITY _LIBCUDACXX_CONSTEXPR const _Tp* __cxx_get_underlying_device_atomic(__cxx_atomic_base_heterogeneous_impl<_Tp, _Sco, _Ref> const* __a) _NOEXCEPT { - return __cxx_atomic_base_unwrap(&__a->__a_value); + return __cxx_get_underlying_atomic(&__a->__a_value); } template _LIBCUDACXX_INLINE_VISIBILITY _LIBCUDACXX_CONSTEXPR const volatile _Tp* __cxx_get_underlying_device_atomic(__cxx_atomic_base_heterogeneous_impl<_Tp, _Sco, _Ref> const volatile* __a) _NOEXCEPT { - return __cxx_atomic_base_unwrap(&__a->__a_value); + return __cxx_get_underlying_atomic(&__a->__a_value); } template diff --git a/include/cuda/std/detail/libcxx/include/support/atomic/cxx_atomic.h b/include/cuda/std/detail/libcxx/include/support/atomic/cxx_atomic.h index 338dbe8759..fcbc23eb19 100644 --- a/include/cuda/std/detail/libcxx/include/support/atomic/cxx_atomic.h +++ b/include/cuda/std/detail/libcxx/include/support/atomic/cxx_atomic.h @@ -14,8 +14,16 @@ template struct __cxx_atomic_base_impl { using __underlying_t = _Tp; + using __temporary_t = __cxx_atomic_base_impl<_Tp, _Sco>; + using __wrap_t = __cxx_atomic_base_impl<_Tp, _Sco>; + static constexpr int __sco = _Sco; +#if !defined(_LIBCUDACXX_COMPILER_GCC) || (__GNUC__ >= 5) + static_assert(is_trivially_copyable<_Tp>::value, + "std::atomic requires that 'Tp' be a trivially copyable type"); +#endif + _LIBCUDACXX_CONSTEXPR __cxx_atomic_base_impl() _NOEXCEPT = default; @@ -30,35 +38,60 @@ _LIBCUDACXX_INLINE_VISIBILITY _LIBCUDACXX_CONSTEXPR _Tp* __cxx_get_underlying_atomic(__cxx_atomic_base_impl<_Tp, _Sco> * __a) _NOEXCEPT { return &__a->__a_value; } - template _LIBCUDACXX_INLINE_VISIBILITY _LIBCUDACXX_CONSTEXPR volatile _Tp* __cxx_get_underlying_atomic(__cxx_atomic_base_impl<_Tp, _Sco> volatile* __a) _NOEXCEPT { return &__a->__a_value; } - template _LIBCUDACXX_INLINE_VISIBILITY _LIBCUDACXX_CONSTEXPR const _Tp* __cxx_get_underlying_atomic(__cxx_atomic_base_impl<_Tp, _Sco> const* __a) _NOEXCEPT { return &__a->__a_value; } - template _LIBCUDACXX_INLINE_VISIBILITY _LIBCUDACXX_CONSTEXPR const volatile _Tp* __cxx_get_underlying_atomic(__cxx_atomic_base_impl<_Tp, _Sco> const volatile* __a) _NOEXCEPT { return &__a->__a_value; } +template +_LIBCUDACXX_INLINE_VISIBILITY _LIBCUDACXX_CONSTEXPR +__cxx_atomic_base_impl<_Tp, _Sco>* __cxx_atomic_unwrap(__cxx_atomic_base_impl<_Tp, _Sco>* __a) _NOEXCEPT { + return __a; +} +template +_LIBCUDACXX_INLINE_VISIBILITY _LIBCUDACXX_CONSTEXPR +volatile __cxx_atomic_base_impl<_Tp, _Sco>* __cxx_atomic_unwrap(__cxx_atomic_base_impl<_Tp, _Sco> volatile* __a) _NOEXCEPT { + return __a; +} +template +_LIBCUDACXX_INLINE_VISIBILITY _LIBCUDACXX_CONSTEXPR +const __cxx_atomic_base_impl<_Tp, _Sco>* __cxx_atomic_unwrap(__cxx_atomic_base_impl<_Tp, _Sco> const* __a) _NOEXCEPT { + return __a; +} +template +_LIBCUDACXX_INLINE_VISIBILITY _LIBCUDACXX_CONSTEXPR +const volatile __cxx_atomic_base_impl<_Tp, _Sco>* __cxx_atomic_unwrap(__cxx_atomic_base_impl<_Tp, _Sco> const volatile* __a) _NOEXCEPT { + return __a; +} template struct __cxx_atomic_ref_base_impl { using __underlying_t = _Tp; + using __temporary_t = _Tp; + using __wrap_t = _Tp; + static constexpr int __sco = _Sco; +#if !defined(_LIBCUDACXX_COMPILER_GCC) || (__GNUC__ >= 5) + static_assert(is_trivially_copyable<_Tp>::value, + "std::atomic_ref requires that 'Tp' be 
a trivially copyable type"); +#endif + _LIBCUDACXX_CONSTEXPR - __cxx_atomic_ref_base_impl() _NOEXCEPT = default; + __cxx_atomic_ref_base_impl() _NOEXCEPT = delete; _LIBCUDACXX_INLINE_VISIBILITY _LIBCUDACXX_CONSTEXPR explicit - __cxx_atomic_ref_base_impl(_Tp value) _NOEXCEPT : __a_value(value) {} + __cxx_atomic_ref_base_impl(_Tp& value) _NOEXCEPT : __a_value(&value) {} _Tp* __a_value; }; @@ -68,28 +101,57 @@ _LIBCUDACXX_INLINE_VISIBILITY _LIBCUDACXX_CONSTEXPR _Tp* __cxx_get_underlying_atomic(__cxx_atomic_ref_base_impl<_Tp, _Sco>* __a) _NOEXCEPT { return __a->__a_value; } - template _LIBCUDACXX_INLINE_VISIBILITY _LIBCUDACXX_CONSTEXPR volatile _Tp* __cxx_get_underlying_atomic(__cxx_atomic_ref_base_impl<_Tp, _Sco> volatile* __a) _NOEXCEPT { return __a->__a_value; } - template _LIBCUDACXX_INLINE_VISIBILITY _LIBCUDACXX_CONSTEXPR const _Tp* __cxx_get_underlying_atomic(__cxx_atomic_ref_base_impl<_Tp, _Sco> const* __a) _NOEXCEPT { return __a->__a_value; } - template _LIBCUDACXX_INLINE_VISIBILITY _LIBCUDACXX_CONSTEXPR const volatile _Tp* __cxx_get_underlying_atomic(__cxx_atomic_ref_base_impl<_Tp, _Sco> const volatile* __a) _NOEXCEPT { return __a->__a_value; } +template +_LIBCUDACXX_INLINE_VISIBILITY _LIBCUDACXX_CONSTEXPR +_Tp* __cxx_atomic_unwrap(__cxx_atomic_ref_base_impl<_Tp, _Sco>* __a) _NOEXCEPT { + return __cxx_get_underlying_atomic(__a); +} +template +_LIBCUDACXX_INLINE_VISIBILITY _LIBCUDACXX_CONSTEXPR +volatile _Tp* __cxx_atomic_unwrap(__cxx_atomic_ref_base_impl<_Tp, _Sco> volatile* __a) _NOEXCEPT { + return __cxx_get_underlying_atomic(__a); +} +template +_LIBCUDACXX_INLINE_VISIBILITY _LIBCUDACXX_CONSTEXPR +const _Tp* __cxx_atomic_unwrap(__cxx_atomic_ref_base_impl<_Tp, _Sco> const* __a) _NOEXCEPT { + return __cxx_get_underlying_atomic(__a); +} +template +_LIBCUDACXX_INLINE_VISIBILITY _LIBCUDACXX_CONSTEXPR +const volatile _Tp* __cxx_atomic_unwrap(__cxx_atomic_ref_base_impl<_Tp, _Sco> const volatile* __a) _NOEXCEPT { + return __cxx_get_underlying_atomic(__a); +} template -_LIBCUDACXX_INLINE_VISIBILITY auto __cxx_atomic_base_unwrap(_Tp* __a) _NOEXCEPT -> decltype(__cxx_get_underlying_atomic(__a)) { - return __cxx_get_underlying_atomic(__a); +_LIBCUDACXX_INLINE_VISIBILITY _LIBCUDACXX_CONSTEXPR +_Tp* __cxx_get_underlying_atomic(_Tp* __a) _NOEXCEPT { + return __a; +} + +template +_LIBCUDACXX_INLINE_VISIBILITY _LIBCUDACXX_CONSTEXPR +auto __cxx_atomic_wrap_to_base(_Tp*, _Up __val) _NOEXCEPT -> typename _Tp::__wrap_t { + return typename _Tp::__wrap_t(__val); +} +template +_LIBCUDACXX_INLINE_VISIBILITY _LIBCUDACXX_CONSTEXPR +auto __cxx_atomic_base_temporary(_Tp*) _NOEXCEPT -> typename _Tp::__temporary_t { + return typename _Tp::__temporary_t(); } template diff --git a/libcxx/test/std/atomics/atomics.types.generic/address_ref.pass.cpp b/libcxx/test/std/atomics/atomics.types.generic/address_ref.pass.cpp new file mode 100644 index 0000000000..b78d8a2ecb --- /dev/null +++ b/libcxx/test/std/atomics/atomics.types.generic/address_ref.pass.cpp @@ -0,0 +1,134 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// UNSUPPORTED: libcpp-has-no-threads +// ... test case crashes clang. 
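
The new test below mirrors the existing address.pass.cpp test, rewritten against the reference type. Condensed, what it verifies for the pointer specialization looks roughly like this (host-only sketch; assumes a C++20 <atomic> or the atomic_ref added by this patch, and the variable names are illustrative, not part of the test):

    #include <atomic>
    #include <cassert>

    int main() {
      int data[4] = {0, 1, 2, 3};
      int* p = data;

      // Bind an atomic view onto the existing pointer object.
      std::atomic_ref<int*> ref(p);

      ref += 3;                        // pointer arithmetic scales by sizeof(int)
      assert(ref.load() == data + 3);

      ref.fetch_sub(2);                // returns data + 3, stores data + 1
      assert(p == data + 1);           // the referenced pointer itself is updated

      return 0;
    }
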
+ +// + +// template +// struct atomic +// { +// bool is_lock_free() const volatile; +// bool is_lock_free() const; +// void store(T* desr, memory_order m = memory_order_seq_cst) volatile; +// void store(T* desr, memory_order m = memory_order_seq_cst); +// T* load(memory_order m = memory_order_seq_cst) const volatile; +// T* load(memory_order m = memory_order_seq_cst) const; +// operator T*() const volatile; +// operator T*() const; +// T* exchange(T* desr, memory_order m = memory_order_seq_cst) volatile; +// T* exchange(T* desr, memory_order m = memory_order_seq_cst); +// bool compare_exchange_weak(T*& expc, T* desr, +// memory_order s, memory_order f) volatile; +// bool compare_exchange_weak(T*& expc, T* desr, +// memory_order s, memory_order f); +// bool compare_exchange_strong(T*& expc, T* desr, +// memory_order s, memory_order f) volatile; +// bool compare_exchange_strong(T*& expc, T* desr, +// memory_order s, memory_order f); +// bool compare_exchange_weak(T*& expc, T* desr, +// memory_order m = memory_order_seq_cst) volatile; +// bool compare_exchange_weak(T*& expc, T* desr, +// memory_order m = memory_order_seq_cst); +// bool compare_exchange_strong(T*& expc, T* desr, +// memory_order m = memory_order_seq_cst) volatile; +// bool compare_exchange_strong(T*& expc, T* desr, +// memory_order m = memory_order_seq_cst); +// T* fetch_add(ptrdiff_t op, memory_order m = memory_order_seq_cst) volatile; +// T* fetch_add(ptrdiff_t op, memory_order m = memory_order_seq_cst); +// T* fetch_sub(ptrdiff_t op, memory_order m = memory_order_seq_cst) volatile; +// T* fetch_sub(ptrdiff_t op, memory_order m = memory_order_seq_cst); +// +// atomic() = default; +// constexpr atomic(T* desr); +// atomic(const atomic&) = delete; +// atomic& operator=(const atomic&) = delete; +// atomic& operator=(const atomic&) volatile = delete; +// +// T* operator=(T*) volatile; +// T* operator=(T*); +// T* operator++(int) volatile; +// T* operator++(int); +// T* operator--(int) volatile; +// T* operator--(int); +// T* operator++() volatile; +// T* operator++(); +// T* operator--() volatile; +// T* operator--(); +// T* operator+=(ptrdiff_t op) volatile; +// T* operator+=(ptrdiff_t op); +// T* operator-=(ptrdiff_t op) volatile; +// T* operator-=(ptrdiff_t op); +// }; + +#include +#include +#include +#include + +#include + +#include "test_macros.h" + +template +void +do_test() +{ + typedef typename std::remove_pointer::type X; + T val(0); + A obj(val); + bool b0 = obj.is_lock_free(); + ((void)b0); // mark as unused + assert(obj == T(0)); + obj.store(T(1), std::memory_order_release); + assert(obj == T(1)); + assert(obj.load() == T(1)); + assert(obj.load(std::memory_order_acquire) == T(1)); + assert(obj.exchange(T(2)) == T(1)); + assert(obj == T(2)); + assert(obj.exchange(T(3), std::memory_order_relaxed) == T(2)); + assert(obj == T(3)); + T x = obj; + assert(cmpxchg_weak_loop(obj, x, T(2)) == true); + assert(obj == T(2)); + assert(x == T(3)); + assert(obj.compare_exchange_weak(x, T(1)) == false); + assert(obj == T(2)); + assert(x == T(2)); + x = T(2); + assert(obj.compare_exchange_strong(x, T(1)) == true); + assert(obj == T(1)); + assert(x == T(2)); + assert(obj.compare_exchange_strong(x, T(0)) == false); + assert(obj == T(1)); + assert(x == T(1)); + assert((obj = T(0)) == T(0)); + assert(obj == T(0)); + obj = T(2*sizeof(X)); + assert((obj += std::ptrdiff_t(3)) == T(5*sizeof(X))); + assert(obj == T(5*sizeof(X))); + assert((obj -= std::ptrdiff_t(3)) == T(2*sizeof(X))); + assert(obj == T(2*sizeof(X))); +} + +template +void 
test() +{ + do_test(); + do_test(); + do_test(); + do_test(); +} + +int main(int, char**) +{ + test, int*>(); + + return 0; +} diff --git a/libcxx/test/std/atomics/atomics.types.generic/integral_ref.pass.cpp b/libcxx/test/std/atomics/atomics.types.generic/integral_ref.pass.cpp new file mode 100644 index 0000000000..290f1452e8 --- /dev/null +++ b/libcxx/test/std/atomics/atomics.types.generic/integral_ref.pass.cpp @@ -0,0 +1,189 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// + +// + +// template <> +// struct atomic_ref +// { +// bool is_lock_free() const volatile; +// bool is_lock_free() const; +// void store(integral desr, memory_order m = memory_order_seq_cst) volatile; +// void store(integral desr, memory_order m = memory_order_seq_cst); +// integral load(memory_order m = memory_order_seq_cst) const volatile; +// integral load(memory_order m = memory_order_seq_cst) const; +// operator integral() const volatile; +// operator integral() const; +// integral exchange(integral desr, +// memory_order m = memory_order_seq_cst) volatile; +// integral exchange(integral desr, memory_order m = memory_order_seq_cst); +// bool compare_exchange_weak(integral& expc, integral desr, +// memory_order s, memory_order f) volatile; +// bool compare_exchange_weak(integral& expc, integral desr, +// memory_order s, memory_order f); +// bool compare_exchange_strong(integral& expc, integral desr, +// memory_order s, memory_order f) volatile; +// bool compare_exchange_strong(integral& expc, integral desr, +// memory_order s, memory_order f); +// bool compare_exchange_weak(integral& expc, integral desr, +// memory_order m = memory_order_seq_cst) volatile; +// bool compare_exchange_weak(integral& expc, integral desr, +// memory_order m = memory_order_seq_cst); +// bool compare_exchange_strong(integral& expc, integral desr, +// memory_order m = memory_order_seq_cst) volatile; +// bool compare_exchange_strong(integral& expc, integral desr, +// memory_order m = memory_order_seq_cst); +// +// integral +// fetch_add(integral op, memory_order m = memory_order_seq_cst) volatile; +// integral fetch_add(integral op, memory_order m = memory_order_seq_cst); +// integral +// fetch_sub(integral op, memory_order m = memory_order_seq_cst) volatile; +// integral fetch_sub(integral op, memory_order m = memory_order_seq_cst); +// integral +// fetch_and(integral op, memory_order m = memory_order_seq_cst) volatile; +// integral fetch_and(integral op, memory_order m = memory_order_seq_cst); +// integral +// fetch_or(integral op, memory_order m = memory_order_seq_cst) volatile; +// integral fetch_or(integral op, memory_order m = memory_order_seq_cst); +// integral +// fetch_xor(integral op, memory_order m = memory_order_seq_cst) volatile; +// integral fetch_xor(integral op, memory_order m = memory_order_seq_cst); +// +// atomic_ref() = delete; +// constexpr atomic_ref(integral& desr); +// atomic_ref(const atomic_ref&) = default; +// atomic_ref& operator=(const atomic_ref&) = delete; +// atomic_ref& operator=(const atomic_ref&) volatile = delete; +// integral operator=(integral desr) volatile; +// integral operator=(integral desr); +// +// integral operator++(int) volatile; +// integral operator++(int); +// 
integral operator--(int) volatile; +// integral operator--(int); +// integral operator++() volatile; +// integral operator++(); +// integral operator--() volatile; +// integral operator--(); +// integral operator+=(integral op) volatile; +// integral operator+=(integral op); +// integral operator-=(integral op) volatile; +// integral operator-=(integral op); +// integral operator&=(integral op) volatile; +// integral operator&=(integral op); +// integral operator|=(integral op) volatile; +// integral operator|=(integral op); +// integral operator^=(integral op) volatile; +// integral operator^=(integral op); +// }; + +#include +#include + +#include + +#include "test_macros.h" + +template +void do_test() { + T val(0); + assert(&val); + assert(val == T(0)); + A obj(val); + assert(obj.load() == T(0)); + bool b0 = obj.is_lock_free(); + ((void)b0); // mark as unused + obj.store(T(0)); + assert(obj.load() == T(0)); + assert(obj == T(0)); + obj.store(T(1), std::memory_order_release); + assert(obj == T(1)); + assert(obj.load() == T(1)); + assert(obj.load(std::memory_order_acquire) == T(1)); + assert(obj.exchange(T(2)) == T(1)); + assert(obj == T(2)); + assert(obj.exchange(T(3), std::memory_order_relaxed) == T(2)); + assert(obj == T(3)); + T x = obj; + assert(cmpxchg_weak_loop(obj, x, T(2)) == true); + assert(obj == T(2)); + assert(x == T(3)); + assert(obj.compare_exchange_weak(x, T(1)) == false); + assert(obj == T(2)); + assert(x == T(2)); + x = T(2); + assert(obj.compare_exchange_strong(x, T(1)) == true); + assert(obj == T(1)); + assert(x == T(2)); + assert(obj.compare_exchange_strong(x, T(0)) == false); + assert(obj == T(1)); + assert(x == T(1)); + assert((obj = T(0)) == T(0)); + assert(obj == T(0)); + assert(obj++ == T(0)); + assert(obj == T(1)); + assert(++obj == T(2)); + assert(obj == T(2)); + assert(--obj == T(1)); + assert(obj == T(1)); + assert(obj-- == T(1)); + assert(obj == T(0)); + obj = T(2); + assert((obj += T(3)) == T(5)); + assert(obj == T(5)); + assert((obj -= T(3)) == T(2)); + assert(obj == T(2)); + assert((obj |= T(5)) == T(7)); + assert(obj == T(7)); + assert((obj &= T(0xF)) == T(7)); + assert(obj == T(7)); + assert((obj ^= T(0xF)) == T(8)); + assert(obj == T(8)); +} + +template