diff --git a/.upstream-tests/test/support/nasty_containers.h b/.upstream-tests/test/support/nasty_containers.h
index c6584ed76a..bfc89170f1 100644
--- a/.upstream-tests/test/support/nasty_containers.h
+++ b/.upstream-tests/test/support/nasty_containers.h
@@ -9,12 +9,17 @@
 #ifndef NASTY_CONTAINERS_H
 #define NASTY_CONTAINERS_H
 
-#include
-#include
-#include
+#include
+#if defined(_LIBCUDACXX_HAS_VECTOR)
+#include
+#endif
+#if defined(_LIBCUDACXX_HAS_LIST)
+#include
+#endif
 
 #include "test_macros.h"
 
+#if defined(_LIBCUDACXX_HAS_VECTOR)
 template
 class nasty_vector
 {
@@ -135,7 +140,9 @@ class nasty_vector
 
 template
 bool operator==(const nasty_vector& x, const nasty_vector& y) { return x.v_ == y.v_; }
+#endif
 
+#if defined(_LIBCUDACXX_HAS_LIST)
 template
 class nasty_list
 {
@@ -282,6 +289,7 @@ class nasty_list
 
 template
 bool operator==(const nasty_list& x, const nasty_list& y) { return x.l_ == y.l_; }
+#endif
 
 // Not really a mutex, but can play one in tests
 class nasty_mutex
diff --git a/include/cuda/mutex b/include/cuda/mutex
new file mode 100644
index 0000000000..532cc533c8
--- /dev/null
+++ b/include/cuda/mutex
@@ -0,0 +1,30 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of libcu++, the C++ Standard Library for your entire system,
+// under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _CUDA_MUTEX
+#define _CUDA_MUTEX
+
+#include "std/mutex"
+
+_LIBCUDACXX_BEGIN_NAMESPACE_CUDA
+
+template
+using mutex = _CUDA_VSTD::__mutex_base<_Sco>;
+
+template
+using timed_mutex = _CUDA_VSTD::__mutex_base<_Sco>;
+
+template
+using once_flag = _CUDA_VSTD::__once_flag_base<_Sco>;
+
+using _CUDA_VSTD::call_once;
+
+_LIBCUDACXX_END_NAMESPACE_CUDA
+
+#endif //_CUDA_MUTEX
diff --git a/include/cuda/std/detail/__config b/include/cuda/std/detail/__config
index dfcf70f413..dce06ffb08 100644
--- a/include/cuda/std/detail/__config
+++ b/include/cuda/std/detail/__config
@@ -79,10 +79,12 @@
 #define _LIBCUDACXX_HAS_NO_PLATFORM_WAIT
 #define _LIBCUDACXX_HAS_NO_MONOTONIC_CLOCK
 #define _LIBCUDACXX_HAS_NO_TREE_BARRIER
+#define _LIBCUDACXX_HAS_THREAD_API_EXTERNAL
+#define _LIBCUDACXX_INLINE_THREADING
+
 #ifdef __CUDACC_RTC__
     #define __ELF__
     #define _LIBCUDACXX_DISABLE_PRAGMA_GCC_SYSTEM_HEADER
-    #define _LIBCUDACXX_HAS_THREAD_API_EXTERNAL
    #define __alignof(x) alignof(x)
    #define _LIBCUDACXX_LITTLE_ENDIAN
    #define _LIBCUDACXX_DISABLE_VISIBILITY_ANNOTATIONS
@@ -104,9 +106,8 @@
 
 #include "libcxx/include/__config"
 
-#if defined(__CUDA_ARCH__)
-    #define _LIBCUDACXX_HAS_THREAD_API_CUDA
-#elif defined(_LIBCUDACXX_COMPILER_MSVC)
+#define _LIBCUDACXX_HAS_THREAD_API_CUDA
+#if defined(_LIBCUDACXX_COMPILER_MSVC)
     #define _LIBCUDACXX_HAS_THREAD_API_WIN32
 #endif
 
diff --git a/include/cuda/std/detail/libcxx/include/CMakeLists.txt b/include/cuda/std/detail/libcxx/include/CMakeLists.txt
index fa0ebc938c..bfe83f0a68 100644
--- a/include/cuda/std/detail/libcxx/include/CMakeLists.txt
+++ b/include/cuda/std/detail/libcxx/include/CMakeLists.txt
@@ -117,6 +117,7 @@ set(files
   __mdspan/submdspan.hpp
   __mdspan/type_list.hpp
   __memory/addressof.h
+  __memory/atomic_load.h
   __memory/pointer_traits.h
   __mutex_base
   __node_handle
diff --git a/include/cuda/std/detail/libcxx/include/__memory/atomic_load.h b/include/cuda/std/detail/libcxx/include/__memory/atomic_load.h
new file mode 100644
index 0000000000..ae060162ad
--- /dev/null
+++ b/include/cuda/std/detail/libcxx/include/__memory/atomic_load.h
@@ -0,0 +1,69 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCUDACXX___MEMORY_ATOMIMC_LOAD_H
+#define _LIBCUDACXX___MEMORY_ATOMIMC_LOAD_H
+
+#ifndef __cuda_std__
+#include <__config>
+#endif //__cuda_std__
+
+#include "../atomic"
+
+#if defined(_LIBCUDACXX_USE_PRAGMA_GCC_SYSTEM_HEADER)
+#pragma GCC system_header
+#endif
+
+_LIBCUDACXX_BEGIN_NAMESPACE_STD
+
+#ifndef __cuda_std__
+
+template
+inline _LIBCUDACXX_INLINE_VISIBILITY
+_ValueType __libcpp_relaxed_load(_ValueType const* __value) {
+#if !defined(_LIBCUDACXX_HAS_NO_THREADS) && \
+    defined(__ATOMIC_RELAXED) && \
+    (__has_builtin(__atomic_load_n) || defined(_LIBCUDACXX_COMPILER_GCC))
+    return __atomic_load_n(__value, __ATOMIC_RELAXED);
+#else
+    return *__value;
+#endif
+}
+
+template
+inline _LIBCUDACXX_INLINE_VISIBILITY
+_ValueType __libcpp_acquire_load(_ValueType const* __value) {
+#if !defined(_LIBCUDACXX_HAS_NO_THREADS) && \
+    defined(__ATOMIC_ACQUIRE) && \
+    (__has_builtin(__atomic_load_n) || defined(_LIBCUDACXX_COMPILER_GCC))
+    return __atomic_load_n(__value, __ATOMIC_ACQUIRE);
+#else
+    return *__value;
+#endif
+}
+
+#else
+
+template
+inline _LIBCUDACXX_INLINE_VISIBILITY
+_ValueType __libcpp_relaxed_load(atomic<_ValueType> const* __value) {
+    return __value->load(memory_order_relaxed);
+}
+
+template
+inline _LIBCUDACXX_INLINE_VISIBILITY
+_ValueType __libcpp_acquire_load(atomic<_ValueType> const* __value) {
+    return __value->load(memory_order_acquire);
+}
+#endif // __cuda_std__
+
+_LIBCUDACXX_END_NAMESPACE_STD
+
+#endif // _LIBCUDACXX___MEMORY_ATOMIMC_LOAD_H
diff --git a/include/cuda/std/detail/libcxx/include/__mutex_base b/include/cuda/std/detail/libcxx/include/__mutex_base
index bae7c7c210..b8229c6672 100644
--- a/include/cuda/std/detail/libcxx/include/__mutex_base
+++ b/include/cuda/std/detail/libcxx/include/__mutex_base
@@ -10,23 +10,40 @@
 #ifndef _LIBCUDACXX___MUTEX_BASE
 #define _LIBCUDACXX___MUTEX_BASE
 
+#ifndef __cuda_std__
 #include <__config>
-#include
 #include
-#include <__threading_support>
-
-#include
+#endif // __cuda_std__
+
+#include "__memory/addressof.h"
+#include "__memory/atomic_load.h"
+#include "__threading_support"
+#include "__type_traits/enable_if.h"
+#include "__type_traits/is_floating_point.h"
+#include "__type_traits/is_nothrow_default_constructible.h"
+#include "__utility/unreachable.h"
+#include "chrono"
+#include "ctime"
+#include "semaphore"
+
+#ifndef __cuda_std__
+#include <__pragma_push>
+#endif // __cuda_std__
 
 #if defined(_LIBCUDACXX_USE_PRAGMA_GCC_SYSTEM_HEADER)
 #pragma GCC system_header
 #endif
 
-_LIBCUDACXX_PUSH_MACROS
-#include <__undef_macros>
-
-
 _LIBCUDACXX_BEGIN_NAMESPACE_STD
 
+#ifdef __cuda_std__
+_LIBCUDACXX_INLINE_VISIBILITY
+inline void __throw_system_error(int, const char*)
+{
+  __libcpp_unreachable();
+}
+#endif // __cuda_std__
+
 #ifndef _LIBCUDACXX_HAS_NO_THREADS
 
 #ifndef _LIBCUDACXX_THREAD_SAFETY_ANNOTATION
@@ -37,34 +54,73 @@ _LIBCUDACXX_BEGIN_NAMESPACE_STD
 # endif
 #endif // _LIBCUDACXX_THREAD_SAFETY_ANNOTATION
 
+#ifndef __cuda_std__
+template
+using __libcpp_mutex_base_t = __libcpp_mutex_t;
+#else
+template
+using __libcpp_mutex_base_t = __atomic_semaphore_base<_Sco,1>;
+
+#undef _LIBCUDACXX_MUTEX_INITIALIZER
+#define _LIBCUDACXX_MUTEX_INITIALIZER {1ll}
+#endif // __cuda_std__
 
-class _LIBCUDACXX_TYPE_VIS _LIBCUDACXX_THREAD_SAFETY_ANNOTATION(capability("mutex")) mutex
+template
+class _LIBCUDACXX_TYPE_VIS _LIBCUDACXX_THREAD_SAFETY_ANNOTATION(capability("mutex")) __mutex_base
 {
-    __libcpp_mutex_t __m_ = _LIBCUDACXX_MUTEX_INITIALIZER;
+    __libcpp_mutex_base_t<_Sco> __m_ = _LIBCUDACXX_MUTEX_INITIALIZER;
 
 public:
     _LIBCUDACXX_INLINE_VISIBILITY
-    _LIBCUDACXX_CONSTEXPR mutex() = default;
+    constexpr __mutex_base() noexcept {}
 
-    mutex(const mutex&) = delete;
-    mutex& operator=(const mutex&) = delete;
+    __mutex_base(const __mutex_base&) = delete;
+    __mutex_base& operator=(const __mutex_base&) = delete;
 
-#if defined(_LIBCUDACXX_HAS_TRIVIAL_MUTEX_DESTRUCTION)
-    ~mutex() = default;
+#if defined(_LIBCUDACXX_HAS_TRIVIAL_MUTEX_DESTRUCTION) || defined(__cuda_std__)
+    ~__mutex_base() = default;
 #else
-    ~mutex() _NOEXCEPT;
+    ~__mutex_base() _NOEXCEPT;
 #endif
 
+#ifndef _LIBCUDACXX_INLINE_THREADING
     void lock() _LIBCUDACXX_THREAD_SAFETY_ANNOTATION(acquire_capability());
     bool try_lock() _NOEXCEPT _LIBCUDACXX_THREAD_SAFETY_ANNOTATION(try_acquire_capability(true));
     void unlock() _NOEXCEPT _LIBCUDACXX_THREAD_SAFETY_ANNOTATION(release_capability());
+#else
+    _LIBCUDACXX_INLINE_VISIBILITY
+    void lock() _LIBCUDACXX_THREAD_SAFETY_ANNOTATION(acquire_capability()) {
+        __m_.acquire(); //while(!__m_.exchange(0));
+    }
+    _LIBCUDACXX_INLINE_VISIBILITY
+    bool try_lock() _NOEXCEPT _LIBCUDACXX_THREAD_SAFETY_ANNOTATION(try_acquire_capability(true)) {
+        return __m_.try_acquire();
+    }
+    _LIBCUDACXX_INLINE_VISIBILITY
+    void unlock() _NOEXCEPT _LIBCUDACXX_THREAD_SAFETY_ANNOTATION(release_capability()) {
+        __m_.release(); //__m_.store(1);
+    }
+
+    template
+    _LIBCUDACXX_INLINE_VISIBILITY
+    bool try_lock_for(const chrono::duration<_Rep, _Period>& __d) {
+        return __m_.try_acquire_for(__d);
+    }
 
-    typedef __libcpp_mutex_t* native_handle_type;
+    template
+    _LIBCUDACXX_INLINE_VISIBILITY
+    bool try_lock_until(const chrono::time_point<_Clock, _Duration>& __t) {
+        return __m_.try_acquire_until(__t);
+    }
+#endif
+
+    typedef __libcpp_mutex_base_t<_Sco>* native_handle_type;
     _LIBCUDACXX_INLINE_VISIBILITY native_handle_type native_handle() {return &__m_;}
 };
 
-static_assert(is_nothrow_default_constructible::value,
-              "the default constructor for std::mutex must be nothrow");
+using mutex = __mutex_base<0>;
+
+static_assert(is_nothrow_default_constructible::value, "the default constructor for std::mutex must be nothrow");
 
 struct _LIBCUDACXX_TYPE_VIS defer_lock_t { explicit defer_lock_t() = default; };
 struct _LIBCUDACXX_TYPE_VIS try_to_lock_t { explicit try_to_lock_t() = default; };
@@ -78,9 +134,9 @@ extern _LIBCUDACXX_EXPORTED_FROM_ABI const adopt_lock_t adopt_lock;
 
 #else
 
-/* _LIBCUDACXX_INLINE_VAR */ constexpr defer_lock_t defer_lock = defer_lock_t();
-/* _LIBCUDACXX_INLINE_VAR */ constexpr try_to_lock_t try_to_lock = try_to_lock_t();
-/* _LIBCUDACXX_INLINE_VAR */ constexpr adopt_lock_t adopt_lock = adopt_lock_t();
+_LIBCUDACXX_CPO_ACCESSIBILITY defer_lock_t defer_lock = defer_lock_t();
+_LIBCUDACXX_CPO_ACCESSIBILITY try_to_lock_t try_to_lock = try_to_lock_t();
+_LIBCUDACXX_CPO_ACCESSIBILITY adopt_lock_t adopt_lock = adopt_lock_t();
 
 #endif
 
@@ -151,8 +207,8 @@ public:
     }
 
 private:
-    unique_lock(unique_lock const&); // = delete;
-    unique_lock& operator=(unique_lock const&); // = delete;
+    unique_lock(unique_lock const&) = delete;
+    unique_lock& operator=(unique_lock const&) = delete;
 
 public:
 #ifndef _LIBCUDACXX_CXX03_LANG
@@ -174,14 +230,18 @@ public:
 
 #endif // _LIBCUDACXX_CXX03_LANG
 
-    void lock();
-    bool try_lock();
+    _LIBCUDACXX_INLINE_VISIBILITY void lock();
+    _LIBCUDACXX_INLINE_VISIBILITY bool try_lock();
 
     template
-        bool try_lock_for(const chrono::duration<_Rep, _Period>& __d);
+        _LIBCUDACXX_INLINE_VISIBILITY
+        bool try_lock_for(const chrono::duration<_Rep, _Period>& __d);
+
     template
-        bool try_lock_until(const chrono::time_point<_Clock, _Duration>& __t);
+        _LIBCUDACXX_INLINE_VISIBILITY
+        bool try_lock_until(const chrono::time_point<_Clock, _Duration>& __t);
+
+    _LIBCUDACXX_INLINE_VISIBILITY
     void unlock();
 
     _LIBCUDACXX_INLINE_VISIBILITY
@@ -201,6 +261,7 @@ public:
     _LIBCUDACXX_INLINE_VISIBILITY
     bool owns_lock() const _NOEXCEPT {return __owns_;}
 
+    _LIBCUDACXX_INLINE_VISIBILITY
     _LIBCUDACXX_EXPLICIT
         operator bool () const _NOEXCEPT {return __owns_;}
 
@@ -209,61 +270,71 @@ public:
 };
 
 template
-void
-unique_lock<_Mutex>::lock()
+_LIBCUDACXX_INLINE_VISIBILITY
+void unique_lock<_Mutex>::lock()
 {
+#ifndef _LIBCUDACXX_NO_EXCEPTIONS
     if (__m_ == nullptr)
         __throw_system_error(EPERM, "unique_lock::lock: references null mutex");
     if (__owns_)
         __throw_system_error(EDEADLK, "unique_lock::lock: already locked");
+#endif // _LIBCUDACXX_NO_EXCEPTIONS
     __m_->lock();
     __owns_ = true;
 }
 
 template
-bool
-unique_lock<_Mutex>::try_lock()
+_LIBCUDACXX_INLINE_VISIBILITY
+bool unique_lock<_Mutex>::try_lock()
 {
+#ifndef _LIBCUDACXX_NO_EXCEPTIONS
     if (__m_ == nullptr)
         __throw_system_error(EPERM, "unique_lock::try_lock: references null mutex");
     if (__owns_)
         __throw_system_error(EDEADLK, "unique_lock::try_lock: already locked");
+#endif // _LIBCUDACXX_NO_EXCEPTIONS
     __owns_ = __m_->try_lock();
     return __owns_;
 }
 
 template
 template
-bool
-unique_lock<_Mutex>::try_lock_for(const chrono::duration<_Rep, _Period>& __d)
+_LIBCUDACXX_INLINE_VISIBILITY
+bool unique_lock<_Mutex>::try_lock_for(const chrono::duration<_Rep, _Period>& __d)
 {
+#ifndef _LIBCUDACXX_NO_EXCEPTIONS
     if (__m_ == nullptr)
         __throw_system_error(EPERM, "unique_lock::try_lock_for: references null mutex");
     if (__owns_)
         __throw_system_error(EDEADLK, "unique_lock::try_lock_for: already locked");
+#endif // _LIBCUDACXX_NO_EXCEPTIONS
     __owns_ = __m_->try_lock_for(__d);
     return __owns_;
 }
 
 template
 template
-bool
+_LIBCUDACXX_INLINE_VISIBILITY bool
 unique_lock<_Mutex>::try_lock_until(const chrono::time_point<_Clock, _Duration>& __t)
 {
+#ifndef _LIBCUDACXX_NO_EXCEPTIONS
     if (__m_ == nullptr)
         __throw_system_error(EPERM, "unique_lock::try_lock_until: references null mutex");
     if (__owns_)
         __throw_system_error(EDEADLK, "unique_lock::try_lock_until: already locked");
+#endif // _LIBCUDACXX_NO_EXCEPTIONS
     __owns_ = __m_->try_lock_until(__t);
     return __owns_;
 }
 
-template
+template
 _LIBCUDACXX_INLINE_VISIBILITY
 void
 unique_lock<_Mutex>::unlock()
 {
+#ifndef _LIBCUDACXX_NO_EXCEPTIONS
     if (!__owns_)
         __throw_system_error(EPERM, "unique_lock::unlock: not locked");
+#endif // _LIBCUDACXX_NO_EXCEPTIONS
     __m_->unlock();
     __owns_ = false;
 }
 
@@ -274,6 +345,8 @@ void
 swap(unique_lock<_Mutex>& __x, unique_lock<_Mutex>& __y) _NOEXCEPT
     {__x.swap(__y);}
 
+#ifndef _LIBCUDACXX_HAS_THREAD_API_CUDA
+
 //enum class cv_status
 _LIBCUDACXX_DECLARE_STRONG_ENUM(cv_status)
 {
@@ -346,15 +419,17 @@ private:
     void __do_timed_wait(unique_lock& __lk,
                          chrono::time_point<_Clock, chrono::nanoseconds>) _NOEXCEPT;
 };
+
+#endif // _LIBCUDACXX_HAS_THREAD_API_CUDA
 
 #endif // !_LIBCUDACXX_HAS_NO_THREADS
 
 template
 inline _LIBCUDACXX_INLINE_VISIBILITY
-typename enable_if
+__enable_if_t
 <
     is_floating_point<_Rep>::value,
     chrono::nanoseconds
->::type
+>
 __safe_nanosecond_cast(chrono::duration<_Rep, _Period> __d)
 {
     using namespace chrono;
@@ -377,11 +452,11 @@ __safe_nanosecond_cast(chrono::duration<_Rep, _Period> __d)
 
 template
 inline _LIBCUDACXX_INLINE_VISIBILITY
-typename enable_if
+__enable_if_t
 <
     !is_floating_point<_Rep>::value,
     chrono::nanoseconds
->::type
+>
 __safe_nanosecond_cast(chrono::duration<_Rep, _Period> __d)
 {
     using namespace chrono;
@@ -410,6 +485,8 @@ __safe_nanosecond_cast(chrono::duration<_Rep, _Period> __d)
 }
 
 #ifndef _LIBCUDACXX_HAS_NO_THREADS
+#ifndef _LIBCUDACXX_HAS_THREAD_API_CUDA
+
 template
 void
 condition_variable::wait(unique_lock& __lk, _Predicate __pred)
@@ -532,10 +609,13 @@ condition_variable::__do_timed_wait(unique_lock& __lk,
     wait_for(__lk, __tp - _Clock::now());
 }
 
+#endif //_LIBCUDACXX_HAS_THREAD_API_CUDA
 #endif // !_LIBCUDACXX_HAS_NO_THREADS
 
 _LIBCUDACXX_END_NAMESPACE_STD
 
-_LIBCUDACXX_POP_MACROS
+#ifndef __cuda_std__
+#include <__pragma_pop>
+#endif // __cuda_std__
 
 #endif // _LIBCUDACXX___MUTEX_BASE
diff --git a/include/cuda/std/detail/libcxx/include/memory b/include/cuda/std/detail/libcxx/include/memory
index 1258a67e16..f59ce610bf 100644
--- a/include/cuda/std/detail/libcxx/include/memory
+++ b/include/cuda/std/detail/libcxx/include/memory
@@ -663,6 +663,7 @@ void* align(size_t alignment, size_t size, void*& ptr, size_t& space);
 #include "__iterator/iterator_traits.h"
 #include "__iterator/iterator.h"
 #include "__memory/addressof.h"
+#include "__memory/atomic_load.h"
 #include "__memory/pointer_traits.h"
 #include "__tuple_dir/tuple_indices.h"
 #include "__type_traits/decay.h"
@@ -707,33 +708,6 @@ void* align(size_t alignment, size_t size, void*& ptr, size_t& space);
 #endif
 
 _LIBCUDACXX_BEGIN_NAMESPACE_STD
-
-template
-inline _LIBCUDACXX_INLINE_VISIBILITY
-_ValueType __libcpp_relaxed_load(_ValueType const* __value) {
-#if !defined(_LIBCUDACXX_HAS_NO_THREADS) && \
-    defined(__ATOMIC_RELAXED) && \
-    (__has_builtin(__atomic_load_n) || defined(_LIBCUDACXX_COMPILER_GCC))
-    return __atomic_load_n(__value, __ATOMIC_RELAXED);
-#else
-    return *__value;
-#endif
-}
-
-template
-inline _LIBCUDACXX_INLINE_VISIBILITY
-_ValueType __libcpp_acquire_load(_ValueType const* __value) {
-#if !defined(_LIBCUDACXX_HAS_NO_THREADS) && \
-    defined(__ATOMIC_ACQUIRE) && \
-    (__has_builtin(__atomic_load_n) || defined(_LIBCUDACXX_COMPILER_GCC))
-    return __atomic_load_n(__value, __ATOMIC_ACQUIRE);
-#else
-    return *__value;
-#endif
-}
-
-// addressof moved to
-
 template class allocator;
 
 template <>
diff --git a/include/cuda/std/detail/libcxx/include/mutex b/include/cuda/std/detail/libcxx/include/mutex
index 7a454e5609..eab4957455 100644
--- a/include/cuda/std/detail/libcxx/include/mutex
+++ b/include/cuda/std/detail/libcxx/include/mutex
@@ -186,28 +186,31 @@ template
 
 */
 
+#ifndef __cuda_std__
 #include <__config>
-#include <__mutex_base>
-#include
-#include
-#include
-#ifndef _LIBCUDACXX_CXX03_LANG
-#include
-#endif
-#include
-#include <__threading_support>
+#include   // for __libcpp_acquire_load
+#endif // __cuda_std__
+
+#include "__mutex_base"
+#include "__threading_support"
+#include "__utility/forward.h"
+#include "cstdint"
+#include "functional"
+#include "tuple"
+#include "version"
+
+#ifndef __cuda_std__
+#include <__pragma_push>
+#endif // __cuda_std__
 
 #if defined(_LIBCUDACXX_USE_PRAGMA_GCC_SYSTEM_HEADER)
 #pragma GCC system_header
 #endif
 
-_LIBCUDACXX_PUSH_MACROS
-#include <__undef_macros>
-
-
 _LIBCUDACXX_BEGIN_NAMESPACE_STD
 
 #ifndef _LIBCUDACXX_HAS_NO_THREADS
+#ifndef _LIBCUDACXX_HAS_THREAD_API_CUDA
 
 class _LIBCUDACXX_TYPE_VIS recursive_mutex
 {
@@ -303,8 +306,7 @@ public:
 };
 
 template
-bool
-recursive_timed_mutex::try_lock_until(const chrono::time_point<_Clock, _Duration>& __t)
+bool recursive_timed_mutex::try_lock_until(const chrono::time_point<_Clock, _Duration>& __t)
 {
     using namespace chrono;
     __thread_id __id = this_thread::get_id();
@@ -327,10 +329,15 @@ recursive_timed_mutex::try_lock_until(const chrono::time_point<_Clock, _Duration
         }
         return false;
 }
+#else
+
+using timed_mutex = __mutex_base<0>;
+
+#endif // _LIBCUDACXX_HAS_THREAD_API_CUDA
 
 template
-int
-try_lock(_L0& __l0, _L1& __l1)
+_LIBCUDACXX_INLINE_VISIBILITY
+int try_lock(_L0& __l0, _L1& __l1)
 {
     unique_lock<_L0> __u0(__l0, try_to_lock);
     if (__u0.owns_lock())
@@ -346,11 +353,9 @@ try_lock(_L0& __l0, _L1& __l1)
     return 0;
 }
 
-#ifndef _LIBCUDACXX_CXX03_LANG
-
 template
-int
-try_lock(_L0& __l0, _L1& __l1, _L2& __l2, _L3&... __l3)
+_LIBCUDACXX_INLINE_VISIBILITY
+int try_lock(_L0& __l0, _L1& __l1, _L2& __l2, _L3&... __l3)
 {
     int __r = 0;
     unique_lock<_L0> __u0(__l0, try_to_lock);
@@ -365,11 +370,9 @@ try_lock(_L0& __l0, _L1& __l1, _L2& __l2, _L3&... __l3)
     return __r;
 }
 
-#endif // _LIBCUDACXX_CXX03_LANG
-
 template
-void
-lock(_L0& __l0, _L1& __l1)
+_LIBCUDACXX_INLINE_VISIBILITY
+void lock(_L0& __l0, _L1& __l1)
 {
     while (true)
     {
@@ -394,11 +397,9 @@ lock(_L0& __l0, _L1& __l1)
     }
 }
 
-#ifndef _LIBCUDACXX_CXX03_LANG
-
 template
-void
-__lock_first(int __i, _L0& __l0, _L1& __l1, _L2& __l2, _L3& ...__l3)
+_LIBCUDACXX_INLINE_VISIBILITY
+void __lock_first(int __i, _L0& __l0, _L1& __l1, _L2& __l2, _L3& ...__l3)
 {
     while (true)
     {
@@ -469,8 +470,6 @@ void
 __unlock(_L0& __l0, _L1& __l1, _L2& __l2, _L3&... __l3)
 {
     _CUDA_VSTD::__unlock(__l2, __l3...);
 }
 
-#endif // _LIBCUDACXX_CXX03_LANG
-
 #if _LIBCUDACXX_STD_VER > 14
 template
 class _LIBCUDACXX_TEMPLATE_VIS scoped_lock;
@@ -478,6 +477,7 @@ class _LIBCUDACXX_TEMPLATE_VIS scoped_lock;
 template <>
 class _LIBCUDACXX_TEMPLATE_VIS scoped_lock<> {
 public:
+    _LIBCUDACXX_INLINE_VISIBILITY
     explicit scoped_lock() {}
     ~scoped_lock() = default;
 
@@ -495,13 +495,13 @@ public:
 private:
     mutex_type& __m_;
 public:
-    explicit scoped_lock(mutex_type & __m) _LIBCUDACXX_THREAD_SAFETY_ANNOTATION(acquire_capability(__m))
+    _LIBCUDACXX_INLINE_VISIBILITY explicit scoped_lock(mutex_type & __m) _LIBCUDACXX_THREAD_SAFETY_ANNOTATION(acquire_capability(__m))
         : __m_(__m) {__m_.lock();}
-    ~scoped_lock() _LIBCUDACXX_THREAD_SAFETY_ANNOTATION(release_capability()) {__m_.unlock();}
+    _LIBCUDACXX_INLINE_VISIBILITY ~scoped_lock() _LIBCUDACXX_THREAD_SAFETY_ANNOTATION(release_capability()) {__m_.unlock();}
 
     _LIBCUDACXX_INLINE_VISIBILITY
-    explicit scoped_lock(adopt_lock_t, mutex_type& __m) _LIBCUDACXX_THREAD_SAFETY_ANNOTATION(requires_capability(__m))
+    _LIBCUDACXX_INLINE_VISIBILITY explicit scoped_lock(adopt_lock_t, mutex_type& __m) _LIBCUDACXX_THREAD_SAFETY_ANNOTATION(requires_capability(__m))
         : __m_(__m) {}
 
     scoped_lock(scoped_lock const&) = delete;
@@ -550,61 +550,39 @@ private:
 #endif // _LIBCUDACXX_STD_VER > 14
 #endif // !_LIBCUDACXX_HAS_NO_THREADS
 
-struct _LIBCUDACXX_TEMPLATE_VIS once_flag;
-
-#ifndef _LIBCUDACXX_CXX03_LANG
-
-template
-_LIBCUDACXX_INLINE_VISIBILITY
-void call_once(once_flag&, _Callable&&, _Args&&...);
-
-#else // _LIBCUDACXX_CXX03_LANG
+template
+struct _LIBCUDACXX_TEMPLATE_VIS __once_flag_base;
 
-template
+template
 _LIBCUDACXX_INLINE_VISIBILITY
-void call_once(once_flag&, _Callable&);
+void call_once(__once_flag_base<_Sco>&, _Callable&&, _Args&&...);
 
-template
-_LIBCUDACXX_INLINE_VISIBILITY
-void call_once(once_flag&, const _Callable&);
-
-#endif // _LIBCUDACXX_CXX03_LANG
-
-struct _LIBCUDACXX_TEMPLATE_VIS once_flag
+template
+struct _LIBCUDACXX_TEMPLATE_VIS __once_flag_base
 {
-    _LIBCUDACXX_INLINE_VISIBILITY
-    _LIBCUDACXX_CONSTEXPR
-    once_flag() _NOEXCEPT : __state_(0) {}
+    constexpr __once_flag_base() noexcept = default;
 
 #if defined(_LIBCUDACXX_ABI_MICROSOFT)
-    typedef uintptr_t _State_type;
+    typedef uintptr_t _State_data_type;
 #else
-    typedef unsigned long _State_type;
+    typedef unsigned long _State_data_type;
 #endif
+#ifndef _LIBCUDACXX_INLINE_THREADING
+    using _State_type = _State_data_type;
+#else
+    using _State_type = atomic<_State_data_type>;
+#endif // _LIBCUDACXX_INLINE_THREADING
+
+    _State_type __state_{0};
 
 private:
-    once_flag(const once_flag&); // = delete;
-    once_flag& operator=(const once_flag&); // = delete;
-
-    _State_type __state_;
-
-#ifndef _LIBCUDACXX_CXX03_LANG
-    template
-    friend
-    void call_once(once_flag&, _Callable&&, _Args&&...);
-#else // _LIBCUDACXX_CXX03_LANG
-    template
-    friend
-    void call_once(once_flag&, _Callable&);
-
-    template
-    friend
-    void call_once(once_flag&, const _Callable&);
-#endif // _LIBCUDACXX_CXX03_LANG
+
+    __once_flag_base(const __once_flag_base&) = delete;
+    __once_flag_base& operator=(const __once_flag_base&) = delete;
 };
 
-#ifndef _LIBCUDACXX_CXX03_LANG
+using once_flag = __once_flag_base<0>;
 
 template
 class __call_once_param
@@ -630,82 +608,52 @@ private:
     }
 };
 
-#else
-
-template
-class __call_once_param
-{
-    _Fp& __f_;
-public:
-    _LIBCUDACXX_INLINE_VISIBILITY
-    explicit __call_once_param(_Fp& __f) : __f_(__f) {}
-
-    _LIBCUDACXX_INLINE_VISIBILITY
-    void operator()()
-    {
-        __f_();
-    }
-};
-
-#endif
-
 template
-void
-__call_once_proxy(void* __vp)
+_LIBCUDACXX_INLINE_VISIBILITY
+void __call_once_proxy(void* __vp)
 {
     __call_once_param<_Fp>* __p = static_cast<__call_once_param<_Fp>*>(__vp);
     (*__p)();
 }
 
-_LIBCUDACXX_FUNC_VIS void __call_once(volatile once_flag::_State_type&, void*,
-                                      void (*)(void*));
-
-#ifndef _LIBCUDACXX_CXX03_LANG
-
-template
-inline _LIBCUDACXX_INLINE_VISIBILITY
-void
-call_once(once_flag& __flag, _Callable&& __func, _Args&&... __args)
-{
-    if (__libcpp_acquire_load(&__flag.__state_) != ~once_flag::_State_type(0))
-    {
-        typedef tuple<_Callable&&, _Args&&...> _Gp;
-        _Gp __f(_CUDA_VSTD::forward<_Callable>(__func), _CUDA_VSTD::forward<_Args>(__args)...);
-        __call_once_param<_Gp> __p(__f);
-        __call_once(__flag.__state_, &__p, &__call_once_proxy<_Gp>);
-    }
-}
-
-#else // _LIBCUDACXX_CXX03_LANG
-
-template
+#ifndef _LIBCUDACXX_INLINE_THREADING
+template
+_LIBCUDACXX_FUNC_VIS
+void __call_once(volatile typename __once_flag_base<_Sco>::_State_type&, void*, void (*)(void*));
+#else
+template
 inline _LIBCUDACXX_INLINE_VISIBILITY
-void
-call_once(once_flag& __flag, _Callable& __func)
-{
-    if (__libcpp_acquire_load(&__flag.__state_) != ~once_flag::_State_type(0))
+void __call_once(volatile typename __once_flag_base<_Sco>::_State_type& __s, void* __p, void (* __f)(void*))
 {
-        __call_once_param<_Callable> __p(__func);
-        __call_once(__flag.__state_, &__p, &__call_once_proxy<_Callable>);
+    typename __once_flag_base<_Sco>::_State_data_type __once_expect = 0;
+    if(__s.compare_exchange_strong(__once_expect, typename __once_flag_base<_Sco>::_State_data_type(1), memory_order_acquire))
+    {
+        __f(__p);
+        __s.store(~typename __once_flag_base<_Sco>::_State_data_type(0), memory_order_release);
+        __s.notify_all();
+    }
+    else if(__once_expect == 1)
+        __s.wait(__once_expect);
 }
-}
+#endif // _LIBCUDACXX_INLINE_THREADING
 
-template
+template
 inline _LIBCUDACXX_INLINE_VISIBILITY
-void
-call_once(once_flag& __flag, const _Callable& __func)
+void call_once(__once_flag_base<_Sco>& __flag, _Callable&& __func, _Args&&... __args)
 {
-    if (__libcpp_acquire_load(&__flag.__state_) != ~once_flag::_State_type(0))
+    if (__libcpp_acquire_load(&__flag.__state_) != ~typename __once_flag_base<_Sco>::_State_data_type(0))
     {
-        __call_once_param __p(__func);
-        __call_once(__flag.__state_, &__p, &__call_once_proxy);
+        typedef tuple<_Callable&&, _Args&&...> _Gp;
+        _Gp __f(_CUDA_VSTD::forward<_Callable>(__func), _CUDA_VSTD::forward<_Args>(__args)...);
+        __call_once_param<_Gp> __p(__f);
+        __call_once<_Sco>(__flag.__state_, &__p, &__call_once_proxy<_Gp>);
    }
 }
 
-#endif // _LIBCUDACXX_CXX03_LANG
-
 _LIBCUDACXX_END_NAMESPACE_STD
 
-_LIBCUDACXX_POP_MACROS
+#ifndef __cuda_std__
+#include <__pragma_pop>
+#endif // __cuda_std__
 
 #endif // _LIBCUDACXX_MUTEX
diff --git a/include/cuda/std/mutex b/include/cuda/std/mutex
new file mode 100644
index 0000000000..c60e649adc
--- /dev/null
+++ b/include/cuda/std/mutex
@@ -0,0 +1,26 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of libcu++, the C++ Standard Library for your entire system,
+// under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _CUDA_STD_MUTEX
+#define _CUDA_STD_MUTEX
+
+#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ < 700
+#  error "CUDA synchronization primitives are only supported for sm_70 and up."
+#endif
+
+#include "detail/__config"
+
+#include "detail/__pragma_push"
+
+#include "detail/libcxx/include/mutex"
+
+#include "detail/__pragma_pop"
+
+#endif //_CUDA_STD_MUTEX
diff --git a/libcxx/src/mutex.cpp b/libcxx/src/mutex.cpp
index 49352a005a..1744172f4d 100644
--- a/libcxx/src/mutex.cpp
+++ b/libcxx/src/mutex.cpp
@@ -27,6 +27,7 @@ const adopt_lock_t adopt_lock{};
 
 // ~mutex is defined elsewhere
 
+template<>
 void
 mutex::lock()
 {
@@ -35,12 +36,14 @@ mutex::lock()
         __throw_system_error(ec, "mutex lock failed");
 }
 
+template<>
 bool
 mutex::try_lock() _NOEXCEPT
 {
     return __libcpp_mutex_trylock(&__m_);
 }
 
+template<>
 void
 mutex::unlock() _NOEXCEPT
 {
@@ -200,7 +203,8 @@ _LIBCUDACXX_SAFE_STATIC static __libcpp_mutex_t mut = _LIBCUDACXX_MUTEX_INITIALI
 _LIBCUDACXX_SAFE_STATIC static __libcpp_condvar_t cv = _LIBCUDACXX_CONDVAR_INITIALIZER;
 #endif
 
-void __call_once(volatile once_flag::_State_type& flag, void* arg,
+template<>
+void __call_once<0>(volatile typename __once_flag_base<0>::_State_type& flag, void* arg,
                  void (*func)(void*))
 {
 #if defined(_LIBCUDACXX_HAS_NO_THREADS)
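Usage sketch (editor's note, not part of the patch): the headers added above make cuda::std::mutex a header-only __mutex_base<0> backed by __atomic_semaphore_base, and give cuda::std::once_flag / call_once an inline __call_once state machine. The snippet below is a minimal, assumed example of how those primitives could be exercised from a kernel; the names d_mtx, d_once, d_counter and increment are illustrative only, and it presumes an sm_70+ device as required by the #error guard in <cuda/std/mutex>.

#include <cuda/std/mutex>
#include <cstdio>

__device__ cuda::std::mutex d_mtx;       // constexpr-constructible per __mutex_base
__device__ cuda::std::once_flag d_once;  // __once_flag_base<0>, state starts at 0
__device__ int d_counter = 0;

__global__ void increment()
{
    // One thread wins the compare_exchange in the inline __call_once and runs
    // the initializer; the others wait until the state is set to ~0.
    cuda::std::call_once(d_once, [] { d_counter = 100; });

    // unique_lock drives the inline lock()/unlock(), i.e. acquire()/release()
    // on the semaphore that backs __mutex_base.
    cuda::std::unique_lock<cuda::std::mutex> guard(d_mtx);
    ++d_counter;
}

int main()
{
    increment<<<2, 32>>>();
    cudaDeviceSynchronize();
    std::printf("counter updated under the device mutex\n");
    return 0;
}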