From bc80e36826fc8a8369cff8be26f31988ce8765ba Mon Sep 17 00:00:00 2001 From: Wesley Maxey Date: Fri, 23 Jul 2021 21:27:54 -0700 Subject: [PATCH] Extend the test to measure both phases, make barrier_(try_)wait_parity member functions --- .../heterogeneous/barrier_parity.pass.cpp | 11 +++-- include/cuda/std/barrier | 46 ++++++------------- libcxx/include/barrier | 34 ++++++++++++-- 3 files changed, 52 insertions(+), 39 deletions(-) diff --git a/.upstream-tests/test/heterogeneous/barrier_parity.pass.cpp b/.upstream-tests/test/heterogeneous/barrier_parity.pass.cpp index fbf76ddf3f..2c4460995b 100644 --- a/.upstream-tests/test/heterogeneous/barrier_parity.pass.cpp +++ b/.upstream-tests/test/heterogeneous/barrier_parity.pass.cpp @@ -49,7 +49,7 @@ struct barrier_arrive_and_wait }; template -struct barrier_arrive_parity_wait +struct barrier_parity_wait { using async = cuda::std::true_type; @@ -59,7 +59,7 @@ struct barrier_arrive_parity_wait { data.parity_waiting.store(true, cuda::std::memory_order_release); data.parity_waiting.notify_all(); - cuda::barrier_wait_parity(&data.barrier, Phase); + data.barrier.wait_parity(Phase); } }; @@ -74,9 +74,14 @@ struct clear_token }; using aw_aw_pw = performer_list< + barrier_parity_wait, + barrier_arrive_and_wait, + barrier_arrive_and_wait, + async_tester_fence, + clear_token, + barrier_parity_wait, barrier_arrive_and_wait, barrier_arrive_and_wait, - barrier_arrive_parity_wait, async_tester_fence, clear_token >; diff --git a/include/cuda/std/barrier b/include/cuda/std/barrier index f9f31b93ab..d24b2a7838 100644 --- a/include/cuda/std/barrier +++ b/include/cuda/std/barrier @@ -75,38 +75,6 @@ _LIBCUDACXX_END_NAMESPACE_CUDA_DEVICE _LIBCUDACXX_BEGIN_NAMESPACE_CUDA -template -inline _LIBCUDACXX_INLINE_VISIBILITY -bool barrier_try_wait_parity(__Barrier const* __this, bool __parity) -{ - return __this->__try_wait_parity(__parity); -} - -template -struct __barrier_poll_tester_parity { - __Barrier const* __this; - bool __parity; - - _LIBCUDACXX_INLINE_VISIBILITY - __barrier_poll_tester_parity(__Barrier const* __this_, bool __parity_) - : __this(__this_) - , __parity(__parity_) - {} - - inline _LIBCUDACXX_INLINE_VISIBILITY - bool operator()() const - { - return barrier_try_wait_parity(__this, __parity); - } -}; - -template -inline _LIBCUDACXX_INLINE_VISIBILITY -void barrier_wait_parity(__Barrier const* __this, bool __parity) -{ - _CUDA_VSTD::__libcpp_thread_poll_with_backoff(__barrier_poll_tester_parity<__Barrier>(__this, __parity)); -} - template<> class barrier : public __block_scope_barrier_base { using __barrier_base = std::__barrier_base; @@ -161,7 +129,7 @@ public: else #endif { - return __barrier.__try_wait_parity(__parity); + return __barrier.try_wait_parity(__parity); } } @@ -252,6 +220,18 @@ public: _CUDA_VSTD::__libcpp_thread_poll_with_backoff(std::__barrier_poll_tester(this, _CUDA_VSTD::move(__phase))); } + inline _LIBCUDACXX_INLINE_VISIBILITY + bool try_wait_parity(bool __parity) const + { + return __try_wait_parity(__parity); + } + + inline _LIBCUDACXX_INLINE_VISIBILITY + void wait_parity(bool __parity) const + { + _CUDA_VSTD::__libcpp_thread_poll_with_backoff(std::__barrier_poll_tester_parity(this, __parity)); + } + inline _LIBCUDACXX_INLINE_VISIBILITY void arrive_and_wait() { diff --git a/libcxx/include/barrier b/libcxx/include/barrier index 125fa6625d..83abb95697 100644 --- a/libcxx/include/barrier +++ b/libcxx/include/barrier @@ -298,6 +298,24 @@ struct __barrier_poll_tester { } }; +template +struct __barrier_poll_tester_parity { + __Barrier const* __this; + bool __parity; + + _LIBCUDACXX_INLINE_VISIBILITY + __barrier_poll_tester_parity(__Barrier const* __this_, bool __parity_) + : __this(__this_) + , __parity(__parity_) + {} + + inline _LIBCUDACXX_INLINE_VISIBILITY + bool operator()() const + { + return __this->try_wait_parity(__parity); + } +}; + template class __barrier_base<__empty_completion, _Sco> { @@ -325,6 +343,11 @@ private: uint64_t const __current = __phase_arrived_expected.load(memory_order_acquire); return ((__current & __phase_bit) != __phase); } + _LIBCUDACXX_INLINE_VISIBILITY + bool __try_wait_parity(bool __parity) const + { + return __try_wait_phase(__parity ? __phase_bit : 0); + } public: __barrier_base() = default; @@ -340,9 +363,9 @@ public: __barrier_base& operator=(__barrier_base const&) = delete; _LIBCUDACXX_INLINE_VISIBILITY - bool __try_wait_parity(bool __parity) const + bool try_wait_parity(bool __parity) const { - return __try_wait_phase(__parity ? __phase_bit : 0); + return __try_wait_parity(__parity); } _LIBCUDACXX_INLINE_VISIBILITY bool try_wait(arrival_token __old) const @@ -364,7 +387,12 @@ public: _LIBCUDACXX_INLINE_VISIBILITY void wait(arrival_token&& __phase) const { - __libcpp_thread_poll_with_backoff(__barrier_poll_tester<__barrier_base<__empty_completion, _Sco>>(this, _CUDA_VSTD::move(__phase))); + __libcpp_thread_poll_with_backoff(__barrier_poll_tester<__barrier_base>(this, _CUDA_VSTD::move(__phase))); + } + _LIBCUDACXX_INLINE_VISIBILITY + void wait_parity(bool __parity) const + { + __libcpp_thread_poll_with_backoff(__barrier_poll_tester_parity<__barrier_base>(this, __parity)); } _LIBCUDACXX_INLINE_VISIBILITY void arrive_and_wait()