Skip to content

Commit

Permalink
Extend the test to measure both phases, make barrier_(try_)wait_parity member functions
Browse files Browse the repository at this point in the history
  • Loading branch information
wmaxey committed Jul 27, 2021
1 parent 1464783 commit bc80e36
Show file tree
Hide file tree
Showing 3 changed files with 52 additions and 39 deletions.
11 changes: 8 additions & 3 deletions .upstream-tests/test/heterogeneous/barrier_parity.pass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ struct barrier_arrive_and_wait
};

template <bool Phase>
struct barrier_arrive_parity_wait
struct barrier_parity_wait
{
using async = cuda::std::true_type;

Expand All @@ -59,7 +59,7 @@ struct barrier_arrive_parity_wait
{
data.parity_waiting.store(true, cuda::std::memory_order_release);
data.parity_waiting.notify_all();
cuda::barrier_wait_parity(&data.barrier, Phase);
data.barrier.wait_parity(Phase);
}
};

Expand All @@ -74,9 +74,14 @@ struct clear_token
};

using aw_aw_pw = performer_list<
barrier_parity_wait<false>,
barrier_arrive_and_wait,
barrier_arrive_and_wait,
async_tester_fence,
clear_token,
barrier_parity_wait<true>,
barrier_arrive_and_wait,
barrier_arrive_and_wait,
barrier_arrive_parity_wait<false>,
async_tester_fence,
clear_token
>;
Expand Down
46 changes: 13 additions & 33 deletions include/cuda/std/barrier
Original file line number Diff line number Diff line change
Expand Up @@ -75,38 +75,6 @@ _LIBCUDACXX_END_NAMESPACE_CUDA_DEVICE

_LIBCUDACXX_BEGIN_NAMESPACE_CUDA

// Free-function form of the parity query.
//
// Calls `__try_wait_parity(__parity)` on the barrier that `__this` points to
// and hands the answer back to the caller.
//
// __this   - barrier to query; it is dereferenced unconditionally, so it
//            must not be null.
// __parity - parity value forwarded as-is to the barrier.
//
// Returns the value produced by `__this->__try_wait_parity(__parity)`.
template<class __Barrier>
inline _LIBCUDACXX_INLINE_VISIBILITY
bool barrier_try_wait_parity(__Barrier const* __this, bool __parity)
{
    bool const __parity_reached = __this->__try_wait_parity(__parity);
    return __parity_reached;
}

// Nullary predicate that bundles a barrier pointer with a parity value so the
// pair can be handed to a polling routine as a single callable.
//
// Each invocation asks `barrier_try_wait_parity` about the stored barrier and
// parity and reports the result.
template<class __Barrier>
struct __barrier_poll_tester_parity {
    // Barrier being polled; held as a raw pointer and never released here.
    __Barrier const* __this;
    // Parity value passed along on every poll.
    bool __parity;

    // Captures the barrier and the parity to test for.
    _LIBCUDACXX_INLINE_VISIBILITY
    __barrier_poll_tester_parity(__Barrier const* __barrier_ptr, bool __wanted_parity)
        : __this(__barrier_ptr)
        , __parity(__wanted_parity)
    {}

    // One poll step: returns what `barrier_try_wait_parity` reports for the
    // captured barrier/parity pair.
    inline _LIBCUDACXX_INLINE_VISIBILITY
    bool operator()() const
    {
        bool const __ready = barrier_try_wait_parity(__this, __parity);
        return __ready;
    }
};

// Free-function form of the parity wait.
//
// Builds a `__barrier_poll_tester_parity` for (`__this`, `__parity`) and hands
// it to `__libcpp_thread_poll_with_backoff`, which drives the polling.
// NOTE(review): presumably this returns once the predicate reports true —
// confirm against `__libcpp_thread_poll_with_backoff`.
//
// __this   - barrier to wait on; stored in the poller and dereferenced on
//            each poll.
// __parity - parity value the poller tests for.
template<class __Barrier>
inline _LIBCUDACXX_INLINE_VISIBILITY
void barrier_wait_parity(__Barrier const* __this, bool __parity)
{
    using __parity_poller = __barrier_poll_tester_parity<__Barrier>;
    _CUDA_VSTD::__libcpp_thread_poll_with_backoff(__parity_poller(__this, __parity));
}

template<>
class barrier<thread_scope_block, std::__empty_completion> : public __block_scope_barrier_base {
using __barrier_base = std::__barrier_base<std::__empty_completion, (int)thread_scope_block>;
Expand Down Expand Up @@ -161,7 +129,7 @@ public:
else
#endif
{
return __barrier.__try_wait_parity(__parity);
return __barrier.try_wait_parity(__parity);
}
}

Expand Down Expand Up @@ -252,6 +220,18 @@ public:
_CUDA_VSTD::__libcpp_thread_poll_with_backoff(std::__barrier_poll_tester<barrier>(this, _CUDA_VSTD::move(__phase)));
}

// Public parity query: forwards `__parity` to the internal
// `__try_wait_parity` and returns its answer unchanged.
//
// __parity - parity value to test for.
inline _LIBCUDACXX_INLINE_VISIBILITY
bool try_wait_parity(bool __parity) const
{
    bool const __parity_reached = this->__try_wait_parity(__parity);
    return __parity_reached;
}

// Public parity wait: wraps this barrier and `__parity` in a
// `std::__barrier_poll_tester_parity` and passes that predicate to
// `__libcpp_thread_poll_with_backoff`, which drives the polling.
// NOTE(review): presumably this returns once the predicate reports true —
// confirm against `__libcpp_thread_poll_with_backoff`.
//
// __parity - parity value the poller tests for.
inline _LIBCUDACXX_INLINE_VISIBILITY
void wait_parity(bool __parity) const
{
    using __parity_poller = std::__barrier_poll_tester_parity<barrier>;
    _CUDA_VSTD::__libcpp_thread_poll_with_backoff(__parity_poller(this, __parity));
}

inline _LIBCUDACXX_INLINE_VISIBILITY
void arrive_and_wait()
{
Expand Down
34 changes: 31 additions & 3 deletions libcxx/include/barrier
Original file line number Diff line number Diff line change
Expand Up @@ -298,6 +298,24 @@ struct __barrier_poll_tester {
}
};

// Nullary predicate that pairs a barrier pointer with a parity value so the
// two can be passed to a polling routine as one callable.
//
// Each invocation calls the barrier's public `try_wait_parity` with the
// stored parity and reports the result.
template<class __Barrier>
struct __barrier_poll_tester_parity {
    // Barrier being polled; held as a raw pointer and never released here.
    __Barrier const* __this;
    // Parity value passed along on every poll.
    bool __parity;

    // Captures the barrier and the parity to test for.
    _LIBCUDACXX_INLINE_VISIBILITY
    __barrier_poll_tester_parity(__Barrier const* __barrier_ptr, bool __wanted_parity)
        : __this(__barrier_ptr)
        , __parity(__wanted_parity)
    {}

    // One poll step: returns `__this->try_wait_parity(__parity)`.
    inline _LIBCUDACXX_INLINE_VISIBILITY
    bool operator()() const
    {
        bool const __ready = __this->try_wait_parity(__parity);
        return __ready;
    }
};

template<int _Sco>
class __barrier_base<__empty_completion, _Sco> {

Expand Down Expand Up @@ -325,6 +343,11 @@ private:
uint64_t const __current = __phase_arrived_expected.load(memory_order_acquire);
return ((__current & __phase_bit) != __phase);
}
// Translates a boolean parity into a phase value and delegates to
// __try_wait_phase: `true` selects __phase_bit, `false` selects 0.
// The result of __try_wait_phase is returned unchanged.
_LIBCUDACXX_INLINE_VISIBILITY
bool __try_wait_parity(bool __parity) const
{
// The ternary picks the phase value that corresponds to the requested parity.
return __try_wait_phase(__parity ? __phase_bit : 0);
}

public:
__barrier_base() = default;
Expand All @@ -340,9 +363,9 @@ public:
__barrier_base& operator=(__barrier_base const&) = delete;

_LIBCUDACXX_INLINE_VISIBILITY
bool __try_wait_parity(bool __parity) const
bool try_wait_parity(bool __parity) const
{
return __try_wait_phase(__parity ? __phase_bit : 0);
return __try_wait_parity(__parity);
}
_LIBCUDACXX_INLINE_VISIBILITY
bool try_wait(arrival_token __old) const
Expand All @@ -364,7 +387,12 @@ public:
_LIBCUDACXX_INLINE_VISIBILITY
void wait(arrival_token&& __phase) const
{
__libcpp_thread_poll_with_backoff(__barrier_poll_tester<__barrier_base<__empty_completion, _Sco>>(this, _CUDA_VSTD::move(__phase)));
__libcpp_thread_poll_with_backoff(__barrier_poll_tester<__barrier_base>(this, _CUDA_VSTD::move(__phase)));
}
// Parity wait: wraps this barrier and `__parity` in a
// `__barrier_poll_tester_parity` and passes that predicate to
// `__libcpp_thread_poll_with_backoff`, which drives the polling.
// NOTE(review): presumably this returns once the predicate reports true —
// confirm against `__libcpp_thread_poll_with_backoff`.
//
// __parity - parity value the poller tests for.
_LIBCUDACXX_INLINE_VISIBILITY
void wait_parity(bool __parity) const
{
    using __parity_poller = __barrier_poll_tester_parity<__barrier_base>;
    __libcpp_thread_poll_with_backoff(__parity_poller(this, __parity));
}
_LIBCUDACXX_INLINE_VISIBILITY
void arrive_and_wait()
Expand Down

0 comments on commit bc80e36

Please sign in to comment.