Skip to content

Commit

Permalink
Fixes #223, #272: Stream attributes:
Browse files Browse the repository at this point in the history
* Can now copy all attributes between streams (with CUDA >= 11).
* Can now get and set streams' synchronization policy (with CUDA >= 11).
  • Loading branch information
eyalroz committed Jan 14, 2022
1 parent 4e30bd2 commit 4a1b8e5
Show file tree
Hide file tree
Showing 3 changed files with 144 additions and 7 deletions.
46 changes: 46 additions & 0 deletions examples/by_runtime_api_module/stream_management.cu
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,20 @@ __global__ void increment(char* data, size_t length)
data[global_index]++;
}

#if CUDA_VERSION >= 11000
const char* get_policy_name(cuda::stream::synchronization_policy_t policy)
{
switch(policy) {
case cuda::stream::automatic: return "automatic";
case cuda::stream::spin: return "spin";
case cuda::stream::yield: return "yield";
case cuda::stream::block: return "block";
default:
return "unknown policy";
}
}
#endif // CUDA_VERSION >= 11000

int main(int argc, char **argv)
{
constexpr const size_t N = 50;
Expand Down Expand Up @@ -122,6 +136,38 @@ int main(int argc, char **argv)
cuda::stream::default_priority + 1,
cuda::stream::no_implicit_synchronization_with_default_stream);

#if CUDA_VERSION >= 11000
// Stream synchronization policy and attribute copying

auto initial_policy = stream_1.synchronization_policy();
std::cout
<< "Initial stream synchronization policy is "
<< get_policy_name(initial_policy) << " (numeric value: " << (int) initial_policy << ")\n";
if (initial_policy != stream_2.synchronization_policy()) {
throw std::logic_error("Different synchronization policies for streams created the same way");
}
cuda::stream::synchronization_policy_t alt_policy =
(initial_policy == cuda::stream::yield) ? cuda::stream::block : cuda::stream::yield;
stream_2.set_synchronization_policy(alt_policy);
auto new_s2_policy = stream_2.synchronization_policy();
if (alt_policy != new_s2_policy) {
std::stringstream ss;
ss
<< "Got a different synchronization policy (" << get_policy_name(new_s2_policy) << ")"
<< " than the one we set the stream to (" << get_policy_name(alt_policy) << ")\n";
throw std::logic_error(ss.str());
}
std::cout << "Overwriting all attributes of stream 1 with those of stream 2.\n";
cuda::copy_attributes(stream_1, stream_2);
auto s1_policy_after_copy = stream_1.synchronization_policy();
if (alt_policy != s1_policy_after_copy) {
std::stringstream ss;
ss
<< "Got a different synchronization policy (" << get_policy_name(s1_policy_after_copy) << ")"
<< " than the one we expected after attribute-copying (" << get_policy_name(alt_policy) << ")\n";
throw std::logic_error(ss.str());
}
#endif

constexpr auto buffer_size = 12345678;
auto buffer = cuda::memory::managed::make_unique<char[]>(
Expand Down
86 changes: 86 additions & 0 deletions src/cuda/api/stream.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,48 @@ enum : bool {
async = no_implicit_synchronization_with_default_stream,
};

#if CUDA_VERSION >= 11000
/**
* Possible synchronization behavior of a host thread when performing a synchronous action
* on a stream (in particular, synchronizing with a stream).
*/
enum synchronization_policy_t : typename std::underlying_type<cudaSynchronizationPolicy>::type {
/**
* @todo Figure out what this default actually is!
*/
automatic = cudaSyncPolicyAuto,

/**
* @brief Keep control and spin-check for result availability
*
* Instruct CUDA to actively spin when waiting for the stream to
* complete pending actions. This can decrease latency when waiting
* for the device, but may lower the performance of other CPU threads
* working in parallel.
*/
spin = cudaSyncPolicySpin,

/**
* @brief Yield control while waiting for results.
*
* Instruct CUDA to yield its thread when waiting for the stream
* to complete pending actions. This can increase latency when
* waiting for the device, but can increase the performance of other
* CPU threads performing work in parallel.
*
*/
yield = cudaSyncPolicyYield,

/**
* @brief Block the thread until the stream has concluded pending actions.
*
* Instruct CUDA to block the CPU thread on a synchronization
* primitive when waiting for the stream to finish work.
*/
block = cudaSyncPolicyBlockingSync
};
#endif // CUDA_VERSION >= 11000

namespace detail_ {

inline id_t create_on_current_device(
Expand Down Expand Up @@ -556,6 +598,26 @@ class stream_t {
cuda::synchronize(*this);
}

#if CUDA_VERSION >= 11000
stream::synchronization_policy_t synchronization_policy()
{
device::current::detail_::scoped_override_t set_device_for_this_scope(device_id_);
cudaStreamAttrValue wrapped_result{};
auto status = cudaStreamGetAttribute(id_, cudaStreamAttributeSynchronizationPolicy, &wrapped_result);
throw_if_error(status);
return static_cast<stream::synchronization_policy_t>(wrapped_result.syncPolicy);
}

void set_synchronization_policy(stream::synchronization_policy_t policy)
{
device::current::detail_::scoped_override_t set_device_for_this_scope(device_id_);
cudaStreamAttrValue wrapped_value{};
wrapped_value.syncPolicy = static_cast<cudaSynchronizationPolicy>(policy);
auto status = cudaStreamSetAttribute(id_, cudaStreamAttributeSynchronizationPolicy, &wrapped_value);
throw_if_error(status);
}
#endif

protected: // constructor

stream_t(device::id_t device_id, stream::id_t stream_id, bool take_ownership = false) noexcept
Expand Down Expand Up @@ -679,6 +741,30 @@ inline void synchronize(const stream_t& stream)
+ " on CUDA device " + ::std::to_string(stream.device().id()));
}

#if CUDA_VERSION >= 11000
/**
* Overwrite all "attributes" of one stream with those of another
*
* @param dest The stream whose attributes will be overwritten
* @param src The stream whose attributes are to be copied
*
* @note As of CUDA 11.5, the "attributes" are the thread
* synchronization policy and the various L2 access policy window
* settings; see https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#L2_access_policy
* for details.
*/
inline void copy_attributes(const stream_t& dest, const stream_t& src)
{
#ifndef NDEBUG
if (dest.device() != src.device()) {
throw std::invalid_argument("Attempt to copy attributes between streams on different devices");
}
#endif
device::current::scoped_override_t set_device_for_this_scope(dest.device());
auto status = cudaStreamCopyAttributes(dest.id(), src.id());
throw_if_error(status);
}
#endif // CUDA_VERSION >= 11000

} // namespace cuda

Expand Down
19 changes: 12 additions & 7 deletions src/cuda/common/types.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -538,6 +538,7 @@ using kernel_parameter_decay_t = typename kernel_parameter_decay<P>::type;
* device
*/
enum host_thread_synch_scheduling_policy_t : unsigned int {

/**
* @brief Default behavior; yield or spin based on a heuristic.
*
Expand All @@ -549,6 +550,7 @@ enum host_thread_synch_scheduling_policy_t : unsigned int {
* actively spin on the processor.
*/
heuristic = cudaDeviceScheduleAuto,

/**
* @brief Keep control and spin-check for result availability
*
Expand All @@ -559,6 +561,15 @@ enum host_thread_synch_scheduling_policy_t : unsigned int {
*
*/
spin = cudaDeviceScheduleSpin,

/**
* @brief Block the thread until results are available.
*
* Instruct CUDA to block the CPU thread on a synchronization
* primitive when waiting for the device to finish work.
*/
block = cudaDeviceScheduleBlockingSync,

/**
* @brief Yield control while waiting for results.
*
Expand All @@ -568,14 +579,8 @@ enum host_thread_synch_scheduling_policy_t : unsigned int {
* performing work in parallel with the device.
*
*/
block = cudaDeviceScheduleBlockingSync,
/**
* @brief Block the thread until results are available.
*
* Instruct CUDA to block the CPU thread on a synchronization
* primitive when waiting for the device to finish work.
*/
yield = cudaDeviceScheduleYield,

/** see @ref heuristic */
automatic = heuristic,
};
Expand Down

0 comments on commit 4a1b8e5

Please sign in to comment.