Skip to content

Commit

Permalink
Fixes #452, fixes #453: Batch memory operations
Browse files Browse the repository at this point in the history
* Now properly setting the batch memory operation parameters for flush-remote-writes
* Now supporting the memory barrier batch-memory-operation (as a single operation, not in an actual batch)
  • Loading branch information
eyalroz committed Feb 10, 2023
1 parent 240c5d3 commit ade02b0
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 3 deletions.
22 changes: 19 additions & 3 deletions src/cuda/api/stream.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -725,14 +725,30 @@ class stream_t {
*/
void flush_remote_writes() const
{
    // Enqueues one flush-remote-writes operation on the associated stream, by
    // submitting it to the driver as a batch of exactly one memory operation.
    //
    // The parameters union is value-initialized, so every byte which is not
    // explicitly assigned below is zero rather than indeterminate.
    CUstreamBatchMemOpParams op_params = CUstreamBatchMemOpParams();
    op_params.flushRemoteWrites.operation = CU_STREAM_MEM_OP_FLUSH_REMOTE_WRITES;
    op_params.flushRemoteWrites.flags = 0;
    const unsigned count = 1; // the "batch" holds just this one operation
    const unsigned flags = 0; // no batch-level flags are set
    // The batch is submitted exactly once, and the resulting status is always
    // handed to the error check.
    auto status = cuStreamBatchMemOp(associated_stream.handle_, count, &op_params, flags);
    throw_if_error_lazy(status, "scheduling a flush-remote-writes memory operation as a 1-op batch");
}

#if CUDA_VERSION >= 11070
/**
 * Enqueue a memory barrier on this stream, submitted to the driver as a
 * batch consisting of that single memory operation.
 *
 * @param scope the barrier's scope; its underlying value is passed to the
 * driver as the operation's flags
 *
 * @note the status returned by the driver is passed to `throw_if_error_lazy()`
 */
void memory_barrier(memory::barrier_scope_t scope) const
{
    // Value-initialize the parameters union, so that every byte which is not
    // explicitly assigned below is zero rather than indeterminate.
    CUstreamBatchMemOpParams op_params = CUstreamBatchMemOpParams();
    op_params.memoryBarrier.operation = CU_STREAM_MEM_OP_BARRIER;
    op_params.memoryBarrier.flags = static_cast<unsigned>(scope);
    const unsigned count = 1; // the "batch" holds just this one operation
    const unsigned flags = 0; // no batch-level flags are set
    auto status = cuStreamBatchMemOp(associated_stream.handle_, count, &op_params, flags);
    throw_if_error_lazy(status, "scheduling a memory barrier operation as a 1-op batch");
}
#endif

/**
* Enqueue multiple single-value write, wait and flush operations to the device
* (avoiding the overhead of multiple enqueue calls).
Expand Down
8 changes: 8 additions & 0 deletions src/cuda/api/types.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -668,6 +668,14 @@ using range_attribute_t = CUmem_range_attribute;

} // namespace managed

#if CUDA_VERSION >= 11070
enum class barrier_scope_t : typename std::underlying_type<CUstreamMemoryBarrier_flags>::type {
device = CU_STREAM_MEMORY_BARRIER_TYPE_GPU,
system = CU_STREAM_MEMORY_BARRIER_TYPE_SYS
};
#endif // CUDA_VERSION >= 11070


} // namespace memory

/**
Expand Down

0 comments on commit ade02b0

Please sign in to comment.