diff --git a/src/cuda/api/stream.hpp b/src/cuda/api/stream.hpp
index 6976d359..e6beb8db 100644
--- a/src/cuda/api/stream.hpp
+++ b/src/cuda/api/stream.hpp
@@ -725,14 +725,30 @@ class stream_t {
 	 */
 	void flush_remote_writes() const
 	{
-		CUstreamBatchMemOpParams flush_op;
-		flush_op.operation = CU_STREAM_MEM_OP_FLUSH_REMOTE_WRITES;
+		CUstreamBatchMemOpParams op_params;
+		op_params.flushRemoteWrites.operation = CU_STREAM_MEM_OP_FLUSH_REMOTE_WRITES;
+		op_params.flushRemoteWrites.flags = 0;
 		unsigned count = 1;
 		unsigned flags = 0;
 		// Let's cross our fingers and assume nothing else needs to be set here...
-		cuStreamBatchMemOp(associated_stream.handle_, count, &flush_op, flags);
+		auto status = cuStreamBatchMemOp(associated_stream.handle_, count, &op_params, flags);
+		throw_if_error_lazy(status, "scheduling a flush-remote-writes memory operation as a 1-op batch");
 	}
 
+#if CUDA_VERSION >= 11070
+	void memory_barrier(memory::barrier_scope_t scope) const
+	{
+		CUstreamBatchMemOpParams op_params;
+		op_params.memoryBarrier.operation = CU_STREAM_MEM_OP_BARRIER;
+		op_params.memoryBarrier.flags = static_cast<unsigned>(scope);
+		unsigned count = 1;
+		unsigned flags = 0;
+		// Let's cross our fingers and assume nothing else needs to be set here...
+		auto status = cuStreamBatchMemOp(associated_stream.handle_, count, &op_params, flags);
+		throw_if_error_lazy(status, "scheduling a memory barrier operation as a 1-op batch");
+	}
+#endif
+
 	/**
 	 * Enqueue multiple single-value write, wait and flush operations to the device
 	 * (avoiding the overhead of multiple enqueue calls).
diff --git a/src/cuda/api/types.hpp b/src/cuda/api/types.hpp
index 9bcfc894..ae3dcbd1 100644
--- a/src/cuda/api/types.hpp
+++ b/src/cuda/api/types.hpp
@@ -668,6 +668,14 @@ using range_attribute_t = CUmem_range_attribute;
 
 } // namespace managed
 
+#if CUDA_VERSION >= 11070
+enum class barrier_scope_t : typename std::underlying_type<CUstreamMemoryBarrier_flags>::type {
+	device = CU_STREAM_MEMORY_BARRIER_TYPE_GPU,
+	system = CU_STREAM_MEMORY_BARRIER_TYPE_SYS
+};
+#endif // CUDA_VERSION >= 11070
+
+
 } // namespace memory
 
 /**
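For reference, a minimal host-side usage sketch of the new enqueue operation. It assumes the library's existing conventions — the <cuda/api.hpp> umbrella header, cuda::device::current::get(), device_t::create_stream(), cuda::stream::async and the public stream_t::enqueue member — none of which appear in the hunks above, so treat the surrounding calls as illustrative rather than exact.

#include <cuda/api.hpp>

void barrier_demo()
{
	// Illustrative only: device and stream creation follow the wrappers' usual pattern
	auto device = cuda::device::current::get();
	auto stream = device.create_stream(cuda::stream::async);
#if CUDA_VERSION >= 11070
	// Enqueue a barrier ordering this stream's preceding memory operations
	// before subsequent ones, as observed by the whole device...
	stream.enqueue.memory_barrier(cuda::memory::barrier_scope_t::device);
	// ...or by the whole system (host and peer devices included)
	stream.enqueue.memory_barrier(cuda::memory::barrier_scope_t::system);
#endif
	// Flush outstanding remote writes (e.g. over GPUDirect RDMA), as before
	stream.enqueue.flush_remote_writes();
	stream.synchronize();
}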