New example program: memory_pools
eyalroz committed Apr 16, 2023
1 parent d9e6e4b commit 951e298
Showing 2 changed files with 259 additions and 0 deletions.
3 changes: 3 additions & 0 deletions examples/CMakeLists.txt
@@ -124,6 +124,9 @@ add_executable(version_management by_api_module/version_management.cpp)
add_executable(error_handling by_api_module/error_handling.cu)
add_executable(device_management by_api_module/device_management.cpp)
add_executable(context_management by_api_module/context_management.cpp)
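# Memory pools (stream-ordered memory allocation) require CUDA 11.2 or later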
if(CUDAToolkit_VERSION VERSION_GREATER_EQUAL "11.2")
add_executable(memory_pools by_api_module/memory_pools.cpp)
endif()
add_executable(execution_control by_api_module/execution_control.cu)
set_property(TARGET execution_control PROPERTY CUDA_SEPARABLE_COMPILATION ON)

256 changes: 256 additions & 0 deletions examples/by_api_module/memory_pools.cpp
@@ -0,0 +1,256 @@
#include <cuda/api.hpp>
#include "../common.hpp"

//
//__global__ void print_message(const char* message, size_t length)
//{
// printf("%*s\n", length, message);
//}

static constexpr const size_t region_size { 1024UL }; // 1 KiB

void check_getters_and_rewrapping(const cuda::memory::pool_t &pool);

cuda::optional<cuda::device::id_t> maybe_get_p2p_peer_id(cuda::device::id_t device_id)
{
const auto& dev = cuda::device::get(device_id);
for(const auto& peer : cuda::devices()) {
if (peer.id() == device_id) { continue; }
if (cuda::device::peer_to_peer::can_access_each_other(dev, peer)) {
return peer.id();
}
}
return cuda::nullopt;
}

void play_with_attributes(const cuda::stream_t& stream, const cuda::memory::pool_t &pool)
{
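// The pool's reuse policy is exposed both as three individual attributes and as an
// aggregate reuse_policy() structure; verify that the defaults are all enabled and that
// the two views agree, then disable the attributes, check the changes took effect,
// and finally re-enable them.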
bool allow_internal_deps = pool.get_attribute<CU_MEMPOOL_ATTR_REUSE_ALLOW_INTERNAL_DEPENDENCIES>();
bool allow_opportunistic = pool.get_attribute<CU_MEMPOOL_ATTR_REUSE_ALLOW_OPPORTUNISTIC>();
bool when_dependent_on_free = pool.get_attribute<CU_MEMPOOL_ATTR_REUSE_FOLLOW_EVENT_DEPENDENCIES>();
if (not (allow_internal_deps and allow_opportunistic and when_dependent_on_free)) {
die_("Pool reuse policy differs from the expected default");
}
auto reuse_policy = pool.reuse_policy();
if ( reuse_policy.when_dependent_on_free != when_dependent_on_free
or reuse_policy.independent_but_actually_freed != allow_opportunistic
or reuse_policy.allow_waiting_for_frees != allow_internal_deps)
{
die_("Some aggregate reuse policy fields don't equal the attributes obtained individually");
}
reuse_policy.when_dependent_on_free = false;

pool.set_attribute<CU_MEMPOOL_ATTR_REUSE_ALLOW_INTERNAL_DEPENDENCIES>(false);
pool.set_attribute<CU_MEMPOOL_ATTR_REUSE_ALLOW_OPPORTUNISTIC>(false);
pool.set_attribute<CU_MEMPOOL_ATTR_REUSE_FOLLOW_EVENT_DEPENDENCIES>(false);

allow_internal_deps = pool.get_attribute<CU_MEMPOOL_ATTR_REUSE_ALLOW_INTERNAL_DEPENDENCIES>();
allow_opportunistic = pool.get_attribute<CU_MEMPOOL_ATTR_REUSE_ALLOW_OPPORTUNISTIC>();
when_dependent_on_free = pool.get_attribute<CU_MEMPOOL_ATTR_REUSE_FOLLOW_EVENT_DEPENDENCIES>();

if (allow_internal_deps) {
die_("Pool reuse policy ALLOW_INTERNAL_DEPS is true despite having been set to false!");
}

if (allow_opportunistic) {
die_("Pool reuse policy ALLOW_OPPORTUNISTIC is true despite having been set to false!");
}

if (when_dependent_on_free) {
die_("Pool reuse policy FOLLOW_EVENT_DEPENDENCIES is true despite having been set to false!");
}

#if CUDA_VERSION >= 11030
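// Memory-usage statistics for pools (current use and the high-use watermark) are only available from CUDA 11.3 on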
auto max_use_since_reset = pool.get_attribute<CU_MEMPOOL_ATTR_USED_MEM_HIGH>();
if (max_use_since_reset != region_size) {
die_("Unexpected 'high-use watermark' property value for a memory pool");
}

auto temp_region = pool.allocate(stream, 2 * region_size);

auto currently_used_mem = pool.get_attribute<CU_MEMPOOL_ATTR_USED_MEM_CURRENT>();
if (currently_used_mem != 3 * region_size) {
die_("Unexpected amount of currently-used memory reported by a memory pool");
}

stream.enqueue.free(temp_region);

currently_used_mem = pool.get_attribute<CU_MEMPOOL_ATTR_USED_MEM_CURRENT>();
if (currently_used_mem != region_size) {
die_("Unexpected amount of currently-used memory reported by a memory pool");
}

max_use_since_reset = pool.get_attribute<CU_MEMPOOL_ATTR_USED_MEM_HIGH>();
if (max_use_since_reset != 3 * region_size) {
die_("Unexpected 'high-use watermark' property value for a memory pool");
}

pool.set_attribute<CU_MEMPOOL_ATTR_USED_MEM_HIGH>(0);

max_use_since_reset = pool.get_attribute<CU_MEMPOOL_ATTR_USED_MEM_HIGH>();
if (max_use_since_reset != region_size) {
die_("Unexpected 'high-use watermark' property value for a memory pool, after a reset: Expected "
+ std::to_string(region_size) + " but got " + std::to_string(max_use_since_reset));
}
#endif // CUDA_VERSION >= 11030

pool.set_attribute<CU_MEMPOOL_ATTR_REUSE_ALLOW_INTERNAL_DEPENDENCIES>(true);
pool.set_attribute<CU_MEMPOOL_ATTR_REUSE_ALLOW_OPPORTUNISTIC>(true);
pool.set_attribute<CU_MEMPOOL_ATTR_REUSE_FOLLOW_EVENT_DEPENDENCIES>(true);
}

void copy_through_pool_allocation(
const cuda::stream_t& stream,
cuda::memory::region_t pool_allocated_region)
{
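// Round-trip a short message: copy it into the pool allocation, from there into a
// stream-ordered device allocation, and from the pool allocation back into a host
// buffer, then verify the host buffer holds the message rather than its initial contents.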
auto overwritten = "This will initialize the host buffer and should get overwritten.";
auto host_buffer_uptr = std::unique_ptr<char[]>(new char[region_size]); // replace this with make_unique in C++14
auto host_buffer = cuda::span<char>{host_buffer_uptr.get(), region_size};
strcpy(host_buffer.data(), overwritten);
(pool_allocated_region.size() == region_size) or die_("Unexpected allocation size");
auto message = [](const char* msg) {
return cuda::span<const char>{msg, strlen(msg) + 1};
}("I get copied around");
stream.enqueue.copy(pool_allocated_region, message);
auto async_device_region = stream.enqueue.allocate(region_size);
stream.enqueue.copy(async_device_region, pool_allocated_region);
stream.enqueue.copy(host_buffer, pool_allocated_region);
stream.synchronize();
if (strncmp(message.data(), host_buffer.data(), message.size()) != 0) {
auto len_of_overwritten = strlen(overwritten);
host_buffer[len_of_overwritten] = '\0';
std::ostringstream oss;
oss << "Unexpected resulting buffer on the host side.\n"
<< "Expected: \"" << message.data() << "\"\n"
<< "Actual (up to " << len_of_overwritten << " chars): \"" << host_buffer.data() << "\"\n";
die_(oss.str());
}
}

cuda::memory::access_permissions_t
try_forbidding_same_device_access(int device_id, cuda::memory::pool_t &pool)
{
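// A pool's own device cannot have its access to the pool restricted; attempting to set
// read-only permissions for that device is expected to fail with std::invalid_argument.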
cuda::memory::access_permissions_t permissions;
permissions.read = true;
permissions.write = false;
bool got_expected_exception = false;
try {
pool.set_access_permissions(cuda::device::get(device_id), permissions);
}
catch(std::invalid_argument& exception) {
got_expected_exception = true;
}
if (not got_expected_exception) {
die_("Unexpected success in setting a device's access permissions to a pool using "
"that device; it should have failed");
}
return permissions;
}

// Should only run when peer and device can access each other
void try_writing_to_pool_allocation_without_permission(
const cuda::stream_t& stream,
cuda::memory::pool_t& pool,
cuda::memory::region_t& pool_allocated_region,
cuda::memory::access_permissions_t& permissions,
cuda::device_t& peer)
{
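// Grant the peer device read-only access to the pool, then attempt a copy into the pool
// allocation on a stream associated with the peer; the write is expected to fail.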
permissions.read = true;
permissions.write = false;
pool.set_access_permissions(peer, permissions);
std::string str{"hello world"};
stream.synchronize();
auto stream_on_peer = peer.create_stream(cuda::stream::async);
bool got_expected_exception = false;
try {
stream_on_peer.enqueue.copy(pool_allocated_region, {str.data(), str.size()});
stream_on_peer.synchronize();
}
catch (cuda::runtime_error &ex) {
if (ex.code() != cuda::status::invalid_value) {
throw (ex);
}
got_expected_exception = true;
}
if (not got_expected_exception) {
die_("Did not get the expected exception");
}
}

// Should only run when peer and device can access each other
void try_reading_from_pool_allocation_without_permission(
cuda::memory::pool_t& pool,
cuda::memory::region_t& pool_allocated_region,
cuda::memory::access_permissions_t& permissions,
cuda::device_t& peer)
{
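// Grant the peer device write-only access to the pool, then attempt a copy from the pool
// allocation into a host buffer on a stream associated with the peer; the read is expected to fail.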
permissions.read = false;
permissions.write = true;
pool.set_access_permissions(peer, permissions);
auto host_buffer_uptr = std::unique_ptr<char[]>(new char[region_size]); // replace this with make_unique in C++14
auto host_buffer = cuda::span<char>{host_buffer_uptr.get(), region_size};
std::fill_n(host_buffer.begin(), host_buffer.size()-1, 'a');
host_buffer[host_buffer.size()-1] = '\0';
auto stream_on_peer = peer.create_stream(cuda::stream::async);
bool got_expected_exception = false;
try {
stream_on_peer.enqueue.copy(host_buffer, pool_allocated_region);
stream_on_peer.synchronize();
}
catch (cuda::runtime_error &ex) {
if (ex.code() != cuda::status::invalid_value) {
throw (ex);
}
got_expected_exception = true;
}
if (not got_expected_exception) {
die_("Did not get the expected exception");
}
}

int main(int argc, char** argv)
{
auto device_id = choose_device(argc, argv);
auto device = cuda::device::get(device_id);
auto stream = device.create_stream(cuda::stream::async);
auto pool = device.create_memory_pool();

check_getters_and_rewrapping(pool);
auto pool_allocated_region = pool.allocate(stream, region_size);
play_with_attributes(stream, pool);
copy_through_pool_allocation(stream, pool_allocated_region);

cuda::memory::access_permissions_t permissions = try_forbidding_same_device_access(device_id, pool);

auto maybe_peer_id = maybe_get_p2p_peer_id(device_id);
if (maybe_peer_id) {
auto peer = cuda::device::get(maybe_peer_id.value());
try_writing_to_pool_allocation_without_permission(
stream, pool, pool_allocated_region, permissions, peer);
try_reading_from_pool_allocation_without_permission(
pool, pool_allocated_region, permissions, peer);
}
else {
std::cout << "No device can serve as a peer of our chosen device, with P2P access to its memory;\n"
"Skipping P2P-related tests.\n";
}

stream.enqueue.free(pool_allocated_region);

std::cout << "\nSUCCESS\n\n";
}

void check_getters_and_rewrapping(const cuda::memory::pool_t &pool)
{
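// Re-wrap the pool's raw handle in a new, non-owning proxy and check that it compares
// equal to the proxy we started with.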
auto device_id = pool.device_id();
auto handle = pool.handle();
if (not pool.is_owning()) {
die_("Memory pool proxy should be owning, but isn't");
}

static const bool is_not_owning { false };
auto wrapped = cuda::memory::pool::wrap(device_id, handle, is_not_owning);
if (wrapped != pool) {
die_("Rewrapped pool proxy differs from the pool proxy we queried");
}
}
