New example program: memory_pools
eyalroz committed Apr 16, 2023
1 parent d9e6e4b commit 951e298
Showing 2 changed files with 259 additions and 0 deletions.
3 changes: 3 additions & 0 deletions examples/CMakeLists.txt
@@ -124,6 +124,9 @@ add_executable(version_management by_api_module/version_management.cpp)
add_executable(error_handling by_api_module/error_handling.cu)
add_executable(device_management by_api_module/device_management.cpp)
add_executable(context_management by_api_module/context_management.cpp)
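# Memory pools (stream-ordered memory allocation) require CUDA 11.2 or later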
if(CUDAToolkit_VERSION VERSION_GREATER_EQUAL "11.2")
add_executable(memory_pools by_api_module/memory_pools.cpp)
endif()
add_executable(execution_control by_api_module/execution_control.cu)
set_property(TARGET execution_control PROPERTY CUDA_SEPARABLE_COMPILATION ON)

256 changes: 256 additions & 0 deletions examples/by_api_module/memory_pools.cpp
@@ -0,0 +1,256 @@
#include <cuda/api.hpp>
#include "../common.hpp"

//
//__global__ void print_message(const char* message, size_t length)
//{
// printf("%*s\n", length, message);
//}

static constexpr const size_t region_size { 1024UL }; // 1 KiB

void check_getters_and_rewrapping(const cuda::memory::pool_t &pool);

cuda::optional<cuda::device::id_t> maybe_get_p2p_peer_id(cuda::device::id_t device_id)
{
const auto& dev = cuda::device::get(device_id);
for(const auto& peer : cuda::devices()) {
if (peer.id() == device_id) { continue; }
if (cuda::device::peer_to_peer::can_access_each_other(dev, peer)) {
return peer.id();
}
}
return cuda::nullopt;
}

void play_with_attributes(const cuda::stream_t& stream, const cuda::memory::pool_t &pool)
{
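// The pool's reuse policy is exposed both as three individual attributes and as an
// aggregate reuse_policy() structure; verify that the defaults are all enabled and that
// the two views agree, then disable the attributes, check the changes took effect,
// and finally re-enable them.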
bool allow_internal_deps = pool.get_attribute<CU_MEMPOOL_ATTR_REUSE_ALLOW_INTERNAL_DEPENDENCIES>();
bool allow_opportunistic = pool.get_attribute<CU_MEMPOOL_ATTR_REUSE_ALLOW_OPPORTUNISTIC>();
bool when_dependent_on_free = pool.get_attribute<CU_MEMPOOL_ATTR_REUSE_FOLLOW_EVENT_DEPENDENCIES>();
if (not (allow_internal_deps and allow_opportunistic and when_dependent_on_free)) {
die_("Pool reuse policy differs from the expected default");
}
auto reuse_policy = pool.reuse_policy();
if ( reuse_policy.when_dependent_on_free != when_dependent_on_free
or reuse_policy.independent_but_actually_freed != allow_opportunistic
or reuse_policy.allow_waiting_for_frees != allow_internal_deps)
{
die_("Some aggregate reuse policy fields don't equal the attributes obtained individually");
}
reuse_policy.when_dependent_on_free = false;

pool.set_attribute<CU_MEMPOOL_ATTR_REUSE_ALLOW_INTERNAL_DEPENDENCIES>(false);
pool.set_attribute<CU_MEMPOOL_ATTR_REUSE_ALLOW_OPPORTUNISTIC>(false);
pool.set_attribute<CU_MEMPOOL_ATTR_REUSE_FOLLOW_EVENT_DEPENDENCIES>(false);

allow_internal_deps = pool.get_attribute<CU_MEMPOOL_ATTR_REUSE_ALLOW_INTERNAL_DEPENDENCIES>();
allow_opportunistic = pool.get_attribute<CU_MEMPOOL_ATTR_REUSE_ALLOW_OPPORTUNISTIC>();
when_dependent_on_free = pool.get_attribute<CU_MEMPOOL_ATTR_REUSE_FOLLOW_EVENT_DEPENDENCIES>();

if (allow_internal_deps) {
die_("Pool reuse policy ALLOW_INTERNAL_DEPS is true despite having been set to false!");
}

if (allow_opportunistic) {
die_("Pool reuse policy ALLOW_OPPORTUNISTIC is true despite having been set to false!");
}

if (when_dependent_on_free) {
die_("Pool reuse policy FOLLOW_EVENT_DEPENDENCIES is true despite having been set to false!");
}

#if CUDA_VERSION >= 11030
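// Memory-usage statistics for pools (current use and the high-use watermark) are only available from CUDA 11.3 on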
auto max_use_since_reset = pool.get_attribute<CU_MEMPOOL_ATTR_USED_MEM_HIGH>();
if (max_use_since_reset != region_size) {
die_("Unexpected 'high-use watermark' property value for a memory pool");
}

auto temp_region = pool.allocate(stream, 2 * region_size);

auto currently_used_mem = pool.get_attribute<CU_MEMPOOL_ATTR_USED_MEM_CURRENT>();
if (currently_used_mem != 3 * region_size) {
die_("Unexpected amount of currently-used memory reported by a memory pool");
}

stream.enqueue.free(temp_region);

currently_used_mem = pool.get_attribute<CU_MEMPOOL_ATTR_USED_MEM_CURRENT>();
if (currently_used_mem != region_size) {
die_("Unexpected amount of currently-used memory reported by a memory pool");
}

max_use_since_reset = pool.get_attribute<CU_MEMPOOL_ATTR_USED_MEM_HIGH>();
if (max_use_since_reset != 3 * region_size) {
die_("Unexpected 'high-use watermark' property value for a memory pool");
}

pool.set_attribute<CU_MEMPOOL_ATTR_USED_MEM_HIGH>(0);

max_use_since_reset = pool.get_attribute<CU_MEMPOOL_ATTR_USED_MEM_HIGH>();
if (max_use_since_reset != region_size) {
die_("Unexpected 'high-use watermark' property value for a memory pool, after a reset: Expected "
+ std::to_string(region_size) + " but got " + std::to_string(max_use_since_reset));
}
#endif // CUDA_VERSION >= 11030

pool.set_attribute<CU_MEMPOOL_ATTR_REUSE_ALLOW_INTERNAL_DEPENDENCIES>(true);
pool.set_attribute<CU_MEMPOOL_ATTR_REUSE_ALLOW_OPPORTUNISTIC>(true);
pool.set_attribute<CU_MEMPOOL_ATTR_REUSE_FOLLOW_EVENT_DEPENDENCIES>(true);
}

void copy_through_pool_allocation(
const cuda::stream_t& stream,
cuda::memory::region_t pool_allocated_region)
{
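// Round-trip a short message: copy it into the pool allocation, from there into a
// stream-ordered device allocation, and from the pool allocation back into a host
// buffer, then verify the host buffer holds the message rather than its initial contents.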
auto overwritten = "This will initialize the host buffer and should get overwritten.";
auto host_buffer_uptr = std::unique_ptr<char[]>(new char[region_size]); // replace this with make_unique in C++14
auto host_buffer = cuda::span<char>{host_buffer_uptr.get(), region_size};
strcpy(host_buffer.data(), overwritten);
(pool_allocated_region.size() == region_size) or die_("Unexpected allocation size");
auto message = [](const char* msg) {
return cuda::span<const char>{msg, strlen(msg) + 1};
}("I get copied around");
stream.enqueue.copy(pool_allocated_region, message);
auto async_device_region = stream.enqueue.allocate(region_size);
stream.enqueue.copy(async_device_region, pool_allocated_region);
stream.enqueue.copy(host_buffer, pool_allocated_region);
stream.synchronize();
if (strncmp(message.data(), host_buffer.data(), message.size()) != 0) {
auto len_of_overwritten = strlen(overwritten);
host_buffer[len_of_overwritten] = '\0';
std::ostringstream oss;
oss << "Unexpected resulting buffer on the host side.\n"
<< "Expected: \"" << message.data() << "\"\n"
<< "Actual (up to " << len_of_overwritten << " chars): \"" << host_buffer.data() << "\"\n";
die_(oss.str());
}
}

cuda::memory::access_permissions_t
try_forbidding_same_device_access(int device_id, cuda::memory::pool_t &pool)
{
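// A pool's own device cannot have its access to the pool restricted; attempting to set
// read-only permissions for that device is expected to fail with std::invalid_argument.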
cuda::memory::access_permissions_t permissions;
permissions.read = true;
permissions.write = false;
bool got_expected_exception = false;
try {
pool.set_access_permissions(cuda::device::get(device_id), permissions);
}
catch(std::invalid_argument& exception) {
got_expected_exception = true;
}
if (not got_expected_exception) {
die_("Unexpected success in setting a device's access permissions to a pool using "
"that device; it should have failed");
}
return permissions;
}

// Should only run when peer and device can access each other
void try_writing_to_pool_allocation_without_permission(
const cuda::stream_t& stream,
cuda::memory::pool_t& pool,
cuda::memory::region_t& pool_allocated_region,
cuda::memory::access_permissions_t& permissions,
cuda::device_t& peer)
{
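// Grant the peer device read-only access to the pool, then attempt a copy into the pool
// allocation on a stream associated with the peer; the write is expected to fail.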
permissions.read = true;
permissions.write = false;
pool.set_access_permissions(peer, permissions);
std::string str{"hello world"};
stream.synchronize();
auto stream_on_peer = peer.create_stream(cuda::stream::async);
bool got_expected_exception = false;
try {
stream_on_peer.enqueue.copy(pool_allocated_region, {str.data(), str.size()});
stream_on_peer.synchronize();
}
catch (cuda::runtime_error &ex) {
if (ex.code() != cuda::status::invalid_value) {
throw (ex);
}
got_expected_exception = true;
}
if (not got_expected_exception) {
die_("Did not get the expected exception");
}
}

// Should only run when peer and device can access each other
void try_reading_from_pool_allocation_without_permission(
cuda::memory::pool_t& pool,
cuda::memory::region_t& pool_allocated_region,
cuda::memory::access_permissions_t& permissions,
cuda::device_t& peer)
{
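// Grant the peer device write-only access to the pool, then attempt a copy from the pool
// allocation into a host buffer on a stream associated with the peer; the read is expected to fail.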
permissions.read = false;
permissions.write = true;
pool.set_access_permissions(peer, permissions);
auto host_buffer_uptr = std::unique_ptr<char[]>(new char[region_size]); // replace this with make_unique in C++14
auto host_buffer = cuda::span<char>{host_buffer_uptr.get(), region_size};
std::fill_n(host_buffer.begin(), host_buffer.size()-1, 'a');
host_buffer[host_buffer.size()-1] = '\0';
auto stream_on_peer = peer.create_stream(cuda::stream::async);
bool got_expected_exception = false;
try {
stream_on_peer.enqueue.copy(host_buffer, pool_allocated_region);
stream_on_peer.synchronize();
}
catch (cuda::runtime_error &ex) {
if (ex.code() != cuda::status::invalid_value) {
throw (ex);
}
got_expected_exception = true;
}
if (not got_expected_exception) {
die_("Did not get the expected exception");
}
}

int main(int argc, char** argv)
{
auto device_id = choose_device(argc, argv);
auto device = cuda::device::get(device_id);
auto stream = device.create_stream(cuda::stream::async);
auto pool = device.create_memory_pool();

check_getters_and_rewrapping(pool);
auto pool_allocated_region = pool.allocate(stream, region_size);
play_with_attributes(stream, pool);
copy_through_pool_allocation(stream, pool_allocated_region);

cuda::memory::access_permissions_t permissions = try_forbidding_same_device_access(device_id, pool);

auto maybe_peer_id = maybe_get_p2p_peer_id(device_id);
if (maybe_peer_id) {
auto peer = cuda::device::get(maybe_peer_id.value());
try_writing_to_pool_allocation_without_permission(
stream, pool, pool_allocated_region, permissions, peer);
try_reading_from_pool_allocation_without_permission(
pool, pool_allocated_region, permissions, peer);
}
else {
std::cout << "No device can serve as a peer of our chosen device, with P2P access to its memory;\n"
"Skipping P2P-related tests.\n";
}

stream.enqueue.free(pool_allocated_region);

std::cout << "\nSUCCESS\n\n";
}

void check_getters_and_rewrapping(const cuda::memory::pool_t &pool)
{
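// Re-wrap the pool's raw handle in a new, non-owning proxy and check that it compares
// equal to the proxy we started with.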
auto device_id = pool.device_id();
auto handle = pool.handle();
if (not pool.is_owning()) {
die_("Memory pool proxy should be owning, but isn't");
}

static const bool is_not_owning { false };
auto wrapped = cuda::memory::pool::wrap(device_id, handle, is_not_owning);
if (wrapped != pool) {
die_("Rewrapped pool proxy differs from the pool proxy we queried");
}
}
