Fixes #9: A near-complete revamp of the APIs, now taking the driver API into account and exposing most of its functionality.
Eyal Rozenberg committed Dec 3, 2020
1 parent 2b46850 commit 56eddcd
Showing 59 changed files with 4,778 additions and 766 deletions.
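For orientation, here is a minimal sketch (not part of the commit) of the kind of code the revamped, driver-API-aware wrappers are meant to support. It uses only calls that appear in the new context_management.cpp example further down; the umbrella header name is an assumption and may differ from the library's actual include path.

// Minimal usage sketch for the revamped wrappers (not part of this commit).
// Assumption: <cuda/api.hpp> stands in for the wrappers' umbrella header.
#include <cuda/api.hpp>
#include <iostream>

int main()
{
	if (cuda::device::count() == 0) { return 0; }
	auto device = cuda::device::get(0);

	// The device's primary context is now a first-class wrapper object...
	auto primary = device.primary_context();
	std::cout << "The primary context belongs to device " << primary.device_id() << '\n';

	// ...and further contexts can be created and made current explicitly,
	auto extra = cuda::context::create(device);
	cuda::context::current::set(extra);

	// or only for a limited scope, with RAII push-and-pop semantics.
	{
		cuda::context::current::scoped_override_t for_this_block { primary };
		std::cout << "Is the current context primary? " << std::boolalpha
			<< cuda::context::current::get().is_primary() << '\n';
	}
	device.synchronize();
}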
8 changes: 4 additions & 4 deletions CMakeLists.txt
@@ -41,9 +41,9 @@ set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY "lib/")
# Our library targets
# -------------------

-add_library(runtime-api INTERFACE) # A header-only library!
+add_library(runtime-and-driver INTERFACE) # A header-only library!
add_library(nvtx)
-set(wrapper-libraries runtime-api nvtx)
+set(wrapper-libraries runtime-and-driver nvtx)

foreach(WRAPPER_LIB ${wrapper-libraries})
target_compile_features(${WRAPPER_LIB} INTERFACE cxx_std_11) # This means _at least_ C++11
@@ -57,11 +57,11 @@ foreach(WRAPPER_LIB ${wrapper-libraries})
# target_link_libraries(${WRAPPER_LIB} PUBLIC CUDA::CUDALibs)
# ... but that's not supported.
target_include_directories(${WRAPPER_LIB} INTERFACE ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})
-target_link_libraries(${WRAPPER_LIB} INTERFACE ${CUDA_LIBRARIES})
+target_link_libraries(${WRAPPER_LIB} INTERFACE ${CUDA_LIBRARIES} ${CUDA_CUDA_LIBRARY})
endforeach()

set_target_properties(nvtx PROPERTIES OUTPUT_NAME "cuda-nvtx-wrappers")
-target_link_libraries(nvtx PUBLIC runtime-api)
+target_link_libraries(nvtx PUBLIC runtime-and-driver)
set_property(TARGET nvtx PROPERTY CXX_STANDARD 11)
set_property(TARGET nvtx PROPERTY CXX_STANDARD_REQUIRED ON)
set_property(TARGET nvtx PROPERTY CXX_EXTENSIONS OFF)
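The ${CUDA_CUDA_LIBRARY} variable that now joins the link interface refers to the CUDA driver library (libcuda), as opposed to the runtime library covered by ${CUDA_LIBRARIES}; the wrappers need it because they now call driver-API entry points directly. For reference only (this sketch is not part of the commit), the raw driver-API sequence underlying the primary-context handling exercised in the new example looks roughly as follows; error checking is omitted for brevity.

// Sketch of raw CUDA Driver API calls the wrappers build upon. These symbols
// (cuInit, cuDeviceGet, cuDevicePrimaryCtxRetain, ...) are provided by the
// driver library, which is why ${CUDA_CUDA_LIBRARY} must now be linked.
#include <cuda.h>
#include <cstdio>

int main()
{
	CUdevice device;
	CUcontext primary;
	CUcontext previous;

	cuInit(0);                                   // initialize the driver API
	cuDeviceGet(&device, 0);                     // driver-level device handle
	cuDevicePrimaryCtxRetain(&primary, device);  // obtain the primary context
	cuCtxPushCurrent(primary);                   // make it current on this thread

	// ... work against the current context goes here ...

	cuCtxPopCurrent(&previous);                  // restore the previous context
	cuDevicePrimaryCtxRelease(device);           // balance the retain
	std::printf("done\n");
	return 0;
}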
4 changes: 3 additions & 1 deletion examples/CMakeLists.txt
@@ -26,12 +26,13 @@ set(CMAKE_CUDA_STANDARD 11)
set(CMAKE_CUDA_STANDARD_REQUIRED ON)
set(CMAKE_CUDA_EXTENSIONS OFF)

-link_libraries(runtime-api)
+link_libraries(runtime-and-driver)

set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "bin")
add_executable(vectorAdd modified_cuda_samples/vectorAdd/vectorAdd.cu)
add_executable(vectorAddMapped modified_cuda_samples/vectorAddMapped/vectorAddMapped.cu)
add_executable(vectorAddManaged modified_cuda_samples/vectorAddManaged/vectorAddManaged.cu)
+add_executable(simpleDrvRuntimePTX modified_cuda_samples/simpleDrvRuntimePTX/simpleDrvRuntimePTX.cpp)
add_executable(inlinePTX modified_cuda_samples/inlinePTX/inlinePTX.cu)
add_executable(simpleStreams modified_cuda_samples/simpleStreams/simpleStreams.cu)
add_executable(simpleIPC modified_cuda_samples/simpleIPC/simpleIPC.cu)
@@ -44,6 +45,7 @@ add_dependencies(modified_cuda_samples vectorAdd inlinePTX simpleStreams simpleI
add_executable(version_management by_runtime_api_module/version_management.cpp)
add_executable(error_handling by_runtime_api_module/error_handling.cu)
add_executable(device_management by_runtime_api_module/device_management.cpp)
+add_executable(context_management by_driver_api_module/context_management.cpp)
add_executable(execution_control by_runtime_api_module/execution_control.cu)

add_executable(stream_management by_runtime_api_module/stream_management.cu)
176 changes: 176 additions & 0 deletions examples/by_driver_api_module/context_management.cpp
@@ -0,0 +1,176 @@
/**
* An example program utilizing most/all calls from the CUDA
* Driver API module:
*
* Context Management
*/
#include "../common.hpp"

void test_context(
const cuda::context_t& context,
bool is_primary,
cuda::device::id_t device_id)
{
std::cout << "Testing " << (is_primary ? "" : "non-") << "primary context " << context << '\n';
if (context.device_id() != device_id) {
die_("The device's primary context's reported ID and the device wrapper's ID differ: "
+ std::to_string(context.device_id()) + " != " + std::to_string(device_id));
}

if (context.device().id() != device_id) {
die_("The context's associated device's ID is not the same as that of the device for which we obtained the context: "
+ std::to_string(context.device().id()) + " != " + std::to_string(device_id));
}

if (context.is_primary() != is_primary) {
die_(std::string("The ") + (is_primary ? "" : "non-") + "primary context " + std::to_string(context)
+ " \"believes\" it is " + (is_primary ? "not " : "") + "primary.");
}

// Specific attributes and properties with their own API calls:
// L1/shared mem (CacheConfig), shared memory bank size (SharedMemConfig)
// and stream priority range
// ----------------------------------------------------------------

auto cache_preference = context.cache_preference();
std::cout << "The cache preference for context " << context << " is: " << cache_preference << ".\n";

auto new_cache_preference =
cache_preference == cuda::multiprocessor_cache_preference_t::prefer_l1_over_shared_memory ?
cuda::multiprocessor_cache_preference_t::prefer_shared_memory_over_l1 :
cuda::multiprocessor_cache_preference_t::prefer_l1_over_shared_memory;
context.set_cache_preference(new_cache_preference);
cache_preference = context.cache_preference();
assert_(cache_preference == new_cache_preference);
std::cout << "The cache preference for context " << context << " has now been set to: " << new_cache_preference << ".\n";

auto shared_mem_bank_size = context.shared_memory_bank_size();
shared_mem_bank_size =
(shared_mem_bank_size == cudaSharedMemBankSizeFourByte) ?
cudaSharedMemBankSizeEightByte : cudaSharedMemBankSizeFourByte;
context.set_shared_memory_bank_size(shared_mem_bank_size);
auto stream_priority_range = context.stream_priority_range();
std::cout << "Streams on context " << context << " have priorities between "
<< stream_priority_range.first << " and " <<
(stream_priority_range.second == cuda::stream::unbounded_priority ? "(unbounded)" :
std::to_string(stream_priority_range.second)) << ".\n";
assert_(
stream_priority_range.second == cuda::stream::unbounded_priority ||
stream_priority_range.first <= stream_priority_range.second
);

// Resource limits
// --------------------

auto printf_fifo_size = context.get_limit(cudaLimitPrintfFifoSize);
std::cout << "The printf FIFO size for context " << context << " is " << printf_fifo_size << ".\n";
decltype(printf_fifo_size) new_printf_fifo_size =
(printf_fifo_size <= 1024) ? 2 * printf_fifo_size : printf_fifo_size - 512;
context.set_limit(cudaLimitPrintfFifoSize, new_printf_fifo_size);
printf_fifo_size = context.get_limit(cudaLimitPrintfFifoSize);
assert_(printf_fifo_size == new_printf_fifo_size);

// Flags - yes, yet another kind of attribute/property
// ----------------------------------------------------

std::cout << "Context " << context << " uses a"
<< (context.synch_scheduling_policy() ? " synchronous" : "n asynchronous")
<< " scheduling policy.\n";
std::cout << "Context " << context << " is set to "
<< (context.keeping_larger_local_mem_after_resize() ? "keep" : "discard")
<< " shared memory allocation after launch.\n";
std::cout << "Context " << context
<< " is set " << (context.can_map_host_memory() ? "to allow" : "not to allow")
<< " pinned mapped memory.\n";
// TODO: Change the settings as well as obtaining them

}

int main(int argc, char **argv)
{
if (cuda::device::count() == 0) {
die_("No CUDA devices on this system");
}

// Being very cavalier about our command-line arguments here...
cuda::device::id_t device_id = (argc > 1) ?
std::stoi(argv[1]) : cuda::device::default_device_id;

if (cuda::device::count() <= device_id) {
die_("No CUDA device with ID " + std::to_string(device_id));
}

auto device = cuda::device::get(device_id);

std::cout << "Using CUDA device " << device.name() << " (having device ID " << device.id() << ")\n";

report_current_context("Before anything is done");
auto pc = device.primary_context();
report_current_context("After getting the primary context");

cuda::context::current::push(pc);
constexpr const bool is_primary = true;
constexpr const bool isnt_primary = false;
test_context(pc, is_primary, device_id);

{
auto popped = cuda::context::current::pop();
if (popped != pc) {
die_("After pushing context " + std::to_string(pc) + " and popping it - the pop result is a different context, " + std::to_string(popped));
}
}

auto created_context = cuda::context::create(device);
test_context(created_context, isnt_primary, device_id);

// Current context manipulation
// ----------------------------

cuda::context_t context_0 = pc;
cuda::context_t context_1 = created_context;
cuda::context::current::set(context_0);
assert_(cuda::context::current::get() == context_0);
assert_(cuda::context::current::detail::get_handle() == context_0.handle());
cuda::context::current::set(context_1);
assert_(cuda::context::current::get() == context_1);
assert_(cuda::context::current::detail::get_handle() == context_1.handle());


auto context_2 = cuda::context::create(device);
{
cuda::context::current::scoped_override_t context_for_this_block { context_2 };
assert_(context_2.handle() == cuda::context::current::get().handle());
assert_(context_2 == cuda::context::current::get());
}
auto gotten = cuda::context::current::get();
assert_(gotten == context_1);

auto context_3 = cuda::context::create_and_push(device);

// std::cout << "Contexts:\n";
// std::cout << "context_0: " << context_0 << '\n';
// std::cout << "context_1: " << context_1 << '\n';
// std::cout << "context_2: " << context_2 << '\n';
// std::cout << "context_3: " << context_3 << '\n';

{
cuda::context::current::scoped_override_t context_for_this_block { context_3 };
assert_(context_3.handle() == cuda::context::current::get().handle());
assert_(context_3 == cuda::context::current::get());
}

{
auto popped = cuda::context::current::pop();
assert_(popped == context_3);
}
gotten = cuda::context::current::get();
assert_(gotten == context_1);

device.synchronize();
device.reset();

device.synchronize();
device.reset();

std::cout << "\nSUCCESS\n";
}
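A closing note on a pattern the example exercises twice: explicit push and pop calls appear alongside scoped_override_t, which the assertions above show performs the same push on construction and the matching pop on destruction. Where the code in between may throw, the RAII form is the safer choice; a small sketch under that assumption (run_in_context is a hypothetical helper, not part of this commit):

// Hypothetical helper, not part of the commit: makes ctx current only while
// work() runs, restoring the previous current context even on an exception.
#include <cuda/api.hpp> // assumption: the wrappers' umbrella header, as above

template <typename F>
void run_in_context(const cuda::context_t& ctx, F work)
{
	cuda::context::current::scoped_override_t raii_override { ctx };
	work(); // executes with ctx as the calling thread's current context
}           // the previous current context is restored here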