Skip to content

Commit

Permalink
Fixes #388, regard #385: A compilation output class and builder-like …
Browse files Browse the repository at this point in the history
…interface for NVRTC programs.

Split the `program_t` class in two: A builder-ish `program_t` and a `compilation_output_t` type, produced by compilation of a program. The former lets you easily add or change settings, the latter holds all of the post-compilation methods for locating named globals and getting logs, PTX and cubin. This split should later also help us introduce the PTX compilation mechanism.
  • Loading branch information
eyalroz committed Jul 22, 2022
1 parent f56ed65 commit ef46e32
Show file tree
Hide file tree
Showing 7 changed files with 728 additions and 435 deletions.
11 changes: 6 additions & 5 deletions examples/modified_cuda_samples/clock_nvrtc/clock.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -123,18 +123,19 @@ cuda::dynarray<char> compile_to_cubin(
const char* kernel_name,
cuda::device_t target_device)
{
auto program = cuda::rtc::program::create(kernel_name, kernel_source);
auto program = cuda::rtc::program::create(kernel_name)
.set_source(kernel_source).set_target(target_device);
// I wonder if using the same name for the program and the kernel is a good idea

program.compile_for(target_device);
auto log = program.compilation_log();
auto output = program.compile();
auto log = output.log();

if (log.size() >= 2) {
if (log.size() >= 1) {
std::cerr << "\n compilation log ---\n";
std::cerr << log.data();
std::cerr << "\n end log ---\n";
}
return program.cubin();
return output.cubin();
}


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,13 +43,14 @@ int main(void)
::std::cout << "[Vector addition of " << numElements << " elements]\n";

auto device = cuda::device::current::get();
auto program = cuda::rtc::program::create(kernel_name, vectorAdd_source);
program.register_global(kernel_name);
program.compile_for(device);
auto mangled_kernel_name = program.get_mangling_of(kernel_name);
auto compilation_output = cuda::rtc::program_t(kernel_name)
.set_source(vectorAdd_source)
.add_registered_global(kernel_name)
.set_target(device).compile();
auto mangled_kernel_name = compilation_output.get_mangling_of(kernel_name);

auto context = cuda::device::current::get().primary_context();
auto module = cuda::module::create(context, program);
auto module = cuda::module::create(context, compilation_output);
auto vectorAdd = module.get_kernel(mangled_kernel_name);

// If we could rely on C++14, we would use ::std::make_unique
Expand Down
136 changes: 82 additions & 54 deletions examples/other/jitify/jitify.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -105,29 +105,24 @@ std::string make_instantiation_name(string_view base_name, Ts&&... args)
sstr << '>';
return sstr.str();
}

bool try_compilation(
const cuda::rtc::program_t &program,
const cuda::device_t &device,
/**
*
* @param compilation_output
* @param fine_day hello world
*/
void handle_compilation_failure(
const cuda::rtc::compilation_output_t& compilation_output,
cuda::rtc::compilation_options_t compilation_options = {})
{
try {
compilation_options.set_language_dialect("c++11");
program.compile_for(device, compilation_options);
}
catch(std::exception& err) {
std::cerr << "Program compilation failed: " << err.what() << '\n';
auto compilation_log = program.compilation_log();
std::cerr << "Compilation options: " << compilation_options << '\n';
if (not compilation_log.empty()) {
std::cerr
<< "Compilation log:\n"
<< string_view(compilation_log.data(), compilation_log.size()) << '\n'
<< std::flush;
}
return false;
std::cerr << "Program compilation failed:\n";
auto compilation_log = compilation_output.log();
std::cerr << "Compilation options were: " << compilation_options << '\n';
if (not compilation_log.empty()) {
std::cerr
<< "Compilation log:\n"
<< string_view(compilation_log.data(), compilation_log.size()) << '\n'
<< std::flush;
}
return true;
}

template <typename T>
Expand All @@ -145,12 +140,19 @@ void my_kernel(T* data) {
)";
auto instantiation_name = make_instantiation_name(kernel_name, std::to_string(3), type_name<T>());
std::string source_with_instantiation = append_kernel_instantiation(program_source, instantiation_name);
auto program = cuda::rtc::program::create("my_program", source_with_instantiation.c_str());
auto device = cuda::device::current::get();
program.register_global(instantiation_name);
if (not try_compilation(program, device)) { return false; }
auto mangled_kernel_name = program.get_mangling_of(instantiation_name);
auto module = cuda::module::create(device, program);
auto program = cuda::rtc::program_t("my_program")
.set_source(source_with_instantiation)
.set_target(device)
.add_registered_global(instantiation_name);
program.options().set_language_dialect("c++11");
auto compilation_result = program.compile();

if (not compilation_result.succeeded()) {
handle_compilation_failure(compilation_result, program.options());
}
auto mangled_kernel_name = compilation_result.get_mangling_of(instantiation_name);
auto module = cuda::module::create(device, compilation_result);
// TODO: A kernel::get(const module_t& module, const char* mangled_name function)
auto kernel = module.get_kernel(mangled_kernel_name);

Expand Down Expand Up @@ -209,23 +211,34 @@ void my_kernel2(float const* indata, float* outdata) {
{"example_headers/my_header4.cuh", my_header4_cuh_contents}
};

auto program = cuda::rtc::program::create("my_program1", source_with_instantiation.c_str(), headers);
auto device = cuda::device::current::get();
program.register_globals(kernel_names[0], my_kernel2_instantiation_name);
cuda::rtc::compilation_options_t options;
auto program = cuda::rtc::program_t("my_program1")
.set_source(source_with_instantiation)
.set_headers(headers)
.set_target(device)
.add_registered_global(kernel_names[0])
.add_registered_global(my_kernel2_instantiation_name);
auto& options = program.options();
options.set_language_dialect("c++11");
options.use_fast_math = true;
options.default_execution_space_is_device = true;
// This is necessary because the headers included by this program have functions without a
// device/host qualification - and those default to being host functions, which NVRTC doesn't
// compile.
if (not try_compilation(program, device, options)) { return false; }
// Note: Headers whose sources were not provided to the program on creation will be sought after
// in the specified include directories (in our case, none), and the program's working directory.

auto compilation_result = program.compile();
// Note: Headers whose sources were not provided to the program on creation will be sought after
// in the specified include directories (in our case, none), and the program's working directory.

if (not compilation_result.succeeded()) {
handle_compilation_failure(compilation_result, program.options());
}

const char* mangled_kernel_names[2] = {
program.get_mangling_of(kernel_names[0]),
program.get_mangling_of(my_kernel2_instantiation_name)
compilation_result.get_mangling_of(kernel_names[0]),
compilation_result.get_mangling_of(my_kernel2_instantiation_name)
};
auto module = cuda::module::create(device, program);
auto module = cuda::module::create(device, compilation_result);
auto my_kernel1 = module.get_kernel(mangled_kernel_names[0]);
auto my_kernel2 = module.get_kernel(mangled_kernel_names[1]);

Expand Down Expand Up @@ -268,21 +281,30 @@ __global__ void constant_test(int *x) {
const char *c_b_a = "&c::b::a";
} names;

auto program = cuda::rtc::program::create("const_program", const_program_source);

cuda::rtc::compilation_options_t options;
options.use_fast_math = true;
program.register_globals(names.kernel, names.a, names.b_a, names.c_b_a);
auto device = cuda::device::current::get();
if (not try_compilation(program, device, options)) { return false; }
auto module = cuda::module::create(device, program);
auto program = cuda::rtc::program::create("const_program")
.set_source(const_program_source)
.add_registered_global(names.kernel)
.add_registered_global(names.a)
.add_registered_global(names.b_a)
.add_registered_global(names.c_b_a)
.set_target(device);

auto& options = program.options();
options.set_language_dialect("c++11");
options.use_fast_math = true;
auto compilation_result = program.compile();
if (not compilation_result.succeeded()) {
handle_compilation_failure(compilation_result, program.options());
}
auto module = cuda::module::create(device, compilation_result);

auto mangled_kernel_name = program.get_mangling_of(names.kernel);
auto mangled_kernel_name = compilation_result.get_mangling_of(names.kernel);
auto kernel = module.get_kernel(mangled_kernel_name);
int inval[] = {2, 4, 8};
auto a = module.get_global_region(program.get_mangling_of(names.a));
auto b_a = module.get_global_region(program.get_mangling_of(names.b_a));
auto c_b_a = module.get_global_region(program.get_mangling_of(names.c_b_a));
auto a = module.get_global_region(compilation_result.get_mangling_of(names.a));
auto b_a = module.get_global_region(compilation_result.get_mangling_of(names.b_a));
auto c_b_a = module.get_global_region(compilation_result.get_mangling_of(names.c_b_a));
cuda::memory::copy(a, &inval[0]);
cuda::memory::copy(b_a, &inval[1]);
cuda::memory::copy(c_b_a, &inval[2]);
Expand All @@ -295,22 +317,28 @@ __global__ void constant_test(int *x) {
return std::equal(inval, inval + n_const, outval);
}


bool test_constant_2()
{
// test __constant__ array look up in header nested in both anonymous and explicit namespace
constexpr int n_const = 3;
const char* second_kernel_name = "constant_test2";
auto program = cuda::rtc::program::create_empty("const_program_2");
cuda::rtc::compilation_options_t options;
auto device = cuda::device::current::get();
const char* name_of_anon_b_a = "&b::a";
auto program = cuda::rtc::program::create("const_program_2")
.add_registered_global(second_kernel_name)
.add_registered_global(name_of_anon_b_a)
.set_target(device);
auto& options = program.options();
options.preinclude_files.emplace_back("example_headers/constant_header.cuh");
options.use_fast_math = true;
const char* name_of_anon_b_a = "&b::a";
program.register_globals(second_kernel_name, name_of_anon_b_a);
auto device = cuda::device::current::get();
if (not try_compilation(program, device, options)) { return false; }
auto module = cuda::module::create(device, program);
auto anon_b_a = module.get_global_region(program.get_mangling_of(name_of_anon_b_a));
auto kernel = module.get_kernel(program.get_mangling_of(second_kernel_name));
auto compilation_result = program.compile();
if (not compilation_result.succeeded()) {
handle_compilation_failure(compilation_result, program.options());
}
auto module = cuda::module::create(device, compilation_result);
auto anon_b_a = module.get_global_region(compilation_result.get_mangling_of(name_of_anon_b_a));
auto kernel = module.get_kernel(compilation_result.get_mangling_of(second_kernel_name));
int inval[] = {3, 5, 9};
cuda::memory::copy(anon_b_a, inval);
auto launch_config = cuda::make_launch_config(cuda::grid::composite_dimensions_t::point());
Expand Down
1 change: 1 addition & 0 deletions src/cuda/nvrtc.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#include <cuda/nvrtc/error.hpp>
#include <cuda/nvrtc/compilation_options.hpp>
#include <cuda/nvrtc/versions.hpp>
#include <cuda/nvrtc/compilation_output.hpp>
#include <cuda/nvrtc/program.hpp>

#endif // CUDA_NVRTC_WRAPPERS_HPP_
Loading

0 comments on commit ef46e32

Please sign in to comment.