Skip to content

Commit

Permalink
Fix appending of nullptrs to the args of a CUDA kernel
Browse files: browse the repository at this point in the history
  • Loading branch information
kchristin22 committed Sep 21, 2024
1 parent b9a390d commit 99efd46
Showing 1 changed file with 11 additions and 2 deletions.
13 changes: 11 additions & 2 deletions include/clad/Differentiator/Differentiator.h
Original file line number Diff line number Diff line change
Expand Up @@ -125,8 +125,17 @@ CUDA_HOST_DEVICE T push(tape<T>& to, ArgsT... val) {
CUDA_ARGS CUDA_REST_ARGS Args&&... args) {
#if defined(__CUDACC__) && !defined(__CUDA_ARCH__)
if (CUDAkernel) {
void* argPtrs[] = {(void*)&args..., (void*)static_cast<Rest>(nullptr)...};
cudaLaunchKernel((void*)f, grid, block, argPtrs, shared_mem, stream);
constexpr size_t totalArgs = sizeof...(args) + sizeof...(Rest);
std::vector<void*> argPtrs;
argPtrs.reserve(totalArgs);
(argPtrs.push_back(static_cast<void*>(&args)), ...);

void* null_param = nullptr;
for (size_t i = sizeof...(args); i < totalArgs; ++i)
argPtrs[i] = &null_param;

cudaLaunchKernel((void*)f, grid, block, argPtrs.data(), shared_mem, stream);
return return_type_t<F>();
} else {
return f(static_cast<Args>(args)..., static_cast<Rest>(nullptr)...);
}
Expand Down

0 comments on commit 99efd46

Please sign in to comment.