From c1345619456c9eca42dad60368cf81bca1653cbd Mon Sep 17 00:00:00 2001 From: Jeremy Kubica <104161096+jeremykubica@users.noreply.github.com> Date: Mon, 11 Mar 2024 16:02:42 -0400 Subject: [PATCH] Reorganize CUDA dependencies --- CMakeLists.txt | 5 +- src/kbmod/search/{ => kernels}/cuda_errors.h | 0 .../search/{ => kernels}/image_kernels.cu | 2 +- src/kbmod/search/kernels/kernel_memory.cu | 109 ++++++++++++++++++ src/kbmod/search/kernels/kernel_memory.h | 44 +++++++ src/kbmod/search/{ => kernels}/kernels.cu | 63 +--------- src/kbmod/search/psi_phi_array.cpp | 9 +- src/kbmod/search/trajectory_list.cpp | 10 +- 8 files changed, 168 insertions(+), 74 deletions(-) rename src/kbmod/search/{ => kernels}/cuda_errors.h (100%) rename src/kbmod/search/{ => kernels}/image_kernels.cu (99%) create mode 100644 src/kbmod/search/kernels/kernel_memory.cu create mode 100644 src/kbmod/search/kernels/kernel_memory.h rename src/kbmod/search/{ => kernels}/kernels.cu (90%) diff --git a/CMakeLists.txt b/CMakeLists.txt index c3b78b726..ffcc34019 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -71,8 +71,9 @@ target_link_libraries(search PRIVATE if(HAVE_CUDA) message(STATUS "Building CUDA Libraries") add_library(searchcu STATIC - src/kbmod/search/image_kernels.cu - src/kbmod/search/kernels.cu + src/kbmod/search/kernels/image_kernels.cu + src/kbmod/search/kernels/kernel_memory.cu + src/kbmod/search/kernels/kernels.cu ) set_target_properties(searchcu PROPERTIES diff --git a/src/kbmod/search/cuda_errors.h b/src/kbmod/search/kernels/cuda_errors.h similarity index 100% rename from src/kbmod/search/cuda_errors.h rename to src/kbmod/search/kernels/cuda_errors.h diff --git a/src/kbmod/search/image_kernels.cu b/src/kbmod/search/kernels/image_kernels.cu similarity index 99% rename from src/kbmod/search/image_kernels.cu rename to src/kbmod/search/kernels/image_kernels.cu index 3a53a32fb..f417878fd 100644 --- a/src/kbmod/search/image_kernels.cu +++ b/src/kbmod/search/kernels/image_kernels.cu @@ -9,7 +9,7 @@ #define IMAGE_KERNELS_CU_ #include -#include "common.h" +#include "../common.h" #include "cuda_errors.h" #include #include diff --git a/src/kbmod/search/kernels/kernel_memory.cu b/src/kbmod/search/kernels/kernel_memory.cu new file mode 100644 index 000000000..3464f6613 --- /dev/null +++ b/src/kbmod/search/kernels/kernel_memory.cu @@ -0,0 +1,109 @@ +/* + * kernel_memory.cu + * + * Helper functions for transfering KBMOD data to/from GPU. + */ + +#ifndef KERNELS_MEMORY_CU_ +#define KERNELS_MEMORY_CU_ + +#include +#include +#include + +#include "cuda_errors.h" + +#include "../trajectory_list.h" + +namespace search { + + +// --------------------------------------- +// --- Basic Memory Functions ------------ +// --------------------------------------- + +extern "C" void* allocate_gpu_block(unsigned long memory_size) { + void *gpu_ptr; + checkCudaErrors(cudaMalloc((void **)&gpu_ptr, memory_size)); + if (gpu_ptr == nullptr) throw std::runtime_error("Unable to allocate GPU memory."); + return gpu_ptr; +} + +extern "C" void free_gpu_block(void* gpu_ptr) { + if (gpu_ptr == nullptr) throw std::runtime_error("Trying to free nullptr."); + checkCudaErrors(cudaFree(gpu_ptr)); +} + +extern "C" void copy_block_to_gpu(void* cpu_ptr, void* gpu_ptr, unsigned long memory_size) { + if (cpu_ptr == nullptr) throw std::runtime_error("Invalid CPU pointer"); + if (gpu_ptr == nullptr) throw std::runtime_error("Invalid GPU pointer"); + + checkCudaErrors(cudaMemcpy(gpu_ptr, cpu_ptr, memory_size, cudaMemcpyHostToDevice)); +} + +extern "C" void copy_block_to_cpu(void* cpu_ptr, void* gpu_ptr, unsigned long memory_size) { + if (cpu_ptr == nullptr) throw std::runtime_error("Invalid CPU pointer"); + if (gpu_ptr == nullptr) throw std::runtime_error("Invalid GPU pointer"); + + checkCudaErrors(cudaMemcpy(cpu_ptr, gpu_ptr, memory_size, cudaMemcpyDeviceToHost)); +} + +// --------------------------------------- +// --- Memory Functions ------------------ +// --------------------------------------- + + +extern "C" float *move_floats_to_gpu(std::vector &data) { + unsigned long memory_size = data.size() * sizeof(float); + + float *gpu_ptr; + checkCudaErrors(cudaMalloc((void **)&gpu_ptr, memory_size)); + checkCudaErrors(cudaMemcpy(gpu_ptr, data.data(), memory_size, cudaMemcpyHostToDevice)); + + return gpu_ptr; +} + +extern "C" void free_gpu_float_array(float *gpu_ptr) { + if (gpu_ptr == nullptr) throw std::runtime_error("Trying to free nullptr."); + checkCudaErrors(cudaFree(gpu_ptr)); +} + +extern "C" void *move_void_array_to_gpu(void *data_array, long unsigned memory_size) { + if (data_array == nullptr) throw std::runtime_error("No data given."); + if (memory_size == 0) throw std::runtime_error("Invalid size."); + + void *gpu_ptr; + checkCudaErrors(cudaMalloc((void **)&gpu_ptr, memory_size)); + checkCudaErrors(cudaMemcpy(gpu_ptr, data_array, memory_size, cudaMemcpyHostToDevice)); + + return gpu_ptr; +} + +extern "C" void free_gpu_void_array(void *gpu_ptr) { + if (gpu_ptr == nullptr) throw std::runtime_error("Trying to free nullptr."); + checkCudaErrors(cudaFree(gpu_ptr)); +} + +extern "C" Trajectory *allocate_gpu_trajectory_list(long unsigned num_trj) { + Trajectory *gpu_ptr; + checkCudaErrors(cudaMalloc((void **)&gpu_ptr, num_trj * sizeof(Trajectory))); + return gpu_ptr; +} + +extern "C" void free_gpu_trajectory_list(Trajectory *gpu_ptr) { checkCudaErrors(cudaFree(gpu_ptr)); } + +extern "C" void copy_trajectory_list(Trajectory *cpu_ptr, Trajectory *gpu_ptr, long unsigned num_trj, + bool to_gpu) { + if ((cpu_ptr == nullptr) || (gpu_ptr == nullptr)) throw std::runtime_error("Invalid pointer."); + long unsigned memory_size = num_trj * sizeof(Trajectory); + + if (to_gpu) { + checkCudaErrors(cudaMemcpy(gpu_ptr, cpu_ptr, memory_size, cudaMemcpyHostToDevice)); + } else { + checkCudaErrors(cudaMemcpy(cpu_ptr, gpu_ptr, memory_size, cudaMemcpyDeviceToHost)); + } +} + +} /* namespace search */ + +#endif /* KERNELS_MEMORY_CU_ */ diff --git a/src/kbmod/search/kernels/kernel_memory.h b/src/kbmod/search/kernels/kernel_memory.h new file mode 100644 index 000000000..12ed6bd0e --- /dev/null +++ b/src/kbmod/search/kernels/kernel_memory.h @@ -0,0 +1,44 @@ +/* + * kernel_memory.h + * + * Helper functions for transfering KBMOD data to/from GPU. + */ + +#ifndef KERNELS_MEMORY_H_ +#define KERNELS_MEMORY_H_ + +#include "../trajectory_list.h" + +namespace search { + +// --------------------------------------- +// --- Basic Memory Functions ------------ +// --------------------------------------- + +extern "C" void* allocate_gpu_block(unsigned long memory_size); + +extern "C" void free_gpu_block(void* gpu_ptr); + +extern "C" void copy_block_to_gpu(void* cpu_ptr, void* gpu_ptr, unsigned long memory_size); + +extern "C" void copy_block_to_cpu(void* cpu_ptr, void* gpu_ptr, unsigned long memory_size); + +// --------------------------------------- +// --- Memory Functions ------------------ +// --------------------------------------- + +extern "C" float *move_floats_to_gpu(std::vector &data); +extern "C" void free_gpu_float_array(float *gpu_ptr); + +extern "C" void *move_void_array_to_gpu(void *data_array, long unsigned memory_size); +extern "C" void free_gpu_void_array(void *gpu_ptr); + +extern "C" Trajectory *allocate_gpu_trajectory_list(long unsigned num_trj); +extern "C" void free_gpu_trajectory_list(Trajectory *gpu_ptr); +extern "C" void copy_trajectory_list(Trajectory *cpu_ptr, Trajectory *gpu_ptr, long unsigned num_trj, + bool to_gpu); + + +} /* namespace search */ + +#endif /* KERNELS_MEMORY_CU_ */ diff --git a/src/kbmod/search/kernels.cu b/src/kbmod/search/kernels/kernels.cu similarity index 90% rename from src/kbmod/search/kernels.cu rename to src/kbmod/search/kernels/kernels.cu index 1bcd14331..2473e68ac 100644 --- a/src/kbmod/search/kernels.cu +++ b/src/kbmod/search/kernels/kernels.cu @@ -16,67 +16,14 @@ #include #include -#include "common.h" +#include "../common.h" #include "cuda_errors.h" -#include "psi_phi_array_ds.h" -#include "trajectory_list.h" +#include "../psi_phi_array_ds.h" +#include "../trajectory_list.h" -namespace search { - -// --------------------------------------- -// --- Memory Functions ------------------ -// --------------------------------------- - -extern "C" float *move_floats_to_gpu(std::vector &data) { - unsigned long memory_size = data.size() * sizeof(float); - - float *gpu_ptr; - checkCudaErrors(cudaMalloc((void **)&gpu_ptr, memory_size)); - checkCudaErrors(cudaMemcpy(gpu_ptr, data.data(), memory_size, cudaMemcpyHostToDevice)); - - return gpu_ptr; -} - -extern "C" void free_gpu_float_array(float *gpu_ptr) { - if (gpu_ptr == nullptr) throw std::runtime_error("Trying to free nullptr."); - checkCudaErrors(cudaFree(gpu_ptr)); -} - -extern "C" void *move_void_array_to_gpu(void *data_array, long unsigned memory_size) { - if (data_array == nullptr) throw std::runtime_error("No data given."); - if (memory_size == 0) throw std::runtime_error("Invalid size."); - - void *gpu_ptr; - checkCudaErrors(cudaMalloc((void **)&gpu_ptr, memory_size)); - checkCudaErrors(cudaMemcpy(gpu_ptr, data_array, memory_size, cudaMemcpyHostToDevice)); +#include "kernel_memory.h" - return gpu_ptr; -} - -extern "C" void free_gpu_void_array(void *gpu_ptr) { - if (gpu_ptr == nullptr) throw std::runtime_error("Trying to free nullptr."); - checkCudaErrors(cudaFree(gpu_ptr)); -} - -extern "C" Trajectory *allocate_gpu_trajectory_list(long unsigned num_trj) { - Trajectory *gpu_ptr; - checkCudaErrors(cudaMalloc((void **)&gpu_ptr, num_trj * sizeof(Trajectory))); - return gpu_ptr; -} - -extern "C" void free_gpu_trajectory_list(Trajectory *gpu_ptr) { checkCudaErrors(cudaFree(gpu_ptr)); } - -extern "C" void copy_trajectory_list(Trajectory *cpu_ptr, Trajectory *gpu_ptr, long unsigned num_trj, - bool to_gpu) { - if ((cpu_ptr == nullptr) || (gpu_ptr == nullptr)) throw std::runtime_error("Invalid pointer."); - long unsigned memory_size = num_trj * sizeof(Trajectory); - - if (to_gpu) { - checkCudaErrors(cudaMemcpy(gpu_ptr, cpu_ptr, memory_size, cudaMemcpyHostToDevice)); - } else { - checkCudaErrors(cudaMemcpy(cpu_ptr, gpu_ptr, memory_size, cudaMemcpyDeviceToHost)); - } -} +namespace search { // --------------------------------------- // --- Data Access Functions ------------- diff --git a/src/kbmod/search/psi_phi_array.cpp b/src/kbmod/search/psi_phi_array.cpp index 038e71aa7..ac4e3d269 100644 --- a/src/kbmod/search/psi_phi_array.cpp +++ b/src/kbmod/search/psi_phi_array.cpp @@ -2,16 +2,13 @@ #include "psi_phi_array_utils.h" #include "pydocs/psi_phi_array_docs.h" -namespace search { - // Declaration of CUDA functions that will be linked in. #ifdef HAVE_CUDA -extern "C" float* move_floats_to_gpu(std::vector& data); -extern "C" void free_gpu_float_array(float* gpu_ptr); -extern "C" void* move_void_array_to_gpu(void* data_array, long unsigned memory_size); -extern "C" void free_gpu_void_array(void* gpu_ptr); +#include "kernels/kernel_memory.h" #endif +namespace search { + // ------------------------------------------------------- // --- Implementation of core data structure functions --- // ------------------------------------------------------- diff --git a/src/kbmod/search/trajectory_list.cpp b/src/kbmod/search/trajectory_list.cpp index 6db1734a9..d367b0c74 100644 --- a/src/kbmod/search/trajectory_list.cpp +++ b/src/kbmod/search/trajectory_list.cpp @@ -4,17 +4,13 @@ #include #include -namespace search { - // Declaration of CUDA functions that will be linked in. #ifdef HAVE_CUDA -extern "C" Trajectory *allocate_gpu_trajectory_list(long unsigned num_trj); +#include "kernels/kernel_memory.h" +#endif -extern "C" void free_gpu_trajectory_list(Trajectory *gpu_ptr); -extern "C" void copy_trajectory_list(Trajectory *cpu_ptr, Trajectory *gpu_ptr, long unsigned num_trj, - bool to_gpu); -#endif +namespace search { // ------------------------------------------------------- // --- Implementation of core data structure functions ---