From bd6c0ba6596fa49c20c3a1a49c0bf663e3a5d5fb Mon Sep 17 00:00:00 2001 From: Eyal Rozenberg Date: Mon, 22 Jan 2024 15:08:49 +0200 Subject: [PATCH] Regards #573: Removed unnecessary inclusions of `` --- src/cuda/api/apriori_compiled_kernel.hpp | 5 +++++ src/cuda/api/array.hpp | 2 -- src/cuda/api/kernel.hpp | 2 -- src/cuda/api/kernel_launch.hpp | 9 +++++--- src/cuda/api/memory.hpp | 3 ++- src/cuda/api/miscellany.hpp | 21 ++++++++++--------- .../api/multi_wrapper_impls/kernel_launch.hpp | 3 +++ src/cuda/api/texture_view.hpp | 1 - 8 files changed, 27 insertions(+), 19 deletions(-) diff --git a/src/cuda/api/apriori_compiled_kernel.hpp b/src/cuda/api/apriori_compiled_kernel.hpp index af8aaada..b98c70d8 100644 --- a/src/cuda/api/apriori_compiled_kernel.hpp +++ b/src/cuda/api/apriori_compiled_kernel.hpp @@ -10,6 +10,11 @@ #include "kernel.hpp" #include "current_context.hpp" + +// The following is needed for occupancy-related calculation convenience +// and kernel-attribute-related API functions +#include + #include namespace cuda { diff --git a/src/cuda/api/array.hpp b/src/cuda/api/array.hpp index 6fc78de7..03c9fa9b 100644 --- a/src/cuda/api/array.hpp +++ b/src/cuda/api/array.hpp @@ -15,8 +15,6 @@ #include "context.hpp" #include "error.hpp" -#include - #ifndef CUDA_NO_HALF #include #endif diff --git a/src/cuda/api/kernel.hpp b/src/cuda/api/kernel.hpp index 03101d6b..44a29f7d 100644 --- a/src/cuda/api/kernel.hpp +++ b/src/cuda/api/kernel.hpp @@ -16,8 +16,6 @@ #include "types.hpp" #include "current_context.hpp" -#include - #if CUDA_VERSION < 11000 #define CAN_GET_APRIORI_KERNEL_HANDLE 0 #define VIRTUAL_UNLESS_CAN_GET_APRIORI_KERNEL_HANDLE virtual diff --git a/src/cuda/api/kernel_launch.hpp b/src/cuda/api/kernel_launch.hpp index ca21de13..3992b991 100644 --- a/src/cuda/api/kernel_launch.hpp +++ b/src/cuda/api/kernel_launch.hpp @@ -32,9 +32,7 @@ * @note Even though when you use this wrapper, your code will not have the silly * chevron, you can't use it from regular `.cpp` files compiled with your host * compiler. Hence the `.cuh` extension. You _can_, however, safely include this - * file from your `.cpp` for other definitions. Theoretically, we could have - * used the `cudaLaunchKernel` API function, by creating an array on the stack - * which points to all of the other arguments, but that's kind of redundant. + * file from your `.cpp` for other definitions. * */ @@ -46,6 +44,11 @@ #include "kernel.hpp" #include "apriori_compiled_kernel.hpp" +#if CUDA_VERSION >= 9000 +// The following is necessary for cudaLaunchCooperativeKernel +#include +#endif // CUDA_VERSION >= 9000 + #include #include diff --git a/src/cuda/api/memory.hpp b/src/cuda/api/memory.hpp index a641df64..16436c8f 100644 --- a/src/cuda/api/memory.hpp +++ b/src/cuda/api/memory.hpp @@ -34,7 +34,8 @@ #include "pointer.hpp" #include "current_context.hpp" -#include // needed, rather than cuda_runtime_api.h, e.g. for cudaMalloc +// The following is needed for cudaGetSymbolAddress, cudaGetSymbolSize +#include #include #include // for ::std::memset diff --git a/src/cuda/api/miscellany.hpp b/src/cuda/api/miscellany.hpp index 08c4f4d1..6d04f547 100644 --- a/src/cuda/api/miscellany.hpp +++ b/src/cuda/api/miscellany.hpp @@ -9,8 +9,6 @@ #define CUDA_API_WRAPPERS_MISCELLANY_HPP_ #include "types.hpp" - -#include #include "error.hpp" #include @@ -27,14 +25,14 @@ namespace cuda { */ inline void initialize_driver() { - static constexpr const unsigned dummy_flags { 0 }; // this is the only allowed value for flags + static constexpr const unsigned dummy_flags{0}; // this is the only allowed value for flags auto status = cuInit(dummy_flags); throw_if_error_lazy(status, "Failed initializing the CUDA driver"); } inline void ensure_driver_is_initialized() { - thread_local bool driver_known_to_be_initialized { false }; + thread_local bool driver_known_to_be_initialized{false}; if (not driver_known_to_be_initialized) { initialize_driver(); driver_known_to_be_initialized = true; @@ -58,14 +56,17 @@ namespace device { inline device::id_t count() { initialize_driver(); - // This function is often called before any device is obtained (which is where we - // expect the driver to be initialized) + // This function is often called before any device is obtained (which is where we + // expect the driver to be initialized) int device_count = 0; // Initializing, just to be on the safe side status_t result = cuDeviceGetCount(&device_count); - switch(result) { - case status::no_device: return 0; - case status::success: break; - default: throw runtime_error(result, "Failed obtaining the number of CUDA devices on the system"); + switch (result) { + case status::no_device: + return 0; + case status::success: + break; + default: + throw runtime_error(result, "Failed obtaining the number of CUDA devices on the system"); } if (device_count < 0) { throw ::std::logic_error("cudaGetDeviceCount() reports an invalid number of CUDA devices"); diff --git a/src/cuda/api/multi_wrapper_impls/kernel_launch.hpp b/src/cuda/api/multi_wrapper_impls/kernel_launch.hpp index c12c5f30..523fc90f 100644 --- a/src/cuda/api/multi_wrapper_impls/kernel_launch.hpp +++ b/src/cuda/api/multi_wrapper_impls/kernel_launch.hpp @@ -16,6 +16,9 @@ #include "../pointer.hpp" #include "../device.hpp" +// The following is needed for occupancy-related calculation convenience functions +#include + namespace cuda { namespace detail_ { diff --git a/src/cuda/api/texture_view.hpp b/src/cuda/api/texture_view.hpp index 29601697..d28a32e8 100644 --- a/src/cuda/api/texture_view.hpp +++ b/src/cuda/api/texture_view.hpp @@ -12,7 +12,6 @@ #include "array.hpp" #include "error.hpp" #include "memory.hpp" -#include namespace cuda {