Regards #573: Removed unnecessary inclusions of <cuda_runtime.h>

eyalroz · Mar 1, 2024 · bd6c0ba
1 parent db4d3e8
commit bd6c0ba
Show file tree

Hide file tree

Showing 8 changed files with 27 additions and 19 deletions.
diff --git a/src/cuda/api/apriori_compiled_kernel.hpp b/src/cuda/api/apriori_compiled_kernel.hpp
@@ -10,6 +10,11 @@
 
 #include "kernel.hpp"
 #include "current_context.hpp"
+
+// The following is needed for occupancy-related calculation convenience
+// and kernel-attribute-related API functions
+#include <cuda_runtime.h>
+
 #include <type_traits>
 
 namespace cuda {

diff --git a/src/cuda/api/array.hpp b/src/cuda/api/array.hpp
@@ -15,8 +15,6 @@
 #include "context.hpp"
 #include "error.hpp"
 
-#include <cuda_runtime.h>
-
 #ifndef CUDA_NO_HALF
 #include <cuda_fp16.h>
 #endif

diff --git a/src/cuda/api/kernel.hpp b/src/cuda/api/kernel.hpp
@@ -16,8 +16,6 @@
 #include "types.hpp"
 #include "current_context.hpp"
 
-#include <cuda_runtime.h>
-
 #if CUDA_VERSION < 11000
 #define CAN_GET_APRIORI_KERNEL_HANDLE 0
 #define VIRTUAL_UNLESS_CAN_GET_APRIORI_KERNEL_HANDLE virtual

diff --git a/src/cuda/api/kernel_launch.hpp b/src/cuda/api/kernel_launch.hpp
@@ -32,9 +32,7 @@
  * @note Even though when you use this wrapper, your code will not have the silly
  * chevron, you can't use it from regular `.cpp` files compiled with your host
  * compiler. Hence the `.cuh` extension. You _can_, however, safely include this
- * file from your `.cpp` for other definitions. Theoretically, we could have
- * used the `cudaLaunchKernel` API function, by creating an array on the stack
- * which points to all of the other arguments, but that's kind of redundant.
+ * file from your `.cpp` for other definitions.
  *
  */
 
@@ -46,6 +44,11 @@
 #include "kernel.hpp"
 #include "apriori_compiled_kernel.hpp"
 
+#if CUDA_VERSION >= 9000
+// The following is necessary for cudaLaunchCooperativeKernel
+#include <cuda_runtime.h>
+#endif // CUDA_VERSION >= 9000
+
 #include <type_traits>
 #include <utility>
 

diff --git a/src/cuda/api/memory.hpp b/src/cuda/api/memory.hpp
@@ -34,7 +34,8 @@
 #include "pointer.hpp"
 #include "current_context.hpp"
 
-#include <cuda_runtime.h> // needed, rather than cuda_runtime_api.h, e.g. for cudaMalloc
+// The following is needed for cudaGetSymbolAddress, cudaGetSymbolSize
+#include <cuda_runtime.h>
 
 #include <memory>
 #include <cstring> // for ::std::memset

diff --git a/src/cuda/api/miscellany.hpp b/src/cuda/api/miscellany.hpp
@@ -9,8 +9,6 @@
 #define CUDA_API_WRAPPERS_MISCELLANY_HPP_
 
 #include "types.hpp"
-
-#include <cuda_runtime_api.h>
 #include "error.hpp"
 
 #include <ostream>
@@ -27,14 +25,14 @@ namespace cuda {
  */
 inline void initialize_driver()
 {
-	static constexpr const unsigned dummy_flags { 0 }; // this is the only allowed value for flags
+	static constexpr const unsigned dummy_flags{0}; // this is the only allowed value for flags
 	auto status = cuInit(dummy_flags);
 	throw_if_error_lazy(status, "Failed initializing the CUDA driver");
 }
 
 inline void ensure_driver_is_initialized()
 {
-	thread_local bool driver_known_to_be_initialized { false };
+	thread_local bool driver_known_to_be_initialized{false};
 	if (not driver_known_to_be_initialized) {
 		initialize_driver();
 		driver_known_to_be_initialized = true;
@@ -58,14 +56,17 @@ namespace device {
 inline device::id_t count()
 {
 	initialize_driver();
-		// This function is often called before any device is obtained (which is where we
-		// expect the driver to be initialized)
+	// This function is often called before any device is obtained (which is where we
+	// expect the driver to be initialized)
 	int device_count = 0; // Initializing, just to be on the safe side
 	status_t result = cuDeviceGetCount(&device_count);
-	switch(result) {
-		case status::no_device: return 0;
-		case status::success: break;
-		default: throw runtime_error(result, "Failed obtaining the number of CUDA devices on the system");
+	switch (result) {
+	case status::no_device:
+		return 0;
+	case status::success:
+		break;
+	default:
+		throw runtime_error(result, "Failed obtaining the number of CUDA devices on the system");
 	}
 	if (device_count < 0) {
 		throw ::std::logic_error("cudaGetDeviceCount() reports an invalid number of CUDA devices");

diff --git a/src/cuda/api/multi_wrapper_impls/kernel_launch.hpp b/src/cuda/api/multi_wrapper_impls/kernel_launch.hpp
@@ -16,6 +16,9 @@
 #include "../pointer.hpp"
 #include "../device.hpp"
 
+// The following is needed for occupancy-related calculation convenience functions
+#include <cuda_runtime.h>
+
 namespace cuda {
 
 namespace detail_ {

diff --git a/src/cuda/api/texture_view.hpp b/src/cuda/api/texture_view.hpp
@@ -12,7 +12,6 @@
 #include "array.hpp"
 #include "error.hpp"
 #include "memory.hpp"
-#include <cuda_runtime.h>
 
 namespace cuda {