diff --git a/src/cuda/api/apriori_compiled_kernel.hpp b/src/cuda/api/apriori_compiled_kernel.hpp index a3adaae9..9d66f813 100644 --- a/src/cuda/api/apriori_compiled_kernel.hpp +++ b/src/cuda/api/apriori_compiled_kernel.hpp @@ -36,7 +36,7 @@ inline handle_t get_handle(const void *kernel_function_ptr, const char* name = n handle_t handle; auto status = cudaGetFuncBySymbol(&handle, kernel_function_ptr); - throw_if_error(status, "Failed obtaining a CUDA function handle for " + throw_if_error_lazy(status, "Failed obtaining a CUDA function handle for " + ((name == nullptr) ? ::std::string("a kernel function") : ::std::string("kernel function ") + name) + " at " + cuda::detail_::ptr_as_hex(kernel_function_ptr)); return handle; diff --git a/src/cuda/api/array.hpp b/src/cuda/api/array.hpp index 860e7494..95833029 100644 --- a/src/cuda/api/array.hpp +++ b/src/cuda/api/array.hpp @@ -70,7 +70,7 @@ handle_t create_in_current_context(dimensions_t<3> dimensions) descriptor.Flags = 0; auto status = cuArray3DCreate(&handle, &descriptor); - throw_if_error(status, "failed allocating 3D CUDA array"); + throw_if_error_lazy(status, "failed allocating 3D CUDA array"); return handle; } @@ -84,7 +84,7 @@ handle_t create_in_current_context(dimensions_t<2> dimensions) descriptor.NumChannels = 1; handle_t handle; auto status = cuArrayCreate(&handle, &descriptor); - throw_if_error(status, "failed allocating 2D CUDA array"); + throw_if_error_lazy(status, "failed allocating 2D CUDA array"); return handle; } @@ -106,7 +106,7 @@ handle_t get_descriptor(context::handle_t context_handle, handle_t handle) auto status = (NumDimensions == 2) ? cuArrayGetDescriptor(&result, handle) : cuArray3DGetDescriptor(&result, handle); - throw_if_error(status, + throw_if_error_lazy(status, ::std::string("Failed obtaining the descriptor of the CUDA ") + (NumDimensions == 2 ? 
"2":"3") + "D array at " + cuda::detail_::ptr_as_hex(handle)); return result; @@ -170,7 +170,7 @@ class array_t { auto status = cuArrayDestroy(handle_); // Note: Throwing in a noexcept destructor; if the free'ing fails, the program // will likely terminate - throw_if_error(status, "Failed destroying CUDA array " + cuda::detail_::ptr_as_hex(handle_)); + throw_if_error_lazy(status, "Failed destroying CUDA array " + cuda::detail_::ptr_as_hex(handle_)); } } diff --git a/src/cuda/api/context.hpp b/src/cuda/api/context.hpp index 7ed00701..78849e05 100644 --- a/src/cuda/api/context.hpp +++ b/src/cuda/api/context.hpp @@ -83,7 +83,7 @@ inline limit_value_t get_limit(limit_t limit_id) { limit_value_t limit_value; auto status = cuCtxGetLimit(&limit_value, limit_id); - throw_if_error(status, + throw_if_error_lazy(status, "Failed obtaining CUDA context limit value"); return limit_value; } @@ -91,7 +91,7 @@ inline limit_value_t get_limit(limit_t limit_id) inline void set_limit(limit_t limit_id, limit_value_t new_value) { auto status = cuCtxSetLimit(limit_id, new_value); - throw_if_error(status, "Failed obtaining CUDA context limit value"); + throw_if_error_lazy(status, "Failed obtaining CUDA context limit value"); } constexpr flags_t inline make_flags( @@ -122,7 +122,7 @@ inline size_t total_memory(handle_t handle) { size_t total_mem_in_bytes; auto status = cuMemGetInfo(nullptr, &total_mem_in_bytes); - throw_if_error(status, "Failed determining amount of total memory for " + identify(handle)); + throw_if_error_lazy(status, "Failed determining amount of total memory for " + identify(handle)); return total_mem_in_bytes; } @@ -131,14 +131,14 @@ inline size_t free_memory(handle_t handle) { size_t free_mem_in_bytes; auto status = cuMemGetInfo(&free_mem_in_bytes, nullptr); - throw_if_error(status, "Failed determining amount of free memory for " + identify(handle)); + throw_if_error_lazy(status, "Failed determining amount of free memory for " + identify(handle)); return free_mem_in_bytes; } inline void set_cache_preference(handle_t handle, multiprocessor_cache_preference_t preference) { auto status = cuCtxSetCacheConfig(static_cast(preference)); - throw_if_error(status, + throw_if_error_lazy(status, "Setting the multiprocessor L1/Shared Memory cache distribution preference to " + ::std::to_string((unsigned) preference) + " for " + identify(handle)); } @@ -147,7 +147,7 @@ inline multiprocessor_cache_preference_t cache_preference(handle_t handle) { CUfunc_cache preference; auto status = cuCtxGetCacheConfig(&preference); - throw_if_error(status, + throw_if_error_lazy(status, "Obtaining the multiprocessor L1/Shared Memory cache distribution preference for " + identify(handle)); return (multiprocessor_cache_preference_t) preference; } @@ -156,14 +156,14 @@ inline shared_memory_bank_size_t shared_memory_bank_size(handle_t handle) { CUsharedconfig bank_size; auto status = cuCtxGetSharedMemConfig(&bank_size); - throw_if_error(status, "Obtaining the multiprocessor shared memory bank size for " + identify(handle)); + throw_if_error_lazy(status, "Obtaining the multiprocessor shared memory bank size for " + identify(handle)); return static_cast(bank_size); } inline void set_shared_memory_bank_size(handle_t handle, shared_memory_bank_size_t bank_size) { auto status = cuCtxSetSharedMemConfig(static_cast(bank_size)); - throw_if_error(status, "Setting the multiprocessor shared memory bank size for " + identify(handle)); + throw_if_error_lazy(status, "Setting the multiprocessor shared memory bank size for " + 
identify(handle)); } inline void synchronize(context::handle_t handle) @@ -181,13 +181,13 @@ inline void synchronize(device::id_t device_id, context::handle_t handle) inline void destroy(handle_t handle) { auto status = cuCtxDestroy(handle); - throw_if_error(status, "Failed destroying " + identify(handle)); + throw_if_error_lazy(status, "Failed destroying " + identify(handle)); } inline void destroy(handle_t handle, device::id_t device_index) { auto status = cuCtxDestroy(handle); - throw_if_error(status, "Failed destroying " + identify(handle, device_index)); + throw_if_error_lazy(status, "Failed destroying " + identify(handle, device_index)); } inline context::flags_t get_flags(handle_t handle) @@ -461,7 +461,7 @@ class context_t { scoped_setter_type set_context_for_this_scope(handle_); context::stream_priority_range_t result; auto status = cuCtxGetStreamPriorityRange(&result.least, &result.greatest); - throw_if_error(status, "Obtaining the priority range for streams within " + + throw_if_error_lazy(status, "Obtaining the priority range for streams within " + context::detail_::identify(*this)); return result; } @@ -476,7 +476,7 @@ class context_t { { unsigned int raw_version; auto status = cuCtxGetApiVersion(handle_, &raw_version); - throw_if_error(status, "Failed obtaining the API version for " + context::detail_::identify(*this)); + throw_if_error_lazy(status, "Failed obtaining the API version for " + context::detail_::identify(*this)); return version_t::from_single_number((int) raw_version); } @@ -540,7 +540,7 @@ class context_t { scoped_setter_type set_context_for_this_scope(handle_); #if (CUDA_VERSION >= 11000) auto status = cuCtxResetPersistingL2Cache(); - throw_if_error(status, "Failed resetting/clearing the persisting L2 cache memory"); + throw_if_error_lazy(status, "Failed resetting/clearing the persisting L2 cache memory"); #endif throw cuda::runtime_error( cuda::status::insufficient_driver, @@ -709,7 +709,7 @@ inline handle_t create_and_push( keep_larger_local_mem_after_resize); handle_t handle; auto status = cuCtxCreate(&handle, flags, device_id); - throw_if_error(status, "failed creating a CUDA context associated with " + throw_if_error_lazy(status, "failed creating a CUDA context associated with " + device::detail_::identify(device_id)); return handle; } diff --git a/src/cuda/api/current_context.hpp b/src/cuda/api/current_context.hpp index 6f074812..79c8337f 100644 --- a/src/cuda/api/current_context.hpp +++ b/src/cuda/api/current_context.hpp @@ -33,7 +33,7 @@ inline bool exists() if (status == cuda::status::not_yet_initialized) { return false; } - throw_if_error(status, "Failed obtaining the current context's handle"); + throw_if_error_lazy(status, "Failed obtaining the current context's handle"); return (handle != context::detail_::none); } @@ -93,7 +93,7 @@ inline status_and_handle_pair get_with_status() inline handle_t get_handle() { auto p = get_with_status(); - throw_if_error(p.status, "Failed obtaining the current context's handle"); + throw_if_error_lazy(p.status, "Failed obtaining the current context's handle"); return p.handle; } @@ -102,7 +102,7 @@ inline context::flags_t get_flags() { context::flags_t result; auto status = cuCtxGetFlags(&result); - throw_if_error(status, "Failed obtaining the current context's flags"); + throw_if_error_lazy(status, "Failed obtaining the current context's flags"); return result; } @@ -110,7 +110,7 @@ inline device::id_t get_device_id() { device::id_t device_id; auto result = cuCtxGetDevice(&device_id); - throw_if_error(result, 
"Failed obtaining the current context's device"); + throw_if_error_lazy(result, "Failed obtaining the current context's device"); return device_id; } @@ -125,7 +125,7 @@ inline device::id_t get_device_id() inline void push(handle_t context_handle) { auto status = cuCtxPushCurrent(context_handle); - throw_if_error(status, "Failed pushing to the top of the context stack: " + throw_if_error_lazy(status, "Failed pushing to the top of the context stack: " + context::detail_::identify(context_handle)); } @@ -152,7 +152,7 @@ inline context::handle_t pop() { handle_t popped_context_handle; auto status = cuCtxPopCurrent(&popped_context_handle); - throw_if_error(status, "Failed popping the current CUDA context"); + throw_if_error_lazy(status, "Failed popping the current CUDA context"); return popped_context_handle; } @@ -162,7 +162,7 @@ inline void set(handle_t context_handle) // if (detail_::get_handle() == context_handle_) { return; } // ... but decided against it. auto status = cuCtxSetCurrent(context_handle); - throw_if_error(status, + throw_if_error_lazy(status, "Failed setting the current context to " + context::detail_::identify(context_handle)); } diff --git a/src/cuda/api/current_device.hpp b/src/cuda/api/current_device.hpp index a96efcb4..dbb4e6e4 100644 --- a/src/cuda/api/current_device.hpp +++ b/src/cuda/api/current_device.hpp @@ -52,7 +52,7 @@ inline id_t get_id() // Should we activate and push the default device's context? probably not. return default_device_id; } - throw_if_error(status, + throw_if_error_lazy(status, "Failed obtaining the current context for determining which device is active"); if (current_context_handle == context::detail_::none) { @@ -64,7 +64,7 @@ inline id_t get_id() // // handle_t device_id; // auto status = cudaGetDevice(&device_id); -// throw_if_error(status, "Failure obtaining current device id"); +// throw_if_error_lazy(status, "Failure obtaining current device id"); // return device_id; } @@ -154,7 +154,7 @@ inline void set(const id_t *device_ids, size_t num_devices) throw cuda::runtime_error(status::invalid_device, "More devices listed than exist on the system"); } auto result = cudaSetValidDevices(const_cast(device_ids), (int) num_devices); - throw_if_error(result, + throw_if_error_lazy(result, "Failure setting the current device to any of the list of " + ::std::to_string(num_devices) + " devices specified"); } diff --git a/src/cuda/api/device.hpp b/src/cuda/api/device.hpp index d6454e98..a81f94b4 100644 --- a/src/cuda/api/device.hpp +++ b/src/cuda/api/device.hpp @@ -93,7 +93,7 @@ inline ::std::string get_name(id_t id) auto buffer_size = (size_type) (sizeof(stack_buffer) / sizeof(char)); auto try_getting_name = [&](char* buffer, size_type buffer_size) -> size_type { auto status = cuDeviceGetName(buffer, buffer_size-1, id); - throw_if_error(status, "Failed obtaining the CUDA device name"); + throw_if_error_lazy(status, "Failed obtaining the CUDA device name"); buffer[buffer_size-1] = '\0'; return (size_type) ::std::strlen(buffer); }; @@ -163,7 +163,7 @@ class device_t { context_setter_type set_for_this_scope(primary_context_handle()); int result; auto status = cuDeviceCanAccessPeer(&result, id(), peer.id()); - throw_if_error(status, "Failed determining whether " + throw_if_error_lazy(status, "Failed determining whether " + device::detail_::identify(id_) + " can access " + device::detail_::identify(peer.id_)); return (result == 1); @@ -194,7 +194,7 @@ class device_t { uuid_t uuid () const { uuid_t result; auto status = cuDeviceGetUuid(&result, id_); - 
throw_if_error(status, "Failed obtaining UUID for " + device::detail_::identify(id_)); + throw_if_error_lazy(status, "Failed obtaining UUID for " + device::detail_::identify(id_)); return result; } #endif // CUDA_VERSION >= 9020 @@ -228,7 +228,7 @@ class device_t { void set_flags(flags_type new_flags) const { auto status = cuDevicePrimaryCtxSetFlags(id(), new_flags); - throw_if_error(status, "Failed setting (primary context) flags for device " + device::detail_::identify(id_)); + throw_if_error_lazy(status, "Failed setting (primary context) flags for device " + device::detail_::identify(id_)); } public: @@ -242,14 +242,14 @@ class device_t { { properties_t properties; auto status = cudaGetDeviceProperties(&properties, id()); - throw_if_error(status, "Failed obtaining device properties for " + device::detail_::identify(id_)); + throw_if_error_lazy(status, "Failed obtaining device properties for " + device::detail_::identify(id_)); return properties; } static device_t choose_best_match(const properties_t& properties) { device::id_t id; auto status = cudaChooseDevice(&id, &properties); - throw_if_error(status, "Failed choosing a best matching device by a a property set."); + throw_if_error_lazy(status, "Failed choosing a best matching device by a a property set."); return device::wrap(id); } @@ -274,7 +274,7 @@ class device_t { { attribute_value_t attribute_value; auto status = cuDeviceGetAttribute(&attribute_value, attribute, id_); - throw_if_error(status, "Failed obtaining device properties for " + device::detail_::identify(id_)); + throw_if_error_lazy(status, "Failed obtaining device properties for " + device::detail_::identify(id_)); return attribute_value; } @@ -428,7 +428,7 @@ class device_t { primary_context_handle_; context_setter_type set_context_for_this_scope{pc_handle}; auto status = cudaDeviceReset(); - throw_if_error(status, "Resetting " + device::detail_::identify(id_)); + throw_if_error_lazy(status, "Resetting " + device::detail_::identify(id_)); } /** diff --git a/src/cuda/api/error.hpp b/src/cuda/api/error.hpp index 809db553..4a2deee7 100644 --- a/src/cuda/api/error.hpp +++ b/src/cuda/api/error.hpp @@ -299,6 +299,14 @@ class runtime_error : public ::std::runtime_error { status_t code_; }; +#define throw_if_error_lazy(status__, ... 
) \ +do { \ + status_t tie_status__ = static_cast<status_t>(status__); \ + if (is_failure(tie_status__)) { \ + throw runtime_error(tie_status__, (__VA_ARGS__)); \ + } \ +} while(false) + // TODO: The following could use ::std::optional arguments - which would // prevent the need for dual versions of the functions - but we're // not writing C++17 here diff --git a/src/cuda/api/event.hpp b/src/cuda/api/event.hpp index 47c10614..fa9164e2 100644 --- a/src/cuda/api/event.hpp +++ b/src/cuda/api/event.hpp @@ -38,7 +38,7 @@ inline void destroy( inline void enqueue_in_current_context(stream::handle_t stream_handle, handle_t event_handle) { auto status = cuEventRecord(event_handle, stream_handle); - throw_if_error(status, + throw_if_error_lazy(status, "Failed recording " + event::detail_::identify(event_handle) + " on " + stream::detail_::identify(stream_handle)); } @@ -342,7 +342,7 @@ inline duration_t time_elapsed_between(const event_t& start, const event_t& end) { float elapsed_milliseconds; auto status = cuEventElapsedTime(&elapsed_milliseconds, start.handle(), end.handle()); - throw_if_error(status, "determining the time elapsed between events"); + throw_if_error_lazy(status, "determining the time elapsed between events"); return duration_t { elapsed_milliseconds }; } @@ -372,7 +372,7 @@ inline handle_t create_raw_in_current_context(flags_t flags = 0u) { cuda::event::handle_t new_event_handle; auto status = cuEventCreate(&new_event_handle, flags); - throw_if_error(status, "Failed creating a CUDA event"); + throw_if_error_lazy(status, "Failed creating a CUDA event"); return new_event_handle; } @@ -401,7 +401,7 @@ inline void destroy_in_current_context( context::handle_t current_context_handle) { auto status = cuEventDestroy(handle); - throw_if_error(status, "Failed destroying " + + throw_if_error_lazy(status, "Failed destroying " + identify(handle, current_context_handle, current_device_id)); } @@ -478,7 +478,7 @@ inline void wait(event_t event) auto event_handle = event.handle(); context::current::detail_::scoped_override_t context_for_this_scope(context_handle); auto status = cuEventSynchronize(event_handle); - throw_if_error(status, "Failed synchronizing " + event::detail_::identify(event)); + throw_if_error_lazy(status, "Failed synchronizing " + event::detail_::identify(event)); } inline void synchronize(event_t event) diff --git a/src/cuda/api/ipc.hpp b/src/cuda/api/ipc.hpp index 7948aca0..c04460b2 100644 --- a/src/cuda/api/ipc.hpp +++ b/src/cuda/api/ipc.hpp @@ -61,7 +61,7 @@ using handle_t = CUipcMemHandle; inline handle_t export_(void* device_ptr) { handle_t handle; auto status = cuIpcGetMemHandle(&handle, device::address(device_ptr)); - throw_if_error(status, "Failed producing an IPC memory handle for device pointer " + throw_if_error_lazy(status, "Failed producing an IPC memory handle for device pointer " + cuda::detail_::ptr_as_hex(device_ptr)); return handle; } @@ -81,7 +81,7 @@ inline T* import(const handle_t& handle) { CUdeviceptr device_ptr; auto status = cuIpcOpenMemHandle(&device_ptr, handle, CU_IPC_MEM_LAZY_ENABLE_PEER_ACCESS); - throw_if_error(status, "Failed obtaining a device pointer from an IPC memory handle"); + throw_if_error_lazy(status, "Failed obtaining a device pointer from an IPC memory handle"); return reinterpret_cast<T*>(device_ptr); } @@ -93,7 +93,7 @@ inline T* import(const handle_t& handle) inline void unmap(void* ipc_mapped_ptr) { auto status = cuIpcCloseMemHandle(device::address(ipc_mapped_ptr)); - throw_if_error(status, "Failed unmapping IPC memory mapped to " +
cuda::detail_::ptr_as_hex(ipc_mapped_ptr)); + throw_if_error_lazy(status, "Failed unmapping IPC memory mapped to " + cuda::detail_::ptr_as_hex(ipc_mapped_ptr)); } /** @@ -162,7 +162,7 @@ inline handle_t export_(event::handle_t event_handle) { handle_t ipc_handle; auto status = cuIpcGetEventHandle(&ipc_handle, event_handle); - throw_if_error(status, "Failed obtaining an IPC event handle for " + + throw_if_error_lazy(status, "Failed obtaining an IPC event handle for " + event::detail_::identify(event_handle)); return ipc_handle; } @@ -171,7 +171,7 @@ inline event::handle_t import(const handle_t& handle) { event::handle_t event_handle; auto status = cuIpcOpenEventHandle(&event_handle, handle); - throw_if_error(status, "Failed obtaining an event handle from an IPC event handle"); + throw_if_error_lazy(status, "Failed obtaining an event handle from an IPC event handle"); return event_handle; } diff --git a/src/cuda/api/kernel.hpp b/src/cuda/api/kernel.hpp index 4f4b9f6a..0319d6ec 100644 --- a/src/cuda/api/kernel.hpp +++ b/src/cuda/api/kernel.hpp @@ -81,7 +81,7 @@ inline attribute_value_t get_attribute_in_current_context(handle_t handle, attri { kernel::attribute_value_t attribute_value; auto result = cuFuncGetAttribute(&attribute_value, attribute, handle); - throw_if_error(result, + throw_if_error_lazy(result, ::std::string("Failed obtaining attribute ") + attribute_name(attribute) ); return attribute_value; } @@ -301,7 +301,7 @@ class kernel_t { { context::current::detail_::scoped_override_t set_context_for_this_context(context_handle_); auto result = cuFuncSetCacheConfig(handle(), (CUfunc_cache) preference); - throw_if_error(result, + throw_if_error_lazy(result, "Setting the multiprocessor L1/Shared Memory cache distribution preference for a " "CUDA device function"); } @@ -317,7 +317,7 @@ class kernel_t { // TODO: Need to set a context, not a device context::current::detail_::scoped_override_t set_context_for_this_context(context_handle_); auto result = cuFuncSetSharedMemConfig(handle(), static_cast<CUsharedconfig>(config) ); - throw_if_error(result, "Failed setting the shared memory bank size"); + throw_if_error_lazy(result, "Failed setting the shared memory bank size"); } protected: // ctors & dtor @@ -396,7 +396,7 @@ inline grid::dimension_t max_active_blocks_per_multiprocessor( auto flags = (unsigned) disable_caching_override ? CU_OCCUPANCY_DISABLE_CACHING_OVERRIDE : CU_OCCUPANCY_DEFAULT; status = cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags( &result, handle, (int) block_size_in_threads, dynamic_shared_memory_per_block, flags); - throw_if_error(status, + throw_if_error_lazy(status, "Determining the maximum occupancy in blocks per multiprocessor, given the block size and the amount of dynamic memory per block"); return result; } @@ -426,7 +426,7 @@ inline grid::composite_dimensions_t min_grid_params_for_max_occupancy( disable_caching_override ?
CU_OCCUPANCY_DISABLE_CACHING_OVERRIDE : CU_OCCUPANCY_DEFAULT ); - throw_if_error(result, + throw_if_error_lazy(result, "Failed obtaining parameters for a minimum-size grid for " + kernel::detail_::identify(kernel_handle, device_id) + " with maximum occupancy given dynamic shared memory and block size data"); return { (grid::dimension_t) min_grid_size_in_blocks, (grid::block_dimension_t) block_size }; @@ -448,7 +448,7 @@ inline memory::shared::size_t max_dynamic_shared_memory_per_block( size_t result; auto status = cuOccupancyAvailableDynamicSMemPerBlock( &result, kernel.handle(), (int) blocks_on_multiprocessor, (int) block_size_in_threads); - throw_if_error(status, + throw_if_error_lazy(status, "Determining the available dynamic memory per block, given the number of blocks on a multiprocessor and their size"); return (memory::shared::size_t) result; } diff --git a/src/cuda/api/kernel_launch.hpp b/src/cuda/api/kernel_launch.hpp index 66b5b587..2ab49419 100644 --- a/src/cuda/api/kernel_launch.hpp +++ b/src/cuda/api/kernel_launch.hpp @@ -214,7 +214,7 @@ void enqueue_raw_kernel_launch( (size_t)launch_configuration.dynamic_shared_memory_size, cudaStream_t(stream_handle)); #endif // CUDA_VERSION >= 11000 - throw_if_error(status, "Cooperative kernel launch failed"); + throw_if_error_lazy(status, "Cooperative kernel launch failed"); #endif // CUDA_VERSION >= 9000 } } diff --git a/src/cuda/api/link.hpp b/src/cuda/api/link.hpp index 77a65171..83c9f28b 100644 --- a/src/cuda/api/link.hpp +++ b/src/cuda/api/link.hpp @@ -97,7 +97,7 @@ class link_t { void* cubin_output_start; size_t cubin_output_size; auto status = cuLinkComplete(handle_, &cubin_output_start, &cubin_output_size); - throw_if_error(status, + throw_if_error_lazy(status, "Failed completing the link with state at address " + cuda::detail_::ptr_as_hex(handle_)); return memory::region_t{cubin_output_start, cubin_output_size}; } @@ -116,7 +116,7 @@ class link_t { const_cast(marshalled_options.options()), const_cast(marshalled_options.values()) ); - throw_if_error(status, + throw_if_error_lazy(status, "Failed adding input " + ::std::string(image.name) + " of type " + ::std::to_string(image.type) + " to a link."); } @@ -131,7 +131,7 @@ class link_t { const_cast(marshalled_options.options()), const_cast(marshalled_options.values()) ); - throw_if_error(status, + throw_if_error_lazy(status, "Failed loading an object of type " + ::std::to_string(file_input.type) + " from file " + file_input.path); } @@ -171,7 +171,7 @@ class link_t { if (owning) { context::current::detail_::scoped_override_t set_context_for_this_scope(context_handle_); auto status = cuLinkDestroy(handle_); - throw_if_error(status, + throw_if_error_lazy(status, ::std::string("Failed destroying the link ") + detail_::ptr_as_hex(handle_) + " in " + context::detail_::identify(context_handle_, device_id_)); } @@ -212,7 +212,7 @@ inline link_t create(const link::options_t& options = link::options_t{}) const_cast(marshalled_options.values()), &new_link_handle ); - throw_if_error(status, "Failed creating a new link "); + throw_if_error_lazy(status, "Failed creating a new link "); auto do_take_ownership = true; auto context_handle = context::current::detail_::get_handle(); auto device_id = context::current::detail_::get_device_id(); diff --git a/src/cuda/api/memory.hpp b/src/cuda/api/memory.hpp index 2c3468bd..ac639a5b 100644 --- a/src/cuda/api/memory.hpp +++ b/src/cuda/api/memory.hpp @@ -151,7 +151,7 @@ inline cuda::memory::region_t allocate_in_current_context(size_t num_bytes) // Can this 
even happen? hopefully not status = (status_t) status::unknown; } - throw_if_error(status, "Failed allocating " + ::std::to_string(num_bytes) + + throw_if_error_lazy(status, "Failed allocating " + ::std::to_string(num_bytes) + " bytes of global memory on the current CUDA device"); return {as_pointer(allocated), num_bytes}; } @@ -185,7 +185,7 @@ inline region_t allocate( // Can this even happen? hopefully not status = static_cast(status::unknown); } - throw_if_error(status, + throw_if_error_lazy(status, "Failed scheduling an asynchronous allocation of " + ::std::to_string(num_bytes) + " bytes of global memory on " + stream::detail_::identify(stream_handle, context_handle) ); return {as_pointer(allocated), num_bytes}; @@ -239,7 +239,7 @@ inline void free( void* allocated_region_start) { auto status = cuMemFreeAsync(device::address(allocated_region_start), stream_handle); - throw_if_error(status, + throw_if_error_lazy(status, "Failed scheduling an asynchronous freeing of the global memory region starting at " + cuda::detail_::ptr_as_hex(allocated_region_start) + " on " + stream::detail_::identify(stream_handle, context_handle) ); @@ -842,7 +842,7 @@ void copy(array_t destination, array_t sourc params.dstPitch = params.srcPitch = dims.width * sizeof(T); auto status = //(source.context() == destination.context()) ? detail_::multidim_copy(source.context_handle(), params); - throw_if_error(status, "Copying from a CUDA array into a regular memory region"); + throw_if_error_lazy(status, "Copying from a CUDA array into a regular memory region"); } @@ -901,7 +901,7 @@ inline void copy(void* destination, const void* source, size_t num_bytes, stream // TODO: Determine whether it was from host to device, device to host etc and // add this information to the error string - throw_if_error(result, "Scheduling a memory copy on " + stream::detail_::identify(stream_handle)); + throw_if_error_lazy(result, "Scheduling a memory copy on " + stream::detail_::identify(stream_handle)); } /** @@ -1217,7 +1217,7 @@ inline void set(void* start, int byte_value, size_t num_bytes, stream::handle_t { // TODO: Double-check that this call doesn't require setting the current device auto result = cuMemsetD8Async(address(start), (unsigned char) byte_value, num_bytes, stream_handle); - throw_if_error(result, "asynchronously memsetting an on-device buffer"); + throw_if_error_lazy(result, "asynchronously memsetting an on-device buffer"); } inline void set(region_t region, int byte_value, stream::handle_t stream_handle) @@ -1251,7 +1251,7 @@ inline void typed_set(T* start, const T& value, size_t num_elements, stream::han case(2): result = cuMemsetD16Async(address(start), reinterpret_cast(value), num_elements, stream_handle); break; case(4): result = cuMemsetD32Async(address(start), reinterpret_cast(value), num_elements, stream_handle); break; } - throw_if_error(result, "Setting global device memory bytes"); + throw_if_error_lazy(result, "Setting global device memory bytes"); } } // namespace detail_ @@ -1330,7 +1330,7 @@ inline void copy( destination_context, reinterpret_cast(source_address), source_context, num_bytes); - throw_if_error(status, + throw_if_error_lazy(status, ::std::string("Failed copying data between devices: From address ") + cuda::detail_::ptr_as_hex(source_address) + " in " + context::detail_::identify(source_context) + " to address " @@ -1402,7 +1402,7 @@ inline void copy( // TODO: Determine whether it was from host to device, device to host etc and // add this information to the error string - 
throw_if_error(result, "Scheduling an inter-context memory copy from " + throw_if_error_lazy(result, "Scheduling an inter-context memory copy from " + context::detail_::identify(source_context_handle) + " to " + context::detail_::identify(destination_context_handle) + " on " + stream::detail_::identify(stream_handle)); @@ -1559,7 +1559,7 @@ struct deleter { inline void register_(const void *ptr, size_t size, unsigned flags) { auto result = cuMemHostRegister(const_cast(ptr), size, flags); - throw_if_error(result, + throw_if_error_lazy(result, "Could not register and page-lock the region of " + ::std::to_string(size) + " bytes of host memory at " + cuda::detail_::ptr_as_hex(ptr)); } @@ -1648,7 +1648,7 @@ inline void register_(const_region_t region) inline void deregister(const void *ptr) { auto result = cuMemHostUnregister(const_cast(ptr)); - throw_if_error(result, + throw_if_error_lazy(result, "Could not unregister the memory segment starting at address *a"); } @@ -1770,7 +1770,7 @@ inline T get_scalar_range_attribute(managed::const_region_t region, range_attrib uint32_t attribute_value { 0 }; auto result = cuMemRangeGetAttribute( &attribute_value, sizeof(attribute_value), attribute, device::address(region.start()), region.size()); - throw_if_error(result, + throw_if_error_lazy(result, "Obtaining an attribute for a managed memory range at " + cuda::detail_::ptr_as_hex(region.start())); return static_cast(attribute_value); } @@ -1780,7 +1780,7 @@ inline T get_scalar_range_attribute(managed::const_region_t region, range_attrib inline void advise(managed::const_region_t region, advice_t advice, cuda::device::id_t device_id) { auto result = cuMemAdvise(device::address(region.start()), region.size(), advice, device_id); - throw_if_error(result, "Setting an attribute for a managed memory range at " + throw_if_error_lazy(result, "Setting an attribute for a managed memory range at " + cuda::detail_::ptr_as_hex(region.start())); } @@ -1847,7 +1847,7 @@ inline region_t allocate_in_current_context( // Can this even happen? hopefully not status = (status_t) status::unknown; } - throw_if_error(status, "Failed allocating " + throw_if_error_lazy(status, "Failed allocating " + ::std::to_string(num_bytes) + " bytes of managed CUDA memory"); return {as_pointer(allocated), num_bytes}; } @@ -1862,7 +1862,7 @@ inline void free(void* ptr) { auto result = cuMemFree(device::address(ptr)); cuda::device::primary_context::detail_::decrease_refcount(cuda::device::default_device_id); - throw_if_error(result, "Freeing managed memory at " + cuda::detail_::ptr_as_hex(ptr)); + throw_if_error_lazy(result, "Freeing managed memory at " + cuda::detail_::ptr_as_hex(ptr)); } inline void free(region_t region) { @@ -1949,7 +1949,7 @@ region_t allocate(size_t num_bytes); inline void free(void* managed_ptr) { auto result = cuMemFree(device::address(managed_ptr)); - throw_if_error(result, + throw_if_error_lazy(result, "Freeing managed memory (host and device regions) at address " + cuda::detail_::ptr_as_hex(managed_ptr)); } @@ -1973,7 +1973,7 @@ namespace detail_ { inline void set(const_region_t region, kind_t advice, cuda::device::id_t device_id) { auto result = cuMemAdvise(device::address(region.start()), region.size(), (managed::detail_::advice_t) advice, device_id); - throw_if_error(result, "Setting advice on a (managed) memory region at" + throw_if_error_lazy(result, "Setting advice on a (managed) memory region at" + cuda::detail_::ptr_as_hex(region.start()) + " w.r.t. 
" + cuda::device::detail_::identify(device_id)); } @@ -1993,7 +1993,7 @@ inline void prefetch( stream::handle_t source_stream_handle) { auto result = cuMemPrefetchAsync(device::address(region.start()), region.size(), destination, source_stream_handle); - throw_if_error(result, + throw_if_error_lazy(result, "Prefetching " + ::std::to_string(region.size()) + " bytes of managed memory at address " + cuda::detail_::ptr_as_hex(region.start()) + " to " + ( (destination == CU_DEVICE_CPU) ? "the host" : cuda::device::detail_::identify(destination)) ); @@ -2038,7 +2038,7 @@ inline T* device_side_pointer_for(T* host_memory_ptr) &device_side_ptr, host_memory_ptr, get_device_pointer_flags); - throw_if_error(status, + throw_if_error_lazy(status, "Failed obtaining the device-side pointer for host-memory pointer " + cuda::detail_::ptr_as_hex(host_memory_ptr) + " supposedly mapped to device memory"); return as_pointer(device_side_ptr); @@ -2070,7 +2070,7 @@ inline region_pair allocate_in_current_context( // Can this even happen? hopefully not status = (status_t) status::named_t::unknown; } - throw_if_error(status, + throw_if_error_lazy(status, "Failed allocating a mapped pair of memory regions of size " + ::std::to_string(size_in_bytes) + " bytes of global memory in " + context::detail_::identify(current_context_handle)); allocated.device_side = device_side_pointer_for(allocated.host_side); @@ -2089,7 +2089,7 @@ inline region_pair allocate( inline void free(void* host_side_pair) { auto result = cuMemFreeHost(host_side_pair); - throw_if_error(result, "Freeing a mapped memory region pair with host-side address " + throw_if_error_lazy(result, "Freeing a mapped memory region pair with host-side address " + cuda::detail_::ptr_as_hex(host_side_pair)); } @@ -2146,7 +2146,7 @@ inline void free_region_pair_of(void* ptr) // We could check this... 
void* host_side_ptr; auto status = cuPointerGetAttribute (&host_side_ptr, CU_POINTER_ATTRIBUTE_HOST_POINTER, memory::device::address(ptr)); - throw_if_error(status, "Failed obtaining the host-side address of supposedly-device-side pointer " + throw_if_error_lazy(status, "Failed obtaining the host-side address of supposedly-device-side pointer " + cuda::detail_::ptr_as_hex(ptr)); detail_::free(host_side_ptr); } @@ -2186,9 +2186,9 @@ inline memory::region_t locate(T&& symbol) void *start; size_t symbol_size; auto api_call_result = cudaGetSymbolAddress(&start, ::std::forward(symbol)); - throw_if_error(api_call_result, "Could not locate the device memory address for a symbol"); + throw_if_error_lazy(api_call_result, "Could not locate the device memory address for a symbol"); api_call_result = cudaGetSymbolSize(&symbol_size, ::std::forward(symbol)); - throw_if_error(api_call_result, "Could not locate the device memory address for the symbol at address" + throw_if_error_lazy(api_call_result, "Could not locate the device memory address for the symbol at address" + cuda::detail_::ptr_as_hex(start)); return { start, symbol_size }; } diff --git a/src/cuda/api/miscellany.hpp b/src/cuda/api/miscellany.hpp index 897f0642..cf7fd530 100644 --- a/src/cuda/api/miscellany.hpp +++ b/src/cuda/api/miscellany.hpp @@ -30,7 +30,7 @@ inline void initialize_driver() { static constexpr const unsigned dummy_flags { 0 }; // this is the only allowed value for flags auto status = cuInit(dummy_flags); - throw_if_error(status, "Failed initializing the CUDA driver"); + throw_if_error_lazy(status, "Failed initializing the CUDA driver"); } inline void ensure_driver_is_initialized() diff --git a/src/cuda/api/module.hpp b/src/cuda/api/module.hpp index 7348e05a..263d1792 100644 --- a/src/cuda/api/module.hpp +++ b/src/cuda/api/module.hpp @@ -132,7 +132,7 @@ class module_t { CUdeviceptr dptr; size_t size; auto result = cuModuleGetGlobal(&dptr, &size, handle_, name); - throw_if_error(result, "Obtaining the address and size of a named global object"); + throw_if_error_lazy(result, "Obtaining the address and size of a named global object"); return { memory::as_pointer(dptr), size }; } @@ -240,7 +240,7 @@ inline module_t load_from_file_in_current_context( { handle_t new_module_handle; auto status = cuModuleLoad(&new_module_handle, path); - throw_if_error(status, ::std::string("Failed loading a module from file ") + path); + throw_if_error_lazy(status, ::std::string("Failed loading a module from file ") + path); bool do_take_ownership{true}; return wrap( current_context_device_id, @@ -366,7 +366,7 @@ inline void destroy(handle_t handle, context::handle_t context_handle, device::i { context::current::detail_::scoped_override_t set_context_for_this_scope(context_handle); auto status = cuModuleUnload(handle); - throw_if_error(status, "Failed unloading " + identify(handle, context_handle, device_id)); + throw_if_error_lazy(status, "Failed unloading " + identify(handle, context_handle, device_id)); } } // namespace detail_ diff --git a/src/cuda/api/multi_wrapper_impls/apriori_compiled_kernel.hpp b/src/cuda/api/multi_wrapper_impls/apriori_compiled_kernel.hpp index 4e978ebb..3e8bccb3 100644 --- a/src/cuda/api/multi_wrapper_impls/apriori_compiled_kernel.hpp +++ b/src/cuda/api/multi_wrapper_impls/apriori_compiled_kernel.hpp @@ -41,7 +41,7 @@ inline kernel::attributes_t apriori_compiled_kernel_t::attributes() const context::current::detail_::scoped_override_t set_context_for_this_scope(context_handle_); kernel::attributes_t function_attributes; 
auto status = cudaFuncGetAttributes(&function_attributes, ptr_); - throw_if_error(status, "Failed obtaining attributes for a CUDA device function"); + throw_if_error_lazy(status, "Failed obtaining attributes for a CUDA device function"); return function_attributes; } @@ -50,7 +50,7 @@ inline void apriori_compiled_kernel_t::set_cache_preference(multiprocessor_cache // Note: assuming the primary context is active context::current::detail_::scoped_override_t set_context_for_this_scope(context_handle_); auto result = cudaFuncSetCacheConfig(ptr_, (cudaFuncCache) preference); - throw_if_error(result, + throw_if_error_lazy(result, "Setting the multiprocessor L1/Shared Memory cache distribution preference for a " "CUDA device function"); } @@ -61,7 +61,7 @@ inline void apriori_compiled_kernel_t::set_shared_memory_bank_size( // Note: assuming the primary context is active context::current::detail_::scoped_override_t set_context_for_this_scope(context_handle_); auto result = cudaFuncSetSharedMemConfig(ptr_, (cudaSharedMemConfig) config); - throw_if_error(result); + throw_if_error_lazy(result, "Failed setting shared memory bank size to " + std::to_string(config)); } inline void apriori_compiled_kernel_t::set_attribute(kernel::attribute_t attribute, kernel::attribute_value_t value) const @@ -81,7 +81,7 @@ inline void apriori_compiled_kernel_t::set_attribute(kernel::attribute_t attribu } }(); auto result = cudaFuncSetAttribute(ptr_, runtime_attribute, value); - throw_if_error(result, "Setting CUDA device function attribute " + ::std::to_string(attribute) + " to value " + ::std::to_string(value)); + throw_if_error_lazy(result, "Setting CUDA device function attribute " + ::std::to_string(attribute) + " to value " + ::std::to_string(value)); } kernel::attribute_value_t apriori_compiled_kernel_t::get_attribute(kernel::attribute_t attribute) const diff --git a/src/cuda/api/multi_wrapper_impls/device.hpp b/src/cuda/api/multi_wrapper_impls/device.hpp index 023fea93..5efcf1b2 100644 --- a/src/cuda/api/multi_wrapper_impls/device.hpp +++ b/src/cuda/api/multi_wrapper_impls/device.hpp @@ -36,7 +36,7 @@ inline bool is_active(const device_t& device) inline void destroy(const device_t& device) { auto status = cuDevicePrimaryCtxReset(device.id()); - throw_if_error(status, "Failed destroying/resetting the primary context of device " + ::std::to_string(device.id())); + throw_if_error_lazy(status, "Failed destroying/resetting the primary context of device " + ::std::to_string(device.id())); } inline primary_context_t get(const device_t& device) diff --git a/src/cuda/api/multi_wrapper_impls/kernel.hpp b/src/cuda/api/multi_wrapper_impls/kernel.hpp index c6e364f8..be2fc8e9 100644 --- a/src/cuda/api/multi_wrapper_impls/kernel.hpp +++ b/src/cuda/api/multi_wrapper_impls/kernel.hpp @@ -68,7 +68,7 @@ inline void kernel_t::set_attribute(kernel::attribute_t attribute, kernel::attri #if CUDA_VERSION >= 9000 context::current::detail_::scoped_override_t set_context_for_this_context(context_handle_); auto result = cuFuncSetAttribute(handle_, static_cast(attribute), value); - throw_if_error(result, + throw_if_error_lazy(result, "Setting CUDA device function attribute " + ::std::string(kernel::detail_::attribute_name(attribute)) + " to value " + ::std::to_string(value) ); diff --git a/src/cuda/api/multi_wrapper_impls/kernel_launch.hpp b/src/cuda/api/multi_wrapper_impls/kernel_launch.hpp index 67950d1e..f4ad7585 100644 --- a/src/cuda/api/multi_wrapper_impls/kernel_launch.hpp +++ b/src/cuda/api/multi_wrapper_impls/kernel_launch.hpp @@ 
-86,7 +86,7 @@ inline void launch_type_erased_in_current_context( no_arguments_in_alternative_format ); } - throw_if_error(status, + throw_if_error_lazy(status, (lc.block_cooperation ? "Cooperative " : "") + ::std::string(" kernel launch failed for ") + kernel::detail_::identify(kernel_function_handle) + " on " + stream::detail_::identify(stream_handle, context_handle, device_id)); @@ -212,7 +212,7 @@ inline grid::composite_dimensions_t min_grid_params_for_max_occupancy( static_cast(block_size_limit), disable_caching_override ? cudaOccupancyDisableCachingOverride : cudaOccupancyDefault ); - throw_if_error(result, + throw_if_error_lazy(result, "Failed obtaining parameters for a minimum-size grid for kernel " + detail_::ptr_as_hex(ptr) + " on device " + ::std::to_string(device_id) + "."); return { (grid::dimension_t) min_grid_size_in_blocks, (grid::block_dimension_t) block_size }; diff --git a/src/cuda/api/multi_wrapper_impls/memory.hpp b/src/cuda/api/multi_wrapper_impls/memory.hpp index 2fa2d1fd..f421dab6 100644 --- a/src/cuda/api/multi_wrapper_impls/memory.hpp +++ b/src/cuda/api/multi_wrapper_impls/memory.hpp @@ -386,7 +386,7 @@ ::std::vector accessors(const_region_t region, const Alloca auto status = cuMemRangeGetAttribute( device_ids, sizeof(device_t) * devices.size(), CU_MEM_RANGE_ATTRIBUTE_ACCESSED_BY, device::address(region.start()), region.size()); - throw_if_error(status, "Obtaining the IDs of devices with access to the managed memory range at " + throw_if_error_lazy(status, "Obtaining the IDs of devices with access to the managed memory range at " + cuda::detail_::ptr_as_hex(region.start())); auto first_invalid_element = ::std::lower_bound(device_ids, device_ids + num_devices, cudaInvalidDeviceId); // We may have gotten less results that the set of all devices, so let's whittle that down @@ -487,7 +487,7 @@ inline void* allocate( // Can this even happen? 
hopefully not result = static_cast(status::named_t::unknown); } - throw_if_error(result, "Failed allocating " + ::std::to_string(size_in_bytes) + " bytes of host memory"); + throw_if_error_lazy(result, "Failed allocating " + ::std::to_string(size_in_bytes) + " bytes of host memory"); return allocated; } @@ -502,7 +502,7 @@ attribute_value_type_t get_attribute(const void *ptr) context::current::detail_::scoped_existence_ensurer_t ensure_we_have_some_context; attribute_value_type_t attribute_value; auto status = cuPointerGetAttribute(&attribute_value, attribute, device::address(ptr)); - throw_if_error(status, "Obtaining attribute " + ::std::to_string((int) attribute) + throw_if_error_lazy(status, "Obtaining attribute " + ::std::to_string((int) attribute) + " for pointer " + cuda::detail_::ptr_as_hex(ptr) ); return attribute_value; } @@ -512,7 +512,7 @@ inline void get_attributes(unsigned num_attributes, pointer::attribute_t* attrib { context::current::detail_::scoped_existence_ensurer_t ensure_we_have_some_context; auto status = cuPointerGetAttributes( num_attributes, attributes, value_ptrs, device::address(ptr) ); - throw_if_error(status, "Obtaining multiple attributes for pointer " + cuda::detail_::ptr_as_hex(ptr)); + throw_if_error_lazy(status, "Obtaining multiple attributes for pointer " + cuda::detail_::ptr_as_hex(ptr)); } } // namespace detail_ @@ -524,7 +524,7 @@ inline void copy(void *destination, const void *source, size_t num_bytes) auto result = cuMemcpy(device::address(destination), device::address(source), num_bytes); // TODO: Determine whether it was from host to device, device to host etc and // add this information to the error string - throw_if_error(result, "Synchronously copying data"); + throw_if_error_lazy(result, "Synchronously copying data"); } namespace device { @@ -545,7 +545,7 @@ inline void typed_set(T* start, const T& value, size_t num_elements) case(2): result = cuMemsetD16(address(start), reinterpret_cast(value), num_elements); break; case(4): result = cuMemsetD32(address(start), reinterpret_cast(value), num_elements); break; } - throw_if_error(result, "Setting global device memory bytes"); + throw_if_error_lazy(result, "Setting global device memory bytes"); } } // namespace device @@ -599,7 +599,7 @@ inline void set_access_mode( CUmemAccessDesc desc { { CU_MEM_LOCATION_TYPE_DEVICE, device.id() }, CUmemAccess_flags(access_mode) }; static constexpr const size_t count { 1 }; auto result = cuMemSetAccess(fully_mapped_region.device_address(), fully_mapped_region.size(), &desc, count); - throw_if_error(result, "Failed setting the access mode to the virtual memory mapping to the range of size " + throw_if_error_lazy(result, "Failed setting the access mode to the virtual memory mapping to the range of size " + ::std::to_string(fully_mapped_region.size()) + " bytes at " + cuda::detail_::ptr_as_hex(fully_mapped_region.data())); } @@ -620,7 +620,7 @@ inline void set_access_mode( } auto result = cuMemSetAccess( device::address(fully_mapped_region.start()), fully_mapped_region.size(), descriptors, devices.size()); - throw_if_error(result, "Failed setting the access mode to the virtual memory mapping to the range of size " + throw_if_error_lazy(result, "Failed setting the access mode to the virtual memory mapping to the range of size " + ::std::to_string(fully_mapped_region.size()) + " bytes at " + cuda::detail_::ptr_as_hex(fully_mapped_region.data())); } diff --git a/src/cuda/api/multi_wrapper_impls/module.hpp b/src/cuda/api/multi_wrapper_impls/module.hpp index 
0c2e808b..52dd07fe 100644 --- a/src/cuda/api/multi_wrapper_impls/module.hpp +++ b/src/cuda/api/multi_wrapper_impls/module.hpp @@ -40,7 +40,7 @@ inline cuda::kernel_t module_t::get_kernel(const char* name) const context::current::detail_::scoped_override_t set_context_for_this_scope(context_handle_); kernel::handle_t kernel_function_handle; auto result = cuModuleGetFunction(&kernel_function_handle, handle_, name); - throw_if_error(result, ::std::string("Failed obtaining function ") + name + throw_if_error_lazy(result, ::std::string("Failed obtaining function ") + name + " from " + module::detail_::identify(*this)); return kernel::wrap(context::detail_::get_device_id(context_handle_), context_handle_, kernel_function_handle); } @@ -56,7 +56,7 @@ module_t create(const context_t& context, const void* module_data, Creator creat context::current::scoped_override_t set_context_for_this_scope(context); handle_t new_module_handle; auto status = creator_function(new_module_handle, module_data); - throw_if_error(status, ::std::string("Failed loading a module from memory location ") + throw_if_error_lazy(status, ::std::string("Failed loading a module from memory location ") + cuda::detail_::ptr_as_hex(module_data) + " within " + context::detail_::identify(context)); bool do_take_ownership { true }; @@ -129,7 +129,7 @@ inline CUsurfref module_t::get_surface(const char* name) const context::current::detail_::scoped_override_t set_context_for_this_scope(context_handle_); CUsurfref raw_surface_reference; auto status = cuModuleGetSurfRef(&raw_surface_reference, handle_, name); - throw_if_error(status, ::std::string("Failed obtaining a reference to surface \"") + name + "\" from " + throw_if_error_lazy(status, ::std::string("Failed obtaining a reference to surface \"") + name + "\" from " + module::detail_::identify(*this)); return raw_surface_reference; } @@ -139,7 +139,7 @@ inline CUtexref module_t::get_texture_reference(const char* name) const context::current::detail_::scoped_override_t set_context_for_this_scope(context_handle_); CUtexref raw_texture_reference; auto status = cuModuleGetTexRef(&raw_texture_reference, handle_, name); - throw_if_error(status, ::std::string("Failed obtaining a reference to texture \"") + name + "\" from " + throw_if_error_lazy(status, ::std::string("Failed obtaining a reference to texture \"") + name + "\" from " + module::detail_::identify(*this)); return raw_texture_reference; } diff --git a/src/cuda/api/multi_wrapper_impls/stream.hpp b/src/cuda/api/multi_wrapper_impls/stream.hpp index eab628b1..10691889 100644 --- a/src/cuda/api/multi_wrapper_impls/stream.hpp +++ b/src/cuda/api/multi_wrapper_impls/stream.hpp @@ -47,7 +47,7 @@ inline void record_event_in_current_context( event::handle_t event_handle) { auto status = cuEventRecord(event_handle, stream_handle); - throw_if_error(status, + throw_if_error_lazy(status, "Failed scheduling " + event::detail_::identify(event_handle) + " on " + stream::detail_::identify(stream_handle, current_context_handle_, current_device_id)); } @@ -85,7 +85,7 @@ inline void stream_t::enqueue_t::wait(const event_t& event_) const static constexpr const unsigned int flags = 0; auto status = cuStreamWaitEvent(associated_stream.handle_, event_.handle(), flags); - throw_if_error(status, + throw_if_error_lazy(status, "Failed scheduling a wait for " + event::detail_::identify(event_.handle()) + " on " + stream::detail_::identify(associated_stream)); @@ -152,7 +152,7 @@ inline void copy_attributes(const stream_t &dest, const stream_t &src) #endif 
context::current::detail_::scoped_override_t set_context_for_this_scope(dest.context_handle()); auto status = cuStreamCopyAttributes(dest.handle(), src.handle()); - throw_if_error(status, "Copying attributes from " + stream::detail_::identify(src) + throw_if_error_lazy(status, "Copying attributes from " + stream::detail_::identify(src) + " to " + stream::detail_::identify(src)); } diff --git a/src/cuda/api/pci_id.hpp b/src/cuda/api/pci_id.hpp index b0a6a008..16b3fe2d 100644 --- a/src/cuda/api/pci_id.hpp +++ b/src/cuda/api/pci_id.hpp @@ -53,7 +53,7 @@ inline id_t resolve_id(pci_location_t pci_id) ::std::string as_string { pci_id }; id_t cuda_device_id; auto result = cuDeviceGetByPCIBusId(&cuda_device_id, as_string.c_str()); - throw_if_error(result, + throw_if_error_lazy(result, "Failed obtaining a CUDA device ID corresponding to PCI id " + as_string); return cuda_device_id; } diff --git a/src/cuda/api/peer_to_peer.hpp b/src/cuda/api/peer_to_peer.hpp index cfa71a7a..1a3ceb93 100644 --- a/src/cuda/api/peer_to_peer.hpp +++ b/src/cuda/api/peer_to_peer.hpp @@ -42,7 +42,7 @@ inline attribute_value_t get_attribute(attribute_t attribute, id_t source, id_t { attribute_value_t value; auto status = cuDeviceGetP2PAttribute(&value, attribute, source, destination); - throw_if_error(status, "Failed obtaining peer-to-peer device attribute for device pair (" + throw_if_error_lazy(status, "Failed obtaining peer-to-peer device attribute for device pair (" + ::std::to_string(source) + ", " + ::std::to_string(destination) + ')'); return value; } @@ -51,7 +51,7 @@ inline bool can_access(const device::id_t accessor, const device::id_t peer) { int result; auto status = cuDeviceCanAccessPeer(&result, accessor, peer); - throw_if_error(status, "Failed determining whether " + device::detail_::identify(accessor) + throw_if_error_lazy(status, "Failed determining whether " + device::detail_::identify(accessor) + " can access " + device::detail_::identify(peer)); return (result == 1); } @@ -85,13 +85,13 @@ inline void enable_access_to(context::handle_t peer_context) enum : unsigned {fixed_flags = 0 }; // No flags are supported as of CUDA 8.0 auto status = cuCtxEnablePeerAccess(peer_context, fixed_flags); - throw_if_error(status, "Failed enabling access to peer " + context::detail_::identify(peer_context)); + throw_if_error_lazy(status, "Failed enabling access to peer " + context::detail_::identify(peer_context)); } inline void disable_access_to(context::handle_t peer_context) { auto status = cuCtxDisablePeerAccess(peer_context); - throw_if_error(status, "Failed disabling access to peer " + context::detail_::identify(peer_context)); + throw_if_error_lazy(status, "Failed disabling access to peer " + context::detail_::identify(peer_context)); } inline void enable_access(context::handle_t accessor, context::handle_t peer) diff --git a/src/cuda/api/primary_context.hpp b/src/cuda/api/primary_context.hpp index 80be8d9a..11ba809a 100644 --- a/src/cuda/api/primary_context.hpp +++ b/src/cuda/api/primary_context.hpp @@ -58,14 +58,14 @@ inline status_t decrease_refcount_nothrow(device::id_t device_id) noexcept inline void decrease_refcount(device::id_t device_id) { auto status = decrease_refcount_nothrow(device_id); - throw_if_error(status, "Failed releasing the reference to the primary context for " + device::detail_::identify(device_id)); + throw_if_error_lazy(status, "Failed releasing the reference to the primary context for " + device::detail_::identify(device_id)); } inline handle_t obtain_and_increase_refcount(device::id_t 
device_id)
 {
 	handle_t primary_context_handle;
 	auto status = cuDevicePrimaryCtxRetain(&primary_context_handle, device_id);
-	throw_if_error(status,
+	throw_if_error_lazy(status,
 		"Failed obtaining (and possibly creating, and adding a reference count to) the primary context for "
 		+ device::detail_::identify(device_id));
 	return primary_context_handle;
diff --git a/src/cuda/api/stream.hpp b/src/cuda/api/stream.hpp
index 26b1331a..251b7591 100644
--- a/src/cuda/api/stream.hpp
+++ b/src/cuda/api/stream.hpp
@@ -118,7 +118,7 @@ inline handle_t create_raw_in_current_context(
 	auto status = cuStreamCreateWithPriority(&new_stream_handle, flags, priority);
 	// We could instead have used an equivalent Driver API call:
 	// cuStreamCreateWithPriority(&new_stream_handle, flags, priority);
-	throw_if_error(status, "Failed creating a new stream in " + detail_::identify(new_stream_handle));
+	throw_if_error_lazy(status, "Failed creating a new stream in " + detail_::identify(new_stream_handle));
 	return new_stream_handle;
 }
@@ -127,7 +127,7 @@ inline context::handle_t context_handle_of(stream::handle_t stream_handle)
 {
 	context::handle_t handle;
 	auto result = cuStreamGetCtx(stream_handle, &handle);
-	throw_if_error(result, "Failed obtaining the context of " + cuda::detail_::ptr_as_hex(stream_handle));
+	throw_if_error_lazy(result, "Failed obtaining the context of " + cuda::detail_::ptr_as_hex(stream_handle));
 	return handle;
 }
 #endif // CUDA_VERSION >= 9020
@@ -244,7 +244,7 @@ class stream_t {
 		auto status = cuStreamGetFlags(handle_, &flags);
 		// Could have used the equivalent Driver API call,
 		// cuStreamGetFlags(handle_, &flags);
-		throw_if_error(status, "Failed obtaining flags for a stream in "
+		throw_if_error_lazy(status, "Failed obtaining flags for a stream in "
 			+ context::detail_::identify(context_handle_, device_id_));
 		return flags & CU_STREAM_NON_BLOCKING;
 	}
@@ -255,7 +255,7 @@ class stream_t {
 		auto status = cuStreamGetPriority(handle_, &the_priority);
 		// Could have used the equivalent Runtime API call:
 		// cuStreamGetPriority(handle_, &the_priority);
-		throw_if_error(status, "Failed obtaining priority for a stream in "
+		throw_if_error_lazy(status, "Failed obtaining priority for a stream in "
 			+ context::detail_::identify(context_handle_, device_id_));
 		return the_priority;
 	}
@@ -548,7 +548,7 @@ class stream_t {
 		// Could have used the equivalent Driver API call: cuAddStreamCallback()
 #endif
-		throw_if_error(status, "Failed scheduling a callback to be launched on "
+		throw_if_error_lazy(status, "Failed scheduling a callback to be launched on "
 			+ stream::detail_::identify(associated_stream.handle_,
 				associated_stream.context_handle_, associated_stream.device_id_));
 	}
@@ -623,7 +623,7 @@ class stream_t {
 		auto status = cuStreamAttachMemAsync(
 			associated_stream.handle_, memory::device::address(managed_region_start), length, flags);
 		// Could have used the equivalent Driver API call cuStreamAttachMemAsync
-		throw_if_error(status, "Failed scheduling an attachment of a managed memory region on "
+		throw_if_error_lazy(status, "Failed scheduling an attachment of a managed memory region on "
 			+ stream::detail_::identify(associated_stream.handle_,
 				associated_stream.context_handle_, associated_stream.device_id_));
 	}
@@ -678,7 +678,7 @@ class stream_t {
 			CU_STREAM_WRITE_VALUE_NO_MEMORY_BARRIER;
 		auto result = static_cast( stream::detail_::write_value(associated_stream.handle_, address, value, flags));
-		throw_if_error(result, "Failed scheduling a write to global memory on "
+		throw_if_error_lazy(result, "Failed scheduling a write to global memory on "
 			+ stream::detail_::identify(associated_stream.handle_, associated_stream.context_handle_,
 				associated_stream.device_id_));
 	}
@@ -708,7 +708,7 @@ class stream_t {
 			(with_memory_barrier ? CU_STREAM_WAIT_VALUE_FLUSH : 0);
 		auto result = static_cast( stream::detail_::wait_on_value(associated_stream.handle_, address, value, flags));
-		throw_if_error(result,
+		throw_if_error_lazy(result,
 			"Failed scheduling a wait on global memory address on "
 			+ stream::detail_::identify(
 				associated_stream.handle_,
@@ -795,7 +795,7 @@ class stream_t {
 		context::current::detail_::scoped_override_t set_context_for_this_scope(context_handle_);
 		CUstreamAttrValue wrapped_result{};
 		auto status = cuStreamGetAttribute(handle_, CU_STREAM_ATTRIBUTE_SYNCHRONIZATION_POLICY, &wrapped_result);
-		throw_if_error(status, ::std::string("Obtaining the synchronization policy of ") + stream::detail_::identify(*this));
+		throw_if_error_lazy(status, ::std::string("Obtaining the synchronization policy of ") + stream::detail_::identify(*this));
 		return static_cast(wrapped_result.syncPolicy);
 	}
@@ -805,7 +805,7 @@ class stream_t {
 		CUstreamAttrValue wrapped_value{};
 		wrapped_value.syncPolicy = static_cast(policy);
 		auto status = cuStreamSetAttribute(handle_, CU_STREAM_ATTRIBUTE_SYNCHRONIZATION_POLICY, &wrapped_value);
-		throw_if_error(status, ::std::string("Setting the synchronization policy of ") + stream::detail_::identify(*this));
+		throw_if_error_lazy(status, ::std::string("Setting the synchronization policy of ") + stream::detail_::identify(*this));
 	}
 #endif
@@ -1019,7 +1019,7 @@ using queue_id_t = stream::handle_t;
 inline void synchronize(const stream_t& stream)
 {
 	auto status = cuStreamSynchronize(stream.handle());
-	throw_if_error(status, "Failed synchronizing " + stream::detail_::identify(stream));
+	throw_if_error_lazy(status, "Failed synchronizing " + stream::detail_::identify(stream));
 }
 #if CUDA_VERSION >= 11000
diff --git a/src/cuda/api/texture_view.hpp b/src/cuda/api/texture_view.hpp
index 9d89248f..de79d921 100644
--- a/src/cuda/api/texture_view.hpp
+++ b/src/cuda/api/texture_view.hpp
@@ -116,7 +116,7 @@ class texture_view {
 		resource_descriptor.res.array.hArray = arr.get();
 		auto status = cuTexObjectCreate(&raw_view_handle, &resource_descriptor, &descriptor, nullptr);
-		throw_if_error(status, "failed creating a CUDA texture object");
+		throw_if_error_lazy(status, "failed creating a CUDA texture object");
 	}
 public: // operators
@@ -126,7 +126,7 @@ class texture_view {
 		if (owning) {
 			scoped_context_setter set_context(context_handle_);
 			auto status = cuTexObjectDestroy(raw_view_handle);
-			throw_if_error(status, "failed destroying texture object");
+			throw_if_error_lazy(status, "failed destroying texture object");
 		}
 	}
diff --git a/src/cuda/api/versions.hpp b/src/cuda/api/versions.hpp
index ec087e69..998e4ed6 100644
--- a/src/cuda/api/versions.hpp
+++ b/src/cuda/api/versions.hpp
@@ -133,7 +133,7 @@ inline version_t driver() {
 	combined_version_t version;
 	auto status = cuDriverGetVersion(&version);
 	// The same value would be returned using cuDriverGetVersion()
-	throw_if_error(status, "Failed obtaining the CUDA driver version");
+	throw_if_error_lazy(status, "Failed obtaining the CUDA driver version");
 	return version_t::from_single_number(version);
 }
@@ -146,7 +146,7 @@ inline version_t driver() {
 inline version_t runtime() {
 	combined_version_t version;
 	auto status = cudaRuntimeGetVersion(&version);
-	throw_if_error(status, "Failed obtaining the CUDA runtime version");
+	throw_if_error_lazy(status, "Failed obtaining the CUDA runtime version");
 	return version_t::from_single_number(version);
 }
diff --git a/src/cuda/api/virtual_memory.hpp b/src/cuda/api/virtual_memory.hpp
index d577c0d2..6fbfe77c 100644
--- a/src/cuda/api/virtual_memory.hpp
+++ b/src/cuda/api/virtual_memory.hpp
@@ -25,7 +25,7 @@ namespace detail_ {
 inline void cancel_reservation(memory::region_t reserved)
 {
 	auto status = cuMemAddressFree(memory::device::address(reserved.start()), reserved.size());
-	throw_if_error(status, "Failed freeing a reservation of " + memory::detail_::identify(reserved));
+	throw_if_error_lazy(status, "Failed freeing a reservation of " + memory::detail_::identify(reserved));
 }
 } // namespace detail_
@@ -90,7 +90,7 @@ inline reserved_address_range_t reserve(region_t requested_region, alignment_t a
 	unsigned long flags { 0 };
 	CUdeviceptr ptr;
 	auto status = cuMemAddressReserve(&ptr, requested_region.size(), alignment, requested_region.device_address(), flags);
-	throw_if_error(status, "Failed making a reservation of " + cuda::memory::detail_::identify(requested_region)
+	throw_if_error_lazy(status, "Failed making a reservation of " + cuda::memory::detail_::identify(requested_region)
 		+ " with alignment value " + ::std::to_string(alignment));
 	bool is_owning { true };
 	return detail_::wrap(memory::region_t { ptr, requested_region.size() }, alignment, is_owning);
 }
@@ -149,7 +149,7 @@ struct properties_t {
 		size_t result;
 		auto status = cuMemGetAllocationGranularity(&result, &raw, static_cast(granuality_kind));
-		throw_if_error(status, "Could not determine physical allocation granularity");
+		throw_if_error_lazy(status, "Could not determine physical allocation granularity");
 		return result;
 	}
@@ -219,7 +219,7 @@ class physical_allocation_t {
 	~physical_allocation_t() {
 		if (not holds_refcount_unit_) { return; }
 		auto result = cuMemRelease(handle_);
-		throw_if_error(result, "Failed making a virtual memory physical_allocation of size " + ::std::to_string(size_));
+		throw_if_error_lazy(result, "Failed making a virtual memory physical_allocation of size " + ::std::to_string(size_));
 	}
 public: // non-mutators
@@ -232,7 +232,7 @@ class physical_allocation_t {
 	physical_allocation::properties_t properties() const {
 		CUmemAllocationProp raw_properties;
 		auto status = cuMemGetAllocationPropertiesFromHandle(&raw_properties, handle_);
-		throw_if_error(status, "Obtaining the properties of a virtual memory physical_allocation with handle " + ::std::to_string(handle_));
+		throw_if_error_lazy(status, "Obtaining the properties of a virtual memory physical_allocation with handle " + ::std::to_string(handle_));
 		return { raw_properties };
 	}
@@ -242,7 +242,7 @@ class physical_allocation_t {
 		physical_allocation::shared_handle_t shared_handle_;
 		static constexpr const unsigned long long flags { 0 };
 		auto result = cuMemExportToShareableHandle(&shared_handle_, handle_, (CUmemAllocationHandleType) SharedHandleKind, flags);
-		throw_if_error(result, "Exporting a (generic CUDA) shared memory physical_allocation to a shared handle");
+		throw_if_error_lazy(result, "Exporting a (generic CUDA) shared memory physical_allocation to a shared handle");
 		return shared_handle_;
 	}
@@ -259,7 +259,7 @@ inline physical_allocation_t create(size_t size, properties_t properties)
 	static constexpr const unsigned long long flags { 0 };
 	CUmemGenericAllocationHandle handle;
 	auto result = cuMemCreate(&handle, size, &properties.raw, flags);
-	throw_if_error(result, "Failed making a virtual memory physical_allocation of size " + ::std::to_string(size));
+	throw_if_error_lazy(result, "Failed making a virtual memory physical_allocation of size " + ::std::to_string(size));
 	static constexpr const bool is_owning { true };
 	return detail_::wrap(handle, size, is_owning);
 }
@@ -282,7 +282,7 @@ inline properties_t properties_of(handle_t handle)
 {
 	CUmemAllocationProp prop;
 	auto result = cuMemGetAllocationPropertiesFromHandle (&prop, handle);
-	throw_if_error(result, "Failed obtaining the properties of the virtual memory physical_allocation with handle "
+	throw_if_error_lazy(result, "Failed obtaining the properties of the virtual memory physical_allocation with handle "
 		+ ::std::to_string(handle));
 	return { prop };
 }
@@ -306,7 +306,7 @@ physical_allocation_t import(shared_handle_t shared_handle, si
 	handle_t result_handle;
 	auto result = cuMemImportFromShareableHandle(
 		&result_handle, reinterpret_cast(shared_handle), CUmemAllocationHandleType(SharedHandleKind));
-	throw_if_error(result, "Failed importing a virtual memory physical_allocation from a shared handle ");
+	throw_if_error_lazy(result, "Failed importing a virtual memory physical_allocation from a shared handle ");
 	return physical_allocation::detail_::wrap(result_handle, size, holds_refcount_unit);
 }
@@ -346,7 +346,7 @@ inline access_mode_t get_access_mode(region_t fully_mapped_region, cuda::device:
 	CUmemLocation_st location { CU_MEM_LOCATION_TYPE_DEVICE, device_id };
 	unsigned long long flags;
 	auto result = cuMemGetAccess(&flags, &location, fully_mapped_region.device_address() );
-	throw_if_error(result, "Failed determining the access mode for "
+	throw_if_error_lazy(result, "Failed determining the access mode for "
 		+ cuda::device::detail_::identify(device_id)
 		+ " to the virtual memory mapping to the range of size " + ::std::to_string(fully_mapped_region.size())
 		+ " bytes at " + cuda::detail_::ptr_as_hex(fully_mapped_region.data()));
@@ -460,7 +460,7 @@ class mapping_t {
 	{
 		if (not owning_) { return; }
 		auto result = cuMemUnmap(address_range_.device_address(), address_range_.size());
-		throw_if_error(result, "Failed unmapping " + mapping::detail_::identify(address_range_));
+		throw_if_error_lazy(result, "Failed unmapping " + mapping::detail_::identify(address_range_));
 	}
 public:
@@ -470,7 +470,7 @@ class mapping_t {
 	{
 		CUmemGenericAllocationHandle allocation_handle;
 		auto status = cuMemRetainAllocationHandle(&allocation_handle, address_range_.data());
-		throw_if_error(status, " Failed obtaining/retaining the physical_allocation handle for the virtual memory "
+		throw_if_error_lazy(status, " Failed obtaining/retaining the physical_allocation handle for the virtual memory "
			"range mapped to " + cuda::detail_::ptr_as_hex(address_range_.data())
 			+ " of size " + ::std::to_string(address_range_.size()) + " bytes");
 		constexpr const bool increase_refcount{false};
@@ -508,7 +508,7 @@ inline mapping_t map(region_t region, physical_allocation_t physical_allocation)
 	constexpr const unsigned long long flags { 0 };
 	auto handle = physical_allocation.handle();
 	auto status = cuMemMap(region.device_address(), region.size(), offset_into_allocation, handle, flags);
-	throw_if_error(status, "Failed making a virtual memory mapping of "
+	throw_if_error_lazy(status, "Failed making a virtual memory mapping of "
 		+ physical_allocation::detail_::identify(physical_allocation)
 		+ " to the range of size " + ::std::to_string(region.size())
 		+ " bytes at " + cuda::detail_::ptr_as_hex(region.data()));
diff --git a/src/cuda/nvrtc/compilation_output.hpp b/src/cuda/nvrtc/compilation_output.hpp
index 382b9362..c918d8b9 100644
--- a/src/cuda/nvrtc/compilation_output.hpp
+++ b/src/cuda/nvrtc/compilation_output.hpp
@@ -74,14 +74,14 @@ inline size_t get_log_size(program::handle_t program_handle, const char* program
 {
 	size_t size;
 	auto status = nvrtcGetProgramLogSize(program_handle, &size);
-	throw_if_error(status, "Failed obtaining compilation log size for " + identify(program_handle, program_name));
+	throw_if_error_lazy(status, "Failed obtaining compilation log size for " + identify(program_handle, program_name));
 	return size;
 }
 inline void get_log(char* buffer, program::handle_t program_handle, const char *program_name = nullptr)
 {
 	auto status = nvrtcGetProgramLog(program_handle, buffer);
-	throw_if_error(status, "Failed obtaining NVRTC program compilation log for"
+	throw_if_error_lazy(status, "Failed obtaining NVRTC program compilation log for"
 		+ identify(program_handle, program_name));
 }
@@ -91,7 +91,7 @@ inline size_t get_cubin_size(program::handle_t program_handle, const char* progr
 {
 	size_t size;
 	auto status = nvrtcGetCUBINSize(program_handle, &size);
-	throw_if_error(status, "Failed obtaining NVRTC program output CUBIN size");
+	throw_if_error_lazy(status, "Failed obtaining NVRTC program output CUBIN size");
 	if (size == 0) {
 		throw ::std::runtime_error("CUBIN requested for a program compiled for a virtual architecture only: "
 			+ identify(program_handle, program_name));
@@ -102,7 +102,7 @@ inline size_t get_cubin_size(program::handle_t program_handle, const char* progr
 inline void get_cubin(char* buffer, program::handle_t program_handle, const char *program_name = nullptr)
 {
 	auto status = nvrtcGetCUBIN(program_handle, buffer);
-	throw_if_error(status, "Failed obtaining NVRTC program output CUBIN for "
+	throw_if_error_lazy(status, "Failed obtaining NVRTC program output CUBIN for "
 		+ identify(program_handle, program_name));
 }
@@ -113,7 +113,7 @@ inline size_t get_ptx_size(program::handle_t program_handle, const char *program
 {
 	size_t size;
 	auto status = nvrtcGetPTXSize(program_handle, &size);
-	throw_if_error(status, "Failed obtaining NVRTC program output PTX size for "
+	throw_if_error_lazy(status, "Failed obtaining NVRTC program output PTX size for "
 		+ identify(program_handle, program_name));
 	return size;
 }
@@ -121,7 +121,7 @@ inline size_t get_ptx_size(program::handle_t program_handle, const char *program
 inline void get_ptx(char* buffer, program::handle_t program_handle, const char *program_name = nullptr)
 {
 	auto status = nvrtcGetPTX(program_handle, buffer);
-	throw_if_error(status, "Failed obtaining NVRTC program output PTX for "
+	throw_if_error_lazy(status, "Failed obtaining NVRTC program output PTX for "
 		+ identify(program_handle, program_name));
 }
@@ -131,7 +131,7 @@ inline size_t get_nvvm_size(program::handle_t program_handle, const char *progra
 {
 	size_t size;
 	auto status = nvrtcGetNVVMSize(program_handle, &size);
-	throw_if_error(status, "Failed obtaining NVRTC program output NVVM size for "
+	throw_if_error_lazy(status, "Failed obtaining NVRTC program output NVVM size for "
 		+ identify(program_handle, program_name));
 	return size;
 }
@@ -139,7 +139,7 @@ inline size_t get_nvvm_size(program::handle_t program_handle, const char *progra
 inline void get_nvvm(char* buffer, program::handle_t program_handle, const char *program_name = nullptr)
 {
 	auto status = nvrtcGetNVVM(program_handle, buffer);
-	throw_if_error(status, "Failed obtaining NVRTC program output NVVM for "
+	throw_if_error_lazy(status, "Failed obtaining NVRTC program output NVVM for "
 		+ identify(program_handle, program_name));
 }
@@ -258,7 +258,7 @@ class compilation_output_t {
 		size_t size;
 		auto status = nvrtcGetPTXSize(program_handle_, &size);
 		if (status == NVRTC_ERROR_INVALID_PROGRAM) { return false; }
-		throw_if_error(status, "Failed determining whether the NVRTC program has a compiled PTX result: "
+		throw_if_error_lazy(status, "Failed determining whether the NVRTC program has a compiled PTX result: "
 			+ compilation_output::detail_::identify(*this));
 		if (size == 0) {
 			throw ::std::logic_error("PTX size reported as 0 by "
@@ -303,7 +303,7 @@ class compilation_output_t {
 		size_t size;
 		auto status = nvrtcGetCUBINSize(program_handle_, &size);
 		if (status == NVRTC_ERROR_INVALID_PROGRAM) { return false; }
-		throw_if_error(status, "Failed determining whether the NVRTC program has a compiled CUBIN result: "
+		throw_if_error_lazy(status, "Failed determining whether the NVRTC program has a compiled CUBIN result: "
 			+ compilation_output::detail_::identify(*this));
 		return (size > 0);
 	}
@@ -349,7 +349,7 @@ class compilation_output_t {
 		size_t size;
 		auto status = nvrtcGetNVVMSize(program_handle_, &size);
 		if (status == NVRTC_ERROR_INVALID_PROGRAM) { return false; }
-		throw_if_error(status, "Failed determining whether the NVRTC program has a compiled NVVM result: "
+		throw_if_error_lazy(status, "Failed determining whether the NVRTC program has a compiled NVVM result: "
 			+ compilation_output::detail_::identify(*this));
 		if (size == 0) {
 			throw ::std::logic_error("NVVM size reported as 0 by NVRTC for program: "
@@ -373,7 +373,7 @@ class compilation_output_t {
 	{
 		const char* result;
 		auto status = nvrtcGetLoweredName(program_handle_, unmangled_name, &result);
-		throw_if_error(status, ::std::string("Failed obtaining the mangled form of name \"")
+		throw_if_error_lazy(status, ::std::string("Failed obtaining the mangled form of name \"")
 			+ unmangled_name + "\" in dynamically-compiled program \"" + program_name_ + '\"');
 		return result;
 	}
@@ -417,7 +417,7 @@ class compilation_output_t {
 	{
 		if (owns_handle_) {
 			auto status = nvrtcDestroyProgram(&program_handle_);
-			throw_if_error(status, "Destroying " + program::detail_::identify(program_handle_, program_name_.c_str()));
+			throw_if_error_lazy(status, "Destroying " + program::detail_::identify(program_handle_, program_name_.c_str()));
 		}
 	}
diff --git a/src/cuda/nvrtc/program.hpp b/src/cuda/nvrtc/program.hpp
index 984087f0..e9a66437 100644
--- a/src/cuda/nvrtc/program.hpp
+++ b/src/cuda/nvrtc/program.hpp
@@ -45,14 +45,14 @@ inline program::handle_t create(
 	program::handle_t program_handle;
 	auto status = nvrtcCreateProgram(
 		&program_handle, program_source, program_name, (int) num_headers, header_sources, header_names);
-	throw_if_error(status, "Failed creating an NVRTC program (named " + ::std::string(program_name) + ')');
+	throw_if_error_lazy(status, "Failed creating an NVRTC program (named " + ::std::string(program_name) + ')');
 	return program_handle;
 }
 inline void register_global(handle_t program_handle, const char *global_to_register)
 {
 	auto status = nvrtcAddNameExpression(program_handle, global_to_register);
-	throw_if_error(status, "Failed registering global entity " + ::std::string(global_to_register)
+	throw_if_error_lazy(status, "Failed registering global entity " + ::std::string(global_to_register)
 		+ " with " + identify(program_handle));
 }
@@ -387,10 +387,10 @@ supported_targets()
 {
 	int num_supported_archs;
 	auto status = nvrtcGetNumSupportedArchs(&num_supported_archs);
-	throw_if_error(status, "Failed obtaining the number of target NVRTC architectures");
+	throw_if_error_lazy(status, "Failed obtaining the number of target NVRTC architectures");
 	auto raw_archs = ::std::unique_ptr(new int[num_supported_archs]);
 	status = nvrtcGetSupportedArchs(raw_archs.get());
-	throw_if_error(status, "Failed obtaining the architectures supported by NVRTC");
+	throw_if_error_lazy(status, "Failed obtaining the architectures supported by NVRTC");
 	dynarray result;
 	result.reserve(num_supported_archs);
 	::std::transform(raw_archs.get(), raw_archs.get() + num_supported_archs, ::std::back_inserter(result),
diff --git a/src/cuda/nvrtc/versions.hpp b/src/cuda/nvrtc/versions.hpp
index 1a8e5d7e..bfa0daff 100644
--- a/src/cuda/nvrtc/versions.hpp
+++ b/src/cuda/nvrtc/versions.hpp
@@ -21,7 +21,7 @@ namespace version_numbers {
 inline version_t nvrtc() {
 	version_t version;
 	auto status = nvrtcVersion(&version.major, &version.minor);
-	throw_if_error(status, "Failed obtaining the NVRTC library version");
+	throw_if_error_lazy(status, "Failed obtaining the NVRTC library version");
 	return version;
 }
diff --git a/src/cuda/nvtx/profiling.hpp b/src/cuda/nvtx/profiling.hpp
index b2f288a1..0bf945f4 100644
--- a/src/cuda/nvtx/profiling.hpp
+++ b/src/cuda/nvtx/profiling.hpp
@@ -194,7 +194,7 @@ inline void range_end(range::handle_t range_handle)
 inline void start()
 {
 	auto status = cuProfilerStart();
-	throw_if_error(status, "Starting CUDA profiling");
+	throw_if_error_lazy(status, "Starting CUDA profiling");
 }
 /**
@@ -203,7 +203,7 @@ inline void start()
 inline void stop()
 {
 	auto status = cuProfilerStop();
-	throw_if_error(status, "Stopping CUDA profiling");
+	throw_if_error_lazy(status, "Stopping CUDA profiling");
 }
 } // namespace profiling
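Note on the overall change: every call site above swaps throw_if_error(status, message) for throw_if_error_lazy(status, message). The apparent motivation is that these messages concatenate ::std::string temporaries, identify() results and ptr_as_hex() output; a lazy variant can defer that work until the status is actually known to indicate failure. The sketch below illustrates the idea only; status_t, is_failure and the exception type are stand-ins I introduce for the example, not the library's actual definitions, which may differ.

#include <stdexcept>
#include <string>

// Stand-ins for the library's status type and helpers (assumptions for this sketch).
using status_t = int;                                   // e.g. CUresult / cudaError_t
constexpr bool is_failure(status_t s) { return s != 0; }

// Eager version: the message is an ordinary function argument, so the caller
// builds the string even when the call succeeded.
inline void throw_if_error(status_t status, const ::std::string& message)
{
	if (is_failure(status)) { throw ::std::runtime_error(message); }
}

// Lazy version: a macro defers evaluating the message expression until the
// status has been checked, so the string is only constructed on failure.
#define throw_if_error_lazy(status_, ...) \
	do { \
		status_t tie_status_ = (status_); \
		if (is_failure(tie_status_)) { \
			throw ::std::runtime_error(__VA_ARGS__); \
		} \
	} while (false)

int main()
{
	status_t status = 1; // pretend some CUDA call failed
	try {
		// The concatenation below is only performed because status is non-zero;
		// on the success path it would be skipped entirely.
		throw_if_error_lazy(status, "Failed doing X: code " + ::std::to_string(status));
	} catch (const ::std::exception& e) {
		// e.what() == "Failed doing X: code 1"
	}
}

At call sites like the ones in this diff, which sit on frequently-executed paths (stream operations, memory mapping, NVRTC queries), deferring the message construction to the failure branch avoids paying for string building on every successful call.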