diff --git a/src/cuda/api/current_context.hpp b/src/cuda/api/current_context.hpp index 487e7f0d..96202e9f 100644 --- a/src/cuda/api/current_context.hpp +++ b/src/cuda/api/current_context.hpp @@ -45,18 +45,37 @@ inline bool is_(handle_t handle) } } +struct status_and_handle_pair { status_t status; handle_t handle; }; + /** * Returns a raw handle for the current CUDA context * * @return the raw handle from the CUDA driver - if one exists; none - * if no context is current/active. + * if no context is current/active (e.g. if the driver has not + * been initialized). Also returns the status reported by the driver + * following the attempt to obtain the context. */ -inline handle_t get_handle() +inline status_and_handle_pair get_with_status() { handle_t handle; auto status = cuCtxGetCurrent(&handle); - throw_if_error(status, "Failed obtaining the current context's handle"); - return handle; + if (status == status::not_yet_initialized) { + handle = context::detail_::none; + } + return { status, handle }; +} + +/** + * Returns a raw handle for the current CUDA context + * + * @return the raw handle from the CUDA driver - if one exists; none + * if no context is current/active. + */ +inline handle_t get_handle() +{ + auto p = get_with_status(); + throw_if_error(p.status, "Failed obtaining the current context's handle"); + return p.handle; } // Note: not calling this get_ since flags are read-only anyway diff --git a/src/cuda/api/multi_wrapper_impls/context.hpp b/src/cuda/api/multi_wrapper_impls/context.hpp index 4742c143..ad466f52 100644 --- a/src/cuda/api/multi_wrapper_impls/context.hpp +++ b/src/cuda/api/multi_wrapper_impls/context.hpp @@ -107,14 +107,14 @@ inline handle_t push_default_if_missing() } /** - * @note This specialized scope setter is used in API calls which aren't provided a context - * as a parameter, and when it may be the case that no context is current. 
Such API calls - * are generally supposed to be independent of a specific context; but - CUDA still often - * expects some context to exist and be current to perform whatever it is we want it to do. - * It would be unreasonable to create new contexts for the purposes of such calls - as then, - * the caller would often need to maintain these contexts after the call. Instead, we fall - * back on a primary context of one of the devices - and since no particular device is - * specified, we choose that to be the default device. When we do want the caller to keep + * @note This specialized scope context setter is used in API calls which aren't provided a + * context as a parameter, and when it may be the case that no context is current. Such API + * calls are generally supposed to be independent of a specific context; but - CUDA still + * often expects some context to exist and be current to perform whatever it is we want it + * to do. It would be unreasonable to create new contexts for the purposes of such calls - + * as then, the caller would often need to maintain these contexts after the call. Instead, + * we fall back on a primary context of one of the devices - and since no particular device + * is specified, we choose that to be the default device. When we do want the caller to keep * a context alive - we increase the primary context's refererence count, keeping it alive * automatically. 
In these situations, the ref unit "leaks" past the scope of the ensurer * object - but the instantiator would be aware of this, having asked for such behavior @@ -130,15 +130,28 @@ class scoped_existence_ensurer_t { bool decrease_pc_refcount_on_destruct_; explicit scoped_existence_ensurer_t(bool avoid_pc_refcount_increase = true) - : context_handle(get_handle()), - decrease_pc_refcount_on_destruct_(avoid_pc_refcount_increase) { + auto status_and_handle = get_with_status(); + if (status_and_handle.status == cuda::status::not_yet_initialized) { + context_handle = context::detail_::none; + initialize_driver(); // and the handle + } + else { + context_handle = status_and_handle.handle; + } if (context_handle == context::detail_::none) { device_id_ = device::current::detail_::get_id(); context_handle = device::primary_context::detail_::obtain_and_increase_refcount(device_id_); context::current::detail_::push(context_handle); + decrease_pc_refcount_on_destruct_ = avoid_pc_refcount_increase; + } + else { + // Some compilers fail to detect that device_id_ is never used + // unless it's initialized, and thus warn us of maybe-uninitialized + // use, so... + device_id_ = 0; + decrease_pc_refcount_on_destruct_ = false; } - else { decrease_pc_refcount_on_destruct_ = false; } } ~scoped_existence_ensurer_t() diff --git a/src/cuda/nvtx/profiling.hpp b/src/cuda/nvtx/profiling.hpp index 2fdcdc87..b2f288a1 100644 --- a/src/cuda/nvtx/profiling.hpp +++ b/src/cuda/nvtx/profiling.hpp @@ -271,6 +271,8 @@ class scope { public: scope() { start(); } ~scope() { stop(); } +protected: + context::current::detail_::scoped_existence_ensurer_t context_existence_ensurer; }; #define profile_this_scope() ::cuda::profiling::scope cuda_profiling_scope_{};