diff --git a/src/cuda/api/kernel_launch.hpp b/src/cuda/api/kernel_launch.hpp
index ce63f0b8..a57092d9 100644
--- a/src/cuda/api/kernel_launch.hpp
+++ b/src/cuda/api/kernel_launch.hpp
@@ -188,7 +188,7 @@ void enqueue_raw_kernel_launch_in_current_context(
 		// look at things.
 		detail_::collect_argument_addresses(argument_ptrs, ::std::forward<KernelParameters>(parameters)...);
 #if CUDA_VERSION >= 11000
-		kernel::handle_t kernel_function_handle = kernel::detail_::get_handle( (const void*) kernel_function);
+		kernel::handle_t kernel_function_handle = kernel::apriori_compiled::detail_::get_handle( (const void*) kernel_function);
 		auto status = cuLaunchCooperativeKernel(
 			kernel_function_handle,
 			launch_configuration.dimensions.grid.x,
@@ -248,7 +248,7 @@ struct raw_kernel_typegen {
 
 template<typename... KernelParameters>
 typename detail_::raw_kernel_typegen<KernelParameters...>::type
-unwrap(const apriori_compiled_kernel_t& kernel)
+unwrap(const kernel::apriori_compiled_t& kernel)
 {
 	using raw_kernel_t = typename detail_::raw_kernel_typegen<KernelParameters ...>::type;
 	return reinterpret_cast<raw_kernel_t>(const_cast<void *>(kernel.ptr()));
@@ -259,9 +259,9 @@ unwrap(const apriori_compiled_kernel_t& kernel)
 namespace detail_ {
 
 template<typename... KernelParameters>
-struct enqueue_launch_helper<apriori_compiled_kernel_t, KernelParameters...> {
+struct enqueue_launch_helper<kernel::apriori_compiled_t, KernelParameters...> {
 	void operator()(
-		const apriori_compiled_kernel_t&  wrapped_kernel,
+		const kernel::apriori_compiled_t&  wrapped_kernel,
 		const stream_t &                  stream,
 		launch_configuration_t            launch_configuration,
 		KernelParameters &&...            parameters) const;
diff --git a/src/cuda/api/kernels/apriori_compiled.hpp b/src/cuda/api/kernels/apriori_compiled.hpp
index e491647e..c9b9a935 100644
--- a/src/cuda/api/kernels/apriori_compiled.hpp
+++ b/src/cuda/api/kernels/apriori_compiled.hpp
@@ -18,12 +18,13 @@
 
 namespace cuda {
 
+namespace kernel {
+
 ///@cond
-class device_t;
-class apriori_compiled_kernel_t;
+class apriori_compiled_t;
 ///@nocond
 
-namespace kernel {
+namespace apriori_compiled {
 
 namespace detail_ {
 
@@ -46,16 +47,16 @@ inline handle_t get_handle(const void *kernel_function_ptr, const char* name = n
 }
 #endif
 
-apriori_compiled_kernel_t wrap(
+apriori_compiled_t wrap(
 	device::id_t device_id,
 	context::handle_t primary_context_handle,
 	kernel::handle_t f,
 	const void* ptr,
 	bool hold_primary_context_refcount_unit = false);
 
-
 } // namespace detail_
 
+
 #if ! CAN_GET_APRIORI_KERNEL_HANDLE
 /**
  * @brief a wrapper around `cudaFuncAttributes`, offering
@@ -299,13 +300,13 @@ inline grid::dimension_t max_active_blocks_per_multiprocessor(
 
 } // namespace occupancy
 
-} // namespace kernel
+} // namespace apriori_compiled
 
 /**
  * @brief A subclass of the @ref `kernel_t` interface for kernels being
  * functions marked as __global__ in source files and compiled apriori.
  */
-class apriori_compiled_kernel_t final : public kernel_t {
+class apriori_compiled_t final : public kernel_t {
 public: // getters
 	const void *ptr() const noexcept { return ptr_; }
 	const void *get() const noexcept { return ptr_; }
@@ -316,7 +317,7 @@ class apriori_compiled_kernel_t final : public kernel_t {
 public: // non-mutators
 
 #if ! CAN_GET_APRIORI_KERNEL_HANDLE
-	kernel::attributes_t attributes() const;
+	apriori_compiled::attributes_t attributes() const;
 	void set_cache_preference(multiprocessor_cache_preference_t preference) const override;
 	void set_shared_memory_bank_size(multiprocessor_shared_memory_bank_size_option_t config) const override;
 
@@ -346,7 +347,7 @@ class apriori_compiled_kernel_t final : public kernel_t {
 	{
 		auto shared_memory_size_determiner =
 			[dynamic_shared_memory_size](int) -> size_t { return dynamic_shared_memory_size; };
-		return kernel::occupancy::detail_::min_grid_params_for_max_occupancy(
+		return kernel::apriori_compiled::occupancy::detail_::min_grid_params_for_max_occupancy(
 			ptr(), device_id(),
 			shared_memory_size_determiner,
 			block_size_limit, disable_caching_override);
@@ -357,7 +358,7 @@ class apriori_compiled_kernel_t final : public kernel_t {
 		grid::block_dimension_t block_size_limit = 0,
 		bool disable_caching_override = false) const override
 	{
-		return kernel::occupancy::detail_::min_grid_params_for_max_occupancy(
+		return kernel::apriori_compiled::occupancy::detail_::min_grid_params_for_max_occupancy(
 			ptr(), device_id(),
 			shared_memory_size_determiner,
 			block_size_limit, disable_caching_override);
@@ -387,7 +388,7 @@ class apriori_compiled_kernel_t final : public kernel_t {
 		memory::shared::size_t dynamic_shared_memory_per_block,
 		bool disable_caching_override = false) const override
 	{
-		return kernel::occupancy::detail_::max_active_blocks_per_multiprocessor(
+		return apriori_compiled::occupancy::detail_::max_active_blocks_per_multiprocessor(
 			ptr(),
 			block_size_in_threads,
 			dynamic_shared_memory_per_block,
@@ -396,7 +397,7 @@ class apriori_compiled_kernel_t final : public kernel_t {
 #endif // ! CAN_GET_APRIORI_KERNEL_HANDLE
 
 protected: // ctors & dtor
-	apriori_compiled_kernel_t(device::id_t device_id, context::handle_t primary_context_handle,
+	apriori_compiled_t(device::id_t device_id, context::handle_t primary_context_handle,
 		kernel::handle_t handle, const void *f, bool hold_pc_refcount_unit)
 	: kernel_t(device_id, primary_context_handle, handle, hold_pc_refcount_unit), ptr_(f) {
 		// TODO: Consider checking whether this actually is a device function, at all and in this context
@@ -404,34 +405,35 @@ class apriori_compiled_kernel_t final : public kernel_t {
 		assert(f != nullptr && "Attempt to construct a kernel object for a nullptr kernel function pointer");
 #endif
 	}
-	apriori_compiled_kernel_t(
+	apriori_compiled_t(
 		device::id_t device_id,
 		context::handle_t primary_context_handle,
 		const void *f,
 		bool hold_primary_context_refcount_unit)
-	: apriori_compiled_kernel_t(
+	: apriori_compiled_t(
 		device_id,
 		primary_context_handle,
-		kernel::detail_::get_handle(f),
+		apriori_compiled::detail_::get_handle(f),
 		f,
 		hold_primary_context_refcount_unit)
 	{ }
 
 public: // ctors & dtor
-	apriori_compiled_kernel_t(const apriori_compiled_kernel_t&) = default;
-	apriori_compiled_kernel_t(apriori_compiled_kernel_t&&) = default;
+	apriori_compiled_t(const apriori_compiled_t&) = default;
+	apriori_compiled_t(apriori_compiled_t&&) = default;
 
 public: // friends
-	friend apriori_compiled_kernel_t kernel::detail_::wrap(device::id_t, context::handle_t, kernel::handle_t, const void*, bool);
+	friend apriori_compiled_t apriori_compiled::detail_::wrap(device::id_t, context::handle_t, kernel::handle_t, const void*, bool);
 
 protected: // data members
 	const void *const ptr_;
-};
+}; // class apriori_compiled_t
+
+namespace apriori_compiled {
 
-namespace kernel {
 namespace detail_ {
 
-inline apriori_compiled_kernel_t wrap(
+inline apriori_compiled_t wrap(
 	device::id_t       device_id,
 	context::handle_t  primary_context_handle,
 	kernel::handle_t   f,
@@ -442,7 +444,7 @@ inline apriori_compiled_kernel_t wrap(
 }
 
 #if ! CAN_GET_APRIORI_KERNEL_HANDLE
-inline ::std::string identify(const apriori_compiled_kernel_t& kernel)
+inline ::std::string identify(const apriori_compiled_t& kernel)
 {
 	return "apriori-compiled kernel " + cuda::detail_::ptr_as_hex(kernel.ptr())
 		+ " in " + context::detail_::identify(kernel.context());
@@ -461,7 +463,7 @@ inline attribute_value_t get_attribute(const void* function_ptr, attribute_t att
 inline void set_attribute(const void* function_ptr, attribute_t attribute, attribute_value_t value)
 {
 	auto handle = detail_::get_handle(function_ptr);
-	return detail_::set_attribute_in_current_context(handle, attribute, value);
+	return kernel::detail_::set_attribute_in_current_context(handle, attribute, value);
 }
 
 inline attribute_value_t get_attribute(
@@ -484,15 +486,17 @@ inline void set_attribute(
 }
 #endif // CAN_GET_APRIORI_KERNEL_HANDLE
 
+} // namespace apriori_compiled
+
 /**
  * @note The returned kernel proxy object will keep the device's primary
  * context active while the kernel exists.
  */
 template<typename KernelFunctionPtr>
-apriori_compiled_kernel_t get(const device_t& device, KernelFunctionPtr function_ptr);
+apriori_compiled_t get(const device_t& device, KernelFunctionPtr function_ptr);
 
 template<typename KernelFunctionPtr>
-apriori_compiled_kernel_t get(context_t context, KernelFunctionPtr function_ptr);
+apriori_compiled_t get(context_t context, KernelFunctionPtr function_ptr);
 
 } // namespace kernel
 
diff --git a/src/cuda/api/multi_wrapper_impls/apriori_compiled_kernel.hpp b/src/cuda/api/multi_wrapper_impls/apriori_compiled_kernel.hpp
index b076b3ff..b4b5b734 100644
--- a/src/cuda/api/multi_wrapper_impls/apriori_compiled_kernel.hpp
+++ b/src/cuda/api/multi_wrapper_impls/apriori_compiled_kernel.hpp
@@ -15,23 +15,25 @@
 
 namespace cuda {
 
+namespace kernel {
+
 #if ! CAN_GET_APRIORI_KERNEL_HANDLE
 #if defined(__CUDACC__)
 
 // Unfortunately, the CUDA runtime API does not allow for computation of the grid parameters for maximum occupancy
 // from code compiled with a host-side-only compiler! See cuda_runtime.h for details
 
-inline kernel::attributes_t apriori_compiled_kernel_t::attributes() const
+inline apriori_compiled::attributes_t apriori_compiled_t::attributes() const
 {
 	// Note: assuming the primary context is active
 	CAW_SET_SCOPE_CONTEXT(context_handle_);
-	kernel::attributes_t function_attributes;
+	apriori_compiled::attributes_t function_attributes;
 	auto status = cudaFuncGetAttributes(&function_attributes, ptr_);
 	throw_if_error_lazy(status, "Failed obtaining attributes for a CUDA device function");
 	return function_attributes;
 }
 
-inline void apriori_compiled_kernel_t::set_cache_preference(multiprocessor_cache_preference_t preference) const
+inline void apriori_compiled_t::set_cache_preference(multiprocessor_cache_preference_t preference) const
 {
 	// Note: assuming the primary context is active
 	CAW_SET_SCOPE_CONTEXT(context_handle_);
@@ -41,7 +43,7 @@ inline void apriori_compiled_kernel_t::set_cache_preference(multiprocessor_cache
 		"CUDA device function");
 }
 
-inline void apriori_compiled_kernel_t::set_shared_memory_bank_size(
+inline void apriori_compiled_t::set_shared_memory_bank_size(
 	multiprocessor_shared_memory_bank_size_option_t  config) const
 {
 	// Note: assuming the primary context is active
@@ -50,7 +52,7 @@ inline void apriori_compiled_kernel_t::set_shared_memory_bank_size(
 	throw_if_error_lazy(result, "Failed setting shared memory bank size to " + ::std::to_string(config));
 }
 
-inline void apriori_compiled_kernel_t::set_attribute(kernel::attribute_t attribute, kernel::attribute_value_t value) const
+inline void apriori_compiled_t::set_attribute(attribute_t attribute, attribute_value_t value) const
 {
 	// Note: assuming the primary context is active
 	CAW_SET_SCOPE_CONTEXT(context_handle_);
@@ -70,9 +72,9 @@ inline void apriori_compiled_kernel_t::set_attribute(kernel::attribute_t attribu
 	throw_if_error_lazy(result, "Setting CUDA device function attribute " + ::std::to_string(attribute) + " to value " + ::std::to_string(value));
 }
 
-inline kernel::attribute_value_t apriori_compiled_kernel_t::get_attribute(kernel::attribute_t attribute) const
+inline attribute_value_t apriori_compiled_t::get_attribute(attribute_t attribute) const
 {
-	kernel::attributes_t attrs = attributes();
+	apriori_compiled::attributes_t attrs = attributes();
 	switch(attribute) {
 		case CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK:
 			return attrs.maxThreadsPerBlock;
@@ -98,9 +100,9 @@ inline kernel::attribute_value_t apriori_compiled_kernel_t::get_attribute(kernel
 			throw cuda::runtime_error(status::not_supported,
 				::std::string("Attribute ") +
 #ifdef NDEBUG
-					::std::to_string(static_cast<::std::underlying_type<kernel::attribute_t>::type>(attribute))
+					::std::to_string(static_cast<::std::underlying_type<attribute_t>::type>(attribute))
 #else
-					kernel::detail_::attribute_name(attribute)
+					detail_::attribute_name(attribute)
 #endif
 				+ " cannot be obtained for apriori-compiled kernels before CUDA version 11.0"
 			);
@@ -110,16 +112,15 @@ inline kernel::attribute_value_t apriori_compiled_kernel_t::get_attribute(kernel
 #endif // defined(__CUDACC__)
 #endif // ! CAN_GET_APRIORI_KERNEL_HANDLE
 
-namespace kernel {
-
+namespace apriori_compiled {
 
 namespace detail_ {
 
 template<typename KernelFunctionPtr>
-apriori_compiled_kernel_t get(
-	::cuda::device::id_t  device_id,
-	context::handle_t &   primary_context_handle,
-	KernelFunctionPtr     function_ptr)
+apriori_compiled_t get(
+	device::id_t         device_id,
+	context::handle_t &  primary_context_handle,
+	KernelFunctionPtr    function_ptr)
 {
 	static_assert(
 		::std::is_pointer<KernelFunctionPtr>::value
@@ -137,6 +138,8 @@ apriori_compiled_kernel_t get(
 
 } // namespace detail_
 
+} // namespace apriori_compiled
+
 
 /**
  * @brief Obtain a wrapped kernel object corresponding to a "raw" kernel function
@@ -148,10 +151,10 @@ apriori_compiled_kernel_t get(
  * context active while the kernel exists.
  */
 template<typename KernelFunctionPtr>
-apriori_compiled_kernel_t get(const device_t& device, KernelFunctionPtr function_ptr)
+apriori_compiled_t get(const device_t &device, KernelFunctionPtr function_ptr)
 {
 	auto primary_context_handle = device::primary_context::detail_::obtain_and_increase_refcount(device.id());
-	return detail_::get(device.id(), primary_context_handle, function_ptr);
+	return apriori_compiled::detail_::get(device.id(), primary_context_handle, function_ptr);
 }
 
 } // namespace kernel
@@ -160,9 +163,9 @@ namespace detail_ {
 
 template<>
 inline ::cuda::device::primary_context_t
-get_implicit_primary_context<apriori_compiled_kernel_t>(apriori_compiled_kernel_t kernel)
+get_implicit_primary_context<kernel::apriori_compiled_t>(kernel::apriori_compiled_t kernel)
 {
-	const kernel_t& kernel_ = kernel;
+	const kernel_t &kernel_ = kernel;
 	return get_implicit_primary_context(kernel_);
 }
 
diff --git a/src/cuda/api/multi_wrapper_impls/kernel.hpp b/src/cuda/api/multi_wrapper_impls/kernel.hpp
index eea3bbaf..22106e16 100644
--- a/src/cuda/api/multi_wrapper_impls/kernel.hpp
+++ b/src/cuda/api/multi_wrapper_impls/kernel.hpp
@@ -15,7 +15,6 @@
 #include "../pointer.hpp"
 #include "../primary_context.hpp"
 #include "../kernel.hpp"
-#include "../current_context.hpp"
 
 namespace cuda {
 
diff --git a/src/cuda/api/multi_wrapper_impls/kernel_launch.hpp b/src/cuda/api/multi_wrapper_impls/kernel_launch.hpp
index 523fc90f..e546430a 100644
--- a/src/cuda/api/multi_wrapper_impls/kernel_launch.hpp
+++ b/src/cuda/api/multi_wrapper_impls/kernel_launch.hpp
@@ -101,8 +101,8 @@ inline void validate_block_dimension_compatibility(
 }
 
 template<typename... KernelParameters>
-void enqueue_launch_helper<apriori_compiled_kernel_t, KernelParameters...>::operator()(
-	const apriori_compiled_kernel_t&  wrapped_kernel,
+void enqueue_launch_helper<kernel::apriori_compiled_t, KernelParameters...>::operator()(
+	const kernel::apriori_compiled_t&  wrapped_kernel,
 	const stream_t &                  stream,
 	launch_configuration_t            launch_configuration,
 	KernelParameters &&...            parameters) const
@@ -334,7 +334,7 @@ inline grid::composite_dimensions_t min_grid_params_for_max_occupancy(
 } // namespace detail_
 
 inline grid::composite_dimensions_t min_grid_params_for_max_occupancy(
-	const apriori_compiled_kernel_t&  kernel,
+	const kernel::apriori_compiled_t&  kernel,
 	memory::shared::size_t            dynamic_shared_memory_size,
 	grid::block_dimension_t           block_size_limit,
 	bool                              disable_caching_override)
@@ -345,7 +345,7 @@ inline grid::composite_dimensions_t min_grid_params_for_max_occupancy(
 
 template <typename UnaryFunction>
 grid::composite_dimensions_t min_grid_params_for_max_occupancy(
-	const apriori_compiled_kernel_t&  kernel,
+	const kernel::apriori_compiled_t&  kernel,
 	UnaryFunction                     block_size_to_dynamic_shared_mem_size,
 	grid::block_dimension_t           block_size_limit,
 	bool                              disable_caching_override)