Skip to content

Commit

Permalink
Regards #3: Multiple doxygen, regular-comment, and spacing fixes, add…
Browse files Browse the repository at this point in the history
…itions, improvements and removal of redundancies.
  • Loading branch information
eyalroz committed Mar 25, 2024
1 parent 9d6521f commit 08e8877
Show file tree
Hide file tree
Showing 44 changed files with 327 additions and 245 deletions.
1 change: 0 additions & 1 deletion src/cuda/api/common_ptx_compilation_options.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,6 @@ struct common_ptx_compilation_options_t {

/**
* The minimum number of threads per block which the compiler should target
* @note can't be combined with a value for the @ref target property.
*/
optional<grid::block_dimension_t> min_num_threads_per_block{};

Expand Down
11 changes: 6 additions & 5 deletions src/cuda/api/context.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -211,9 +211,9 @@ inline void synchronize(const context_t& context);
* @note By default this class has RAII semantics, i.e. it creates a
* context on construction and destroys it on destruction, and isn't merely
* an ephemeral wrapper one could apply and discard; but this second kind of
* semantics is also supported, through the @ref context_t::holds_refcount_unit_ field.
* semantics is also supported, through the @ref context_t::owning_ field.
*
* @note A context is a specific to a device; see, therefore, also @ref device_t .
 * @note A context is specific to a device; see, therefore, also {@ref cuda::device_t}.
* @note This class is a "reference type", not a "value type". Therefore, making changes
* to properties of the context is a const-respecting operation on this class.
*/
Expand All @@ -228,9 +228,9 @@ class context_t {
public: // inner classes

/**
* @brief A class to create a faux member in a @ref device_t, in lieu of an in-class
* @brief A class to create a faux member in a @ref context_t, in lieu of an in-class
* namespace (which C++ does not support); whenever you see a function
* `my_dev.memory::foo()`, think of it as a `my_dev::memory::foo()`.
 * `my_context.memory::foo()`, think of it as a `my_context::memory::foo()`.
*/
class global_memory_type {
protected: // data members
Expand Down Expand Up @@ -492,7 +492,7 @@ class context_t {
* Gets the synchronization policy to be used for threads synchronizing
* with this CUDA context.
*
* @note see @ref host_thread_sync_scheduling_policy_t
* @note see @ref context::host_thread_sync_scheduling_policy_t
* for a description of the various policies.
*/
context::host_thread_sync_scheduling_policy_t sync_scheduling_policy() const
Expand Down Expand Up @@ -664,6 +664,7 @@ class context_t {
protected: // data members
device::id_t device_id_;
context::handle_t handle_;
/// When true, the object is a value type, and the context must be destroyed on destruction
bool owning_;
// this field is mutable only for enabling move construction; other
// than in that case it must not be altered
Expand Down
2 changes: 1 addition & 1 deletion src/cuda/api/copy_parameters.hpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
/**
* @file
*
* @brief The @ref copy_parameters class template and related definitions.
* @brief The @ref cuda::memory::copy_parameters_t class template and related definitions.
*/
#ifndef CUDA_API_WRAPPERS_COPY_PARAMETERS_HPP
#define CUDA_API_WRAPPERS_COPY_PARAMETERS_HPP
Expand Down
10 changes: 8 additions & 2 deletions src/cuda/api/detail/region.hpp
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
/**
* @file
*
* @brief A memory region class (@ref `cuda::memory::region`) and related
* functionality.
* @brief A memory region class (@ref cuda::memory::region_t and @ref
* cuda::memory::const_region_t) and related functionality.
*
* @note There is no CUDA-specific functionality here, and this class could be
* used irrespective of the CUDA APIs and GPUs in general.
Expand Down Expand Up @@ -121,6 +121,9 @@ bool operator!=(const base_region_t<T>& lhs, const base_region_t<T>& rhs)

} // namespace detail_

/**
* An untyped, but sized, region in some memory space
*/
struct region_t : public detail_::base_region_t<void> {
using base_region_t<void>::base_region_t;
region_t subregion(size_t offset_in_bytes, size_t size_in_bytes) const
Expand All @@ -130,6 +133,9 @@ struct region_t : public detail_::base_region_t<void> {
}
};

/**
* An untyped, but sized, region with const-constrained data in some memory space
*/
struct const_region_t : public detail_::base_region_t<void const> {
using base_region_t<void const>::base_region_t;
const_region_t(region_t r) : base_region_t(r.start(), r.size()) {}
Expand Down
2 changes: 1 addition & 1 deletion src/cuda/api/detail/span.hpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
/**
* @file
*
* @brief Contains an implementation of an std::span-like class, @ref `cuda::span`
* @brief Contains an implementation of an std::span-like class, @ref cuda::span
*
* @note When compiling with C++20 or later, the actual std::span is used instead
*/
Expand Down
2 changes: 1 addition & 1 deletion src/cuda/api/detail/unique_span.hpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
/**
* @file
*
* @brief Contains an implementation of an std::dynarray-like class, @ref `cuda::unique_span`
* @brief Contains the class @ref cuda::unique_span
*
* @note There is no CUDA-specific code in this file; the class is usable entirely independently
* of the CUDA APIs and GPUs in general
Expand Down
39 changes: 30 additions & 9 deletions src/cuda/api/device.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -514,39 +514,60 @@ class device_t {
return id_;
}

stream_t default_stream(bool hold_primary_context_refcount_unit = false) const;

/**
* See @ref cuda::stream::create()
* Obtain a wrapper for the (always-existing) default stream within
 * the device's primary context.
*
* @param hold_primary_context_refcount_unit when true, the returned stream
 * wrapper will keep the device's primary context in existence during
* its lifetime.
*/
stream_t default_stream(bool hold_primary_context_refcount_unit = false) const;

/// See @ref cuda::stream::create()
stream_t create_stream(
bool will_synchronize_with_default_stream,
stream::priority_t priority = cuda::stream::default_priority) const;

/**
* See @ref cuda::event::create()
*/
/// See @ref cuda::event::create()
event_t create_event(
bool uses_blocking_sync = event::sync_by_busy_waiting, // Yes, that's the runtime default
bool records_timing = event::do_record_timings,
bool interprocess = event::not_interprocess);

/// See @ref cuda::context::create()
context_t create_context(
context::host_thread_sync_scheduling_policy_t sync_scheduling_policy = context::heuristic,
bool keep_larger_local_mem_after_resize = false) const;

#if CUDA_VERSION >= 11020

/// See @ref cuda::memory::pool::create()
template <memory::pool::shared_handle_kind_t Kind = memory::pool::shared_handle_kind_t::no_export>
memory::pool_t create_memory_pool() const;

#endif

template<typename KernelFunction, typename ... KernelParameters>
/**
 * Launch a kernel on the default stream of the device's primary context
*
 * @tparam Kernel May be either a plain function type (for a `__global__` function
 * accessible to the translation unit), or (a reference to) any subclass of
 * `cuda::kernel_t`.
* @param kernel_function
* the kernel to launch; may be either a (`__global__`) function pointer,
* or a kernel proxy class.
* @param launch_configuration
* the configuration with which to launch the kernel;
* @param arguments
* the arguments with which to launch @p kernel (but note that references
* are not maintained).
*/
template<typename Kernel, typename ... KernelParameters>
void launch(
KernelFunction kernel_function,
Kernel kernel,
launch_configuration_t launch_configuration,
KernelParameters... parameters) const;
KernelParameters... arguments) const;

/**
* Determines the range of possible priorities for streams on this device.
Expand Down
12 changes: 6 additions & 6 deletions src/cuda/api/device_properties.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -108,14 +108,14 @@ constexpr compute_capability_t make_compute_capability(unsigned major, unsigned
/**
* @brief A structure holding a collection various properties of a device
*
* @note Somewhat annoyingly, CUDA devices have attributes, properties and flags.
* @note Somewhat annoyingly, CUDA devices have all of attributes, properties and flags.
* Attributes have integral number values; properties have all sorts of values,
* including arrays and limited-length strings (see
* @ref cuda::device::properties_t), and flags are either binary or
* small-finite-domain type fitting into an overall flagss value (see
* @ref cuda::device_t::flags_t). Flags and properties are obtained all at once,
* including arrays and limited-length strings, and flags are actually associated with
* a device's primary context, as it is actually _contexts_ which have flags (which are
* either binary or small-finite-domain type fitting into an overall flags value:
 * {@ref context::flags_t}). Flags and properties are obtained all at once (the latter,
 * using the runtime API), while attributes are obtained one at a time.
*
*/
struct properties_t : public cudaDeviceProp {

Expand Down
13 changes: 5 additions & 8 deletions src/cuda/api/error.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ namespace status {
* @note unfortunately, this enum can't inherit from @ref cuda::status_t
*/
enum named_t : ::std::underlying_type<status_t>::type {
success = CUDA_SUCCESS,
success = CUDA_SUCCESS, ///< Operation was successful; no errors
memory_allocation_failure = CUDA_ERROR_OUT_OF_MEMORY, // corresponds to cudaErrorMemoryAllocation
not_yet_initialized = CUDA_ERROR_NOT_INITIALIZED, // corresponds to cudaErrorInitializationError
already_deinitialized = CUDA_ERROR_DEINITIALIZED, // corresponds to cudaErrorCudartUnloading
Expand Down Expand Up @@ -336,7 +336,7 @@ do { \
* Do nothing... unless the status indicates an error, in which case
* a @ref cuda::runtime_error exception is thrown
*
* @param status should be @ref cuda::status::success - otherwise an exception is thrown
* @param status should be @ref status::success - otherwise an exception is thrown
* @param message An extra description message to add to the exception
*/
inline void throw_if_error(status_t status, const ::std::string& message) noexcept(false)
Expand Down Expand Up @@ -453,16 +453,13 @@ inline void ensure_none(const char *message) noexcept(false)
}

/**
* @brief Does nothing (unless throwing an exception)
* @brief Does nothing (except possibly throwing an exception)
*
* @note similar to @ref throw_if_error, but uses the CUDA Runtime API's internal
* state
*
* @throws cuda::runtime_error if the CUDA runtime API has
* encountered previously encountered an (uncleared) error
*
* @param clear_any_error When true, clears the CUDA Runtime API's state from
* recalling errors arising from before this oment
 * @throws cuda::runtime_error if the CUDA runtime API has previously
 * encountered an (uncleared) error
*/
inline void ensure_none() noexcept(false)
{
Expand Down
17 changes: 10 additions & 7 deletions src/cuda/api/event.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -79,14 +79,17 @@ namespace event {
 * @note This is a named constructor idiom, consisting of direct access to the ctor
* of the same signature, to emphasize that a new event is _not_ created.
*
* @param device_id Index of the device to which the event relates
* @param context_handle Handle of the context in which this event was created
* @param event_handle handle of the pre-existing event
* @param take_ownership When set to `false`, the CUDA event
* will not be destroyed along with proxy; use this setting
* when temporarily working with a stream existing irrespective of
* the current context and outlasting it. When set to `true`,
* the proxy class will act as it does usually, destroying the event
* when being destructed itself.
* @param take_ownership When set to `false`, the CUDA event will not be destroyed
* along with proxy; use this setting when temporarily working with a stream
* existing irrespective of the current context and outlasting it. When set to
* `true`, the proxy class will act as it does usually, destroying the event
* when being destructed itself.
* @param hold_pc_refcount_unit when the event's context is a device's primary
* context, this controls whether that context must be kept active while
* the event continues to exist.
* @return an event wrapper associated with the specified event
*/
event_t wrap(
Expand Down Expand Up @@ -125,7 +128,7 @@ inline void wait(const event_t& event);
* @note By default this class has RAII semantics, i.e. it has the runtime create
* an event on construction and destroy it on destruction, and isn't merely
* an ephemeral wrapper one could apply and discard; but this second kind of
* semantics is also (sort of) supported, through the @ref event_t::owning field.
* semantics is also (sort of) supported, through the @ref event_t::owning_ field.
*/
class event_t {

Expand Down
7 changes: 5 additions & 2 deletions src/cuda/api/ipc.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ inline void unmap(void* ipc_mapped_ptr)
*
* @param device_ptr beginning of the region of memory
* to be shared with other processes
* @return a handle which another process can call @ref import()
* @return a handle which another process can call @ref detail_::import()
* on to obtain a device pointer it can use
*/
inline ptr_handle_t export_(void* device_ptr)
Expand Down Expand Up @@ -302,15 +302,18 @@ inline handle_t export_(const event_t& event);
* @param event_ipc_handle the handle obtained via inter-process communications
*/
///@{

/**
* @param device the device with which the imported event is associated
*/
inline event_t import(const device_t& device, const handle_t& event_ipc_handle);

/**
* @param context the device-context with which the imported event is associated
* @param event_ipc_handle The handle created by another process, to be imported
* @return An event usable in the current process
*/
inline event_t import(const context_t& device, const handle_t& event_ipc_handle);
inline event_t import(const context_t& context, const handle_t& event_ipc_handle);
///@}

} // namespace ipc
Expand Down
26 changes: 18 additions & 8 deletions src/cuda/api/kernel.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@

#include "primary_context.hpp"
#include "current_context.hpp"
#include "device_properties.hpp"
#include "error.hpp"
#include "types.hpp"

Expand Down Expand Up @@ -39,15 +40,18 @@ using shared_memory_size_determiner_t = size_t (CUDA_CB *)(int block_size);
 * @note This is a named constructor idiom, consisting of direct access to the ctor
* of the same signature, to emphasize that a new kernel is _not_ somehow created.
*
* @param id Device on which the texture is located
* @param context_handle Handle of the context in which the kernel was created or added
* @param handle raw CUDA driver handle for the kernel
* @param device_id Device of the context in which the kernel was created
* @param context_handle Handle of the context in which the kernel was created
* @param handle Raw CUDA driver handle for the kernel
* @param hold_pc_refcount_unit when the event's context is a device's primary
* context, this controls whether that context must be kept active while the
* event continues to exist.
* @return a wrapper object associated with the specified kernel
*/
kernel_t wrap(
device::id_t device_id,
context::handle_t context_id,
kernel::handle_t f,
context::handle_t context_handle,
kernel::handle_t handle,
bool hold_primary_context_refcount_unit = false);

namespace detail_ {
Expand Down Expand Up @@ -115,12 +119,18 @@ inline attribute_value_t get_attribute(const kernel_t& kernel, attribute_t attri
class kernel_t {

public: // getters

/// Get (a proxy for) the context in which this kernel is defined
context_t context() const noexcept;
/// Get (a proxy for) the device for (a context of) which this kernel is defined
device_t device() const noexcept;

/// Get the id of the device for (a context of) which this kernel is defined
device::id_t device_id() const noexcept { return device_id_; }
/// Get the raw handle of the context in which this kernel is defined
context::handle_t context_handle() const noexcept { return context_handle_; }
#if CAN_GET_APRIORI_KERNEL_HANDLE
/// Get the raw (intra-context) CUDA handle for this kernel
kernel::handle_t handle() const noexcept { return handle_; }
#else
kernel::handle_t handle() const
Expand Down Expand Up @@ -375,11 +385,11 @@ namespace kernel {

inline kernel_t wrap(
device::id_t device_id,
context::handle_t context_id,
kernel::handle_t f,
context::handle_t context_handle,
kernel::handle_t handle,
bool hold_primary_context_refcount_unit)
{
return kernel_t{ device_id, context_id, f, hold_primary_context_refcount_unit };
return kernel_t{device_id, context_handle, handle, hold_primary_context_refcount_unit };
}

inline attribute_value_t get_attribute(const kernel_t& kernel, attribute_t attribute)
Expand Down
9 changes: 2 additions & 7 deletions src/cuda/api/kernel_launch.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,6 @@
* @ref cuda::launch_configuration_t .
* </ul>
*
* @note You'd probably better avoid launching kernels using these
* function directly, and go through the @ref cuda::stream_t or @ref cuda::device_t
* proxy classes' launch mechanism (e.g.
* `my_stream.enqueue.kernel_launch(...)`).
*
* @note Even though when you use this wrapper, your code will not have the silly
* chevron, you can't use it from regular `.cpp` files compiled with your host
* compiler. Hence the `.cuh` extension. You _can_, however, safely include this
Expand Down Expand Up @@ -335,8 +330,8 @@ void launch(

/**
* Launch a kernel with the arguments pre-marshalled into the (main) form
* which @ref cuLaunchKernel accepts variables in: A null-terminated sequence
* of (possibly const) `void *`'s to the argument values.
* which the CUDA driver's launch primitive accepts variables in: A null-
* terminated sequence of (possibly const) `void *`'s to the argument values.
*
* @tparam SpanOfConstVoidPtrLike
* Type of the container for the marshalled arguments; typically, this
Expand Down
Loading

0 comments on commit 08e8877

Please sign in to comment.