Skip to content

Commit

Permalink
Fixes #640: Reworked the range attribute, advice etc. code:
Browse files Browse the repository at this point in the history
* The user no longer sees `range_attribute_t` nor `advice_t` anywhere, they're within `detail_`
* Using a range namespace (although it's mostly for the `detail_` subnamespace), with `advice_t` and `attribute_t` types
* Now have a proper implementation of a setter for device-nonspecific attributes
* Removed some redundant code
  • Loading branch information
eyalroz committed Apr 29, 2024
1 parent bcd0157 commit 3fc369e
Show file tree
Hide file tree
Showing 3 changed files with 69 additions and 101 deletions.
155 changes: 63 additions & 92 deletions src/cuda/api/memory.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1988,63 +1988,15 @@ inline void zero(T* ptr)

namespace managed {

namespace range {

namespace detail_ {

/// Alias for the CUDA driver's memory-range attribute enumeration
using attribute_t = CUmem_range_attribute;
/// Alias for the CUDA driver's memory-advice enumeration (the kind of value passed to cuMemAdvise)
using advice_t = CUmem_advise;

template <typename T>
inline T get_scalar_range_attribute(const_region_t region, range_attribute_t attribute);

inline void advise(const_region_t region, advice_t advice, cuda::device::id_t device_id);
// inline void advise(const_region_t region, advice_t attribute);

/// Extends a generic region type with managed-memory-specific attribute getters
/// and setters; the constructors of @p GenericRegion are inherited as-is.
template <typename GenericRegion>
struct region_helper : public GenericRegion {
using GenericRegion::GenericRegion;

/// @return the region's read-mostly attribute, obtained via get_scalar_range_attribute
bool is_read_mostly() const
{
return get_scalar_range_attribute<bool>(*this, CU_MEM_RANGE_ATTRIBUTE_READ_MOSTLY);
}

/// Requests that the read-mostly attribute be set for this region
// NOTE(review): the only set_range_attribute signature visible in this view also
// takes a device id - confirm that a two-argument overload exists.
void designate_read_mostly() const
{
set_range_attribute(*this, CU_MEM_RANGE_ATTRIBUTE_READ_MOSTLY);
}

/// Requests that the read-mostly attribute be unset for this region
void undesignate_read_mostly() const
{
unset_range_attribute(*this, CU_MEM_RANGE_ATTRIBUTE_READ_MOSTLY);
}

// The following three members are only declared here; their definitions are
// not part of this struct body.
device_t preferred_location() const;

void set_preferred_location(device_t &device) const;

void clear_preferred_location() const;
};

} // namespace detail_

/// A child class of the generic @ref region_t with some managed-memory-specific functionality
using region_t = detail_::region_helper<memory::region_t>;
/// A child class of the generic @ref const_region_t with some managed-memory-specific functionality
using const_region_t = detail_::region_helper<memory::const_region_t>;

/// Advise the CUDA driver that @p device is expected to access @p region
void advise_expected_access_by(const_region_t region, device_t& device);

/// Advise the CUDA driver that @p device is not expected to access @p region
void advise_no_access_expected_by(const_region_t region, device_t& device);

/// @return the devices which are marked by attribute as being the accessors of a specified memory region
template <typename Allocator = ::std::allocator<cuda::device_t> >
typename ::std::vector<device_t, Allocator> accessors(const_region_t region, const Allocator& allocator = Allocator() );

namespace detail_ {

template <typename T>
inline T get_scalar_range_attribute(const_region_t region, range_attribute_t attribute)
inline T get_scalar_attribute(const_region_t region, attribute_t attribute)
{
uint32_t attribute_value { 0 };
auto result = cuMemRangeGetAttribute(
Expand All @@ -2060,10 +2012,10 @@ inline void advise(const_region_t region, advice_t advice, cuda::device::id_t de
{
auto result = cuMemAdvise(device::address(region.start()), region.size(), advice, device_id);
throw_if_error_lazy(result, "Setting an attribute for a managed memory range at "
+ cuda::detail_::ptr_as_hex(region.start()));
+ cuda::detail_::ptr_as_hex(region.start()));
}

inline advice_t as_advice(range_attribute_t attribute, bool set)
inline advice_t as_advice(attribute_t attribute, bool set)
{
switch (attribute) {
case CU_MEM_RANGE_ATTRIBUTE_READ_MOSTLY:
Expand All @@ -2078,13 +2030,20 @@ inline advice_t as_advice(range_attribute_t attribute, bool set)
}
}

inline void set_range_attribute(const_region_t region, range_attribute_t settable_attribute, cuda::device::id_t device_id)
inline void set_attribute(const_region_t region, attribute_t settable_attribute, cuda::device::id_t device_id)
{
static constexpr const bool set { true };
advise(region, as_advice(settable_attribute, set), device_id);
}

inline void unset_range_attribute(const_region_t region, range_attribute_t settable_attribute)
/// Sets a managed-memory range attribute which is not tied to any particular device
///
/// @param region the managed memory range the advice applies to
/// @param settable_attribute the attribute to set; it is translated by as_advice()
///     into the corresponding "set" advice value
inline void set_attribute(const_region_t region, attribute_t settable_attribute)
{
	// advise() always takes a device id, so a placeholder value of 0 is supplied.
	constexpr cuda::device::id_t placeholder_device_id { 0 };
	const auto advice = as_advice(settable_attribute, true);
	advise(region, advice, placeholder_device_id);
}

inline void unset_attribute(const_region_t region, attribute_t settable_attribute)
{
static constexpr const bool unset { false };
static constexpr const cuda::device::id_t dummy_device_id { 0 };
Expand All @@ -2093,17 +2052,61 @@ inline void unset_range_attribute(const_region_t region, range_attribute_t setta

} // namespace detail_

} // namespace range

namespace detail_ {

/// Augments a generic memory region type with managed-memory-specific attribute
/// queries and advice setters.
///
/// @tparam GenericRegion the region type being extended; its constructors are inherited
template <typename GenericRegion>
struct region_helper : public GenericRegion {
	using GenericRegion::GenericRegion;

	/// @return the value of this region's read-mostly range attribute
	bool is_read_mostly() const
	{
		constexpr auto attribute = CU_MEM_RANGE_ATTRIBUTE_READ_MOSTLY;
		return range::detail_::get_scalar_attribute<bool>(*this, attribute);
	}

	/// Sets the read-mostly range attribute for this region
	void designate_read_mostly() const
	{
		constexpr auto attribute = CU_MEM_RANGE_ATTRIBUTE_READ_MOSTLY;
		range::detail_::set_attribute(*this, attribute);
	}

	/// Unsets the read-mostly range attribute for this region
	void undesignate_read_mostly() const
	{
		constexpr auto attribute = CU_MEM_RANGE_ATTRIBUTE_READ_MOSTLY;
		range::detail_::unset_attribute(*this, attribute);
	}

	// Preferred-location accessors: declared here, defined outside the struct body.
	device_t preferred_location() const;
	void set_preferred_location(device_t& device) const;
	void clear_preferred_location() const;
};

} // namespace detail_

/// A child class of the generic @ref region_t with some managed-memory-specific functionality
using region_t = detail_::region_helper<memory::region_t>;
/// A child class of the generic @ref const_region_t with some managed-memory-specific functionality
using const_region_t = detail_::region_helper<memory::const_region_t>;

/// Advise the CUDA driver that @p device is expected to access @p region
void advise_expected_access_by(const_region_t region, device_t& device);

/// Advise the CUDA driver that @p device is not expected to access @p region
void advise_no_access_expected_by(const_region_t region, device_t& device);

/// @return the devices which are marked by attribute as being the accessors of a specified memory region
template <typename Allocator = ::std::allocator<cuda::device_t> >
typename ::std::vector<device_t, Allocator> expected_accessors(const_region_t region, const Allocator& allocator = Allocator() );

/// Kinds of managed memory region attachments
///
/// Each enumerator takes the value of the correspondingly-named CUDA driver
/// `CU_MEM_ATTACH_*` flag, and the underlying type is `unsigned`.
enum class attachment_t : unsigned {
global = CU_MEM_ATTACH_GLOBAL, ///< wraps CU_MEM_ATTACH_GLOBAL
host = CU_MEM_ATTACH_HOST, ///< wraps CU_MEM_ATTACH_HOST
single_stream = CU_MEM_ATTACH_SINGLE, ///< wraps CU_MEM_ATTACH_SINGLE (presumably: attached to one stream - confirm against the driver docs)
};


namespace detail_ {

inline region_t allocate_in_current_context(
inline managed::region_t allocate_in_current_context(
size_t num_bytes,
initial_visibility_t initial_visibility = initial_visibility_t::to_all_devices)
{
Expand Down Expand Up @@ -2142,7 +2145,7 @@ inline void free(void* ptr)
}

/// @copydoc free(void*)
inline void free(region_t region)
inline void free(managed::region_t region)
{
free(region.start());
}
Expand All @@ -2160,7 +2163,7 @@ struct deleter {
void operator()(void* ptr) const { detail_::free(ptr); }
};

inline region_t allocate(
inline managed::region_t allocate(
context::handle_t context_handle,
size_t num_bytes,
initial_visibility_t initial_visibility = initial_visibility_t::to_all_devices)
Expand Down Expand Up @@ -2237,38 +2240,6 @@ inline void free(region_t region)
free(region.start());
}

namespace advice {
/// Kinds of advice-type attributes for memory regions, w.r.t. devices
// NOTE(review): the enumerators below take CU_MEM_RANGE_ATTRIBUTE_* values, yet
// detail_::set() casts them to advice_t before handing them to cuMemAdvise -
// confirm that the numeric values of the two driver enumerations actually agree.
enum kind_t {
/// The device will mostly read, and only occasionally, write, to the memory region
read_mostly = CU_MEM_RANGE_ATTRIBUTE_READ_MOSTLY,

/// The device is the preferred location of the range
preferred_location = CU_MEM_RANGE_ATTRIBUTE_PREFERRED_LOCATION,

/// Data in the memory range will be accessed by the device, so prevent page faults as much as possible
accessor = CU_MEM_RANGE_ATTRIBUTE_ACCESSED_BY,

// Note: CU_MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION is never set
};

namespace detail_ {

/// Passes @p advice, cast to advice_t, to cuMemAdvise for the given region and
/// device id; a failure status is reported through throw_if_error_lazy.
// NOTE(review): the message literal ends in "at" with no trailing space before
// the pointer text is appended - check whether ptr_as_hex() supplies a separator.
inline void set(const_region_t region, kind_t advice, cuda::device::id_t device_id)
{
auto result = cuMemAdvise(device::address(region.start()), region.size(),
static_cast<managed::detail_::advice_t>(advice), device_id);
throw_if_error_lazy(result, "Setting advice on a (managed) memory region at"
+ cuda::detail_::ptr_as_hex(region.start()) + " w.r.t. " + cuda::device::detail_::identify(device_id));
}

} // namespace detail_

/// Advise the CUDA driver about aspects of the use of a region of memory w.r.t. a particular device
void set(const_region_t region, kind_t advice, const device_t& device);

} // namespace advice

namespace async {

namespace detail_ {
Expand Down
12 changes: 6 additions & 6 deletions src/cuda/api/multi_wrapper_impls/memory.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -198,36 +198,36 @@ namespace detail_ {
/// Obtains the device designated as the preferred location of this managed region
///
/// @return a device proxy obtained, via cuda::device::get(), for the device id
///     which the driver reports for the preferred-location range attribute
///
/// @note The attribute is read as a device id. Reading it as a `bool` would
///     collapse every non-zero id to 1 and so name the wrong device.
// NOTE(review): per the CUDA driver documentation the reported value may also be
// CU_DEVICE_CPU or CU_DEVICE_INVALID - confirm how cuda::device::get() treats those.
template <typename GenericRegion>
inline device_t region_helper<GenericRegion>::preferred_location() const
{
	auto device_id = range::detail_::get_scalar_attribute<cuda::device::id_t>(
		*this, CU_MEM_RANGE_ATTRIBUTE_PREFERRED_LOCATION);
	return cuda::device::get(device_id);
}

/// Sets the preferred-location range attribute of this region to a given device
///
/// @param device the device whose id is passed along with the attribute
template <typename GenericRegion>
inline void region_helper<GenericRegion>::set_preferred_location(device_t& device) const
{
	range::detail_::set_attribute(*this, CU_MEM_RANGE_ATTRIBUTE_PREFERRED_LOCATION, device.id());
}

/// Unsets the preferred-location range attribute of this region
template <typename GenericRegion>
inline void region_helper<GenericRegion>::clear_preferred_location() const
{
	range::detail_::unset_attribute(*this, CU_MEM_RANGE_ATTRIBUTE_PREFERRED_LOCATION);
}

} // namespace detail_

/// Advise the CUDA driver that @p device is expected to access @p region
///
/// @param region the managed memory range the advice applies to
/// @param device the device whose id accompanies the CU_MEM_ADVISE_SET_ACCESSED_BY advice
inline void advise_expected_access_by(const_region_t region, device_t& device)
{
	range::detail_::advise(region, CU_MEM_ADVISE_SET_ACCESSED_BY, device.id());
}

/// Advise the CUDA driver that @p device is not expected to access @p region
///
/// @param region the managed memory range the advice applies to
/// @param device the device whose id accompanies the CU_MEM_ADVISE_UNSET_ACCESSED_BY advice
inline void advise_no_access_expected_by(const_region_t region, device_t& device)
{
	range::detail_::advise(region, CU_MEM_ADVISE_UNSET_ACCESSED_BY, device.id());
}

template <typename Allocator>
::std::vector<device_t, Allocator> accessors(const_region_t region, const Allocator& allocator)
::std::vector<device_t, Allocator> expected_accessors(const_region_t region, const Allocator& allocator)
{
auto num_devices = cuda::device::count();
::std::vector<device_t, Allocator> devices(num_devices, allocator);
Expand Down
3 changes: 0 additions & 3 deletions src/cuda/api/types.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -678,9 +678,6 @@ enum class initial_visibility_t {
to_supporters_of_concurrent_managed_access,
};

/// A specifier of one of the attributes of managed memory regions
using range_attribute_t = CUmem_range_attribute;

} // namespace managed

#if CUDA_VERSION >= 11070
Expand Down

0 comments on commit 3fc369e

Please sign in to comment.