Skip to content

Commit

Permalink
Explicitly use __ldg on pointer derefs
Browse files Browse the repository at this point in the history
  • Loading branch information
fthaler committed Jul 24, 2024
1 parent b6e3dce commit 40e7559
Show file tree
Hide file tree
Showing 3 changed files with 33 additions and 2 deletions.
10 changes: 10 additions & 0 deletions include/gridtools/fn/cartesian.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,10 @@
#include "./common_interface.hpp"
#include "./executor.hpp"

#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350
#include "../common/cuda_type_traits.hpp"
#endif

namespace gridtools::fn {
namespace cartesian::dim {
using i = integral_constant<int, 0>;
Expand Down Expand Up @@ -44,6 +48,12 @@ namespace gridtools::fn {

/**
 * Reads the value the cartesian iterator currently points at.
 *
 * When compiling device code for compute capability >= 3.5 (see the
 * __CUDA_ARCH__ guard) and the stored pointer is a plain pointer whose decayed
 * pointee type satisfies `is_texture_type`, the value is fetched through the
 * CUDA `__ldg` intrinsic. In every other case the pointer is dereferenced
 * directly. The result is returned by value (`auto` return type).
 *
 * NOTE(review): `std::decay_t` drops cv-qualifiers, so pointers to non-const
 * data also take the `__ldg` path; this presumably relies on the data not being
 * written while the kernel runs -- confirm against the callers.
 */
template <class Tag, class Ptr, class Strides>
GT_FUNCTION auto deref(iterator<Tag, Ptr, Strides> const &it) {
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350
    // Declared type of the stored pointer and the value type behind it.
    using stored_ptr_t = decltype(it.m_ptr);
    using pointee_t = std::decay_t<std::remove_pointer_t<stored_ptr_t>>;
    if constexpr (std::is_pointer_v<stored_ptr_t> && is_texture_type<pointee_t>::value) {
        return __ldg(it.m_ptr);
    }
#endif
    // Generic path: host code, older architectures, fancy pointers, unsupported value types.
    return *it.m_ptr;
}

Expand Down
12 changes: 11 additions & 1 deletion include/gridtools/fn/sid_neighbor_table.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,10 @@
#include "../fn/unstructured.hpp"
#include "../sid/concept.hpp"

#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350
#include "../common/cuda_type_traits.hpp"
#endif

namespace gridtools::fn::sid_neighbor_table {
namespace sid_neighbor_table_impl_ {
template <class IndexDimension,
Expand Down Expand Up @@ -46,7 +50,13 @@ namespace gridtools::fn::sid_neighbor_table {

sid::shift(ptr, sid::get_stride<IndexDimension>(table.strides), index);
for (std::size_t element_idx = 0; element_idx < MaxNumNeighbors; ++element_idx) {
neighbors[element_idx] = *ptr;
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350
if constexpr (std::is_pointer_v<decltype(ptr)> &&
is_texture_type<std::decay_t<std::remove_pointer_t<decltype(ptr)>>>::value)
neighbors[element_idx] = __ldg(ptr);
else
#endif
neighbors[element_idx] = *ptr;
sid::shift(ptr, sid::get_stride<NeighborDimension>(table.strides), 1_c);
}
return neighbors;
Expand Down
13 changes: 12 additions & 1 deletion include/gridtools/fn/unstructured.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,10 @@
#include "./executor.hpp"
#include "./neighbor_table.hpp"

#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350
#include "../common/cuda_type_traits.hpp"
#endif

namespace gridtools::fn {
namespace unstructured::dim {
using horizontal = integral_constant<int, 0>;
Expand Down Expand Up @@ -80,7 +84,14 @@ namespace gridtools::fn {
GT_FUNCTION constexpr auto deref(iterator<Tag, Ptr, Strides, Domain> const &it) {
GT_PROMISE(can_deref(it));
decltype(auto) stride = host_device::at_key<Tag>(sid::get_stride<dim::horizontal>(it.m_strides));
return *sid::shifted(it.m_ptr, stride, it.m_index);
auto ptr = sid::shifted(it.m_ptr, stride, it.m_index);
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350
if constexpr (std::is_pointer_v<decltype(ptr)> &&
is_texture_type<std::decay_t<std::remove_pointer_t<decltype(ptr)>>>::value) {
return __ldg(ptr);
}
#endif
return *ptr;
}

template <class Tag, class Ptr, class Strides, class Domain, class Conn, class Offset>
Expand Down

0 comments on commit 40e7559

Please sign in to comment.