Skip to content
This repository has been archived by the owner on Mar 21, 2024. It is now read-only.

Commit

Permalink
Use CUB's new CDP macros.
Browse files Browse the repository at this point in the history
  • Loading branch information
alliepiper committed Apr 27, 2022
1 parent e9953c8 commit c2d5261
Show file tree
Hide file tree
Showing 24 changed files with 962 additions and 2,070 deletions.
2 changes: 1 addition & 1 deletion dependencies/cub
Submodule cub updated 34 files
+2 −1 cmake/CubCudaConfig.cmake
+84 −0 cub/detail/cdp_dispatch.cuh
+74 −0 cub/detail/detect_cuda_runtime.cuh
+1 −7 cub/detail/device_synchronize.cuh
+2 −2 cub/detail/type_traits.cuh
+3 −12 cub/device/dispatch/dispatch_histogram.cuh
+0 −30 cub/device/dispatch/dispatch_radix_sort.cuh
+0 −27 cub/device/dispatch/dispatch_reduce.cuh
+0 −25 cub/device/dispatch/dispatch_reduce_by_key.cuh
+0 −10 cub/device/dispatch/dispatch_rle.cuh
+0 −12 cub/device/dispatch/dispatch_scan.cuh
+0 −11 cub/device/dispatch/dispatch_scan_by_key.cuh
+40 −51 cub/device/dispatch/dispatch_segmented_sort.cuh
+0 −24 cub/device/dispatch/dispatch_select_if.cuh
+0 −8 cub/device/dispatch/dispatch_spmv_orig.cuh
+5 −14 cub/device/dispatch/dispatch_unique_by_key.cuh
+6 −14 cub/util_arch.cuh
+3 −5 cub/util_debug.cuh
+1 −44 cub/util_device.cuh
+0 −9 experimental/defunct/test_device_seg_reduce.cu
+42 −23 test/CMakeLists.txt
+0 −39 test/test_allocator.cu
+0 −71 test/test_device_histogram.cu
+719 −518 test/test_device_radix_sort.cu
+305 −172 test/test_device_reduce.cu
+125 −83 test/test_device_reduce_by_key.cu
+127 −87 test/test_device_run_length_encode.cu
+107 −93 test/test_device_scan.cu
+124 −106 test/test_device_scan_by_key.cu
+10 −6 test/test_device_segmented_sort.cu
+122 −78 test/test_device_select_if.cu
+96 −68 test/test_device_select_unique.cu
+110 −73 test/test_device_select_unique_by_key.cu
+85 −15 test/test_util.h
13 changes: 0 additions & 13 deletions thrust/system/cuda/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,19 +32,6 @@
// older releases. This header will always pull in version info:
#include <cub/util_namespace.cuh>

#if defined(__CUDACC__) || defined(_NVHPC_CUDA)
# if !defined(__CUDA_ARCH__) || defined(__CUDACC_RDC__)
# define __THRUST_HAS_CUDART__ 1
# define THRUST_RUNTIME_FUNCTION __host__ __device__ __forceinline__
# else
# define __THRUST_HAS_CUDART__ 0
# define THRUST_RUNTIME_FUNCTION __host__ __forceinline__
# endif
#else
# define __THRUST_HAS_CUDART__ 0
# define THRUST_RUNTIME_FUNCTION __host__ __forceinline__
#endif

#ifdef THRUST_AGENT_ENTRY_NOINLINE
#define THRUST_AGENT_ENTRY_INLINE_ATTR __noinline__
#else
Expand Down
56 changes: 24 additions & 32 deletions thrust/system/cuda/detail/adjacent_difference.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,21 +29,22 @@
#include <thrust/detail/config.h>

#if THRUST_DEVICE_COMPILER == THRUST_DEVICE_COMPILER_NVCC
#include <thrust/system/cuda/config.h>

#include <thrust/detail/cstdint.h>
#include <thrust/detail/minmax.h>
#include <thrust/detail/mpl/math.h>
#include <thrust/detail/temporary_array.h>
#include <thrust/system/cuda/detail/util.h>
#include <cub/device/device_select.cuh>
#include <cub/block/block_adjacent_difference.cuh>
#include <thrust/distance.h>
#include <thrust/functional.h>
#include <thrust/system/cuda/config.h>
#include <thrust/system/cuda/detail/core/agent_launcher.h>
#include <thrust/system/cuda/detail/par_to_seq.h>
#include <thrust/system/cuda/detail/dispatch.h>
#include <thrust/functional.h>
#include <thrust/distance.h>
#include <thrust/detail/mpl/math.h>
#include <thrust/detail/minmax.h>
#include <thrust/system/cuda/detail/par_to_seq.h>
#include <thrust/system/cuda/detail/util.h>

#include <cub/block/block_adjacent_difference.cuh>
#include <cub/detail/cdp_dispatch.cuh>
#include <cub/device/device_select.cuh>
#include <cub/util_math.cuh>

THRUST_NAMESPACE_BEGIN
Expand Down Expand Up @@ -362,7 +363,7 @@ namespace __adjacent_difference {
class OutputIt,
class BinaryOp,
class Size>
cudaError_t THRUST_RUNTIME_FUNCTION
cudaError_t CUB_RUNTIME_FUNCTION
doit_step(void * d_temp_storage,
size_t & temp_storage_bytes,
InputIt first,
Expand Down Expand Up @@ -436,7 +437,7 @@ namespace __adjacent_difference {
typename InputIt,
typename OutputIt,
typename BinaryOp>
OutputIt THRUST_RUNTIME_FUNCTION
OutputIt CUB_RUNTIME_FUNCTION
adjacent_difference(execution_policy<Derived>& policy,
InputIt first,
InputIt last,
Expand Down Expand Up @@ -490,27 +491,18 @@ adjacent_difference(execution_policy<Derived> &policy,
OutputIt result,
BinaryOp binary_op)
{
OutputIt ret = result;
if (__THRUST_HAS_CUDART__)
{
ret = __adjacent_difference::adjacent_difference(policy,
first,
last,
result,
binary_op);
}
else
{
#if !__THRUST_HAS_CUDART__
ret = thrust::adjacent_difference(cvt_to_seq(derived_cast(policy)),
first,
last,
result,
binary_op);
#endif
}

return ret;
CUB_CDP_DISPATCH(
(result = __adjacent_difference::adjacent_difference(policy,
first,
last,
result,
binary_op);),
(result = thrust::adjacent_difference(cvt_to_seq(derived_cast(policy)),
first,
last,
result,
binary_op);));
return result;
}

template <class Derived,
Expand Down
46 changes: 17 additions & 29 deletions thrust/system/cuda/detail/copy.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,14 @@

#include <thrust/detail/config.h>

#include <thrust/advance.h>

#include <thrust/system/cuda/config.h>
#include <thrust/system/cuda/detail/execution_policy.h>
#include <thrust/system/cuda/detail/cross_system.h>

#include <cub/detail/cdp_dispatch.cuh>

THRUST_NAMESPACE_BEGIN

template <typename DerivedPolicy, typename InputIt, typename OutputIt>
Expand Down Expand Up @@ -117,22 +121,11 @@ copy(execution_policy<System> &system,
InputIterator last,
OutputIterator result)
{
OutputIterator ret = result;
if (__THRUST_HAS_CUDART__)
{
ret = __copy::device_to_device(system, first, last, result);
}
else
{
#if !__THRUST_HAS_CUDART__
ret = thrust::copy(cvt_to_seq(derived_cast(system)),
first,
last,
result);
#endif
}

return ret;
CUB_CDP_DISPATCH(
(result = __copy::device_to_device(system, first, last, result);),
(result =
thrust::copy(cvt_to_seq(derived_cast(system)), first, last, result);));
return result;
} // end copy()

__thrust_exec_check_disable__
Expand All @@ -146,19 +139,14 @@ copy_n(execution_policy<System> &system,
Size n,
OutputIterator result)
{
OutputIterator ret = result;
if (__THRUST_HAS_CUDART__)
{
ret = __copy::device_to_device(system, first, first + n, result);
}
else
{
#if !__THRUST_HAS_CUDART__
ret = thrust::copy_n(cvt_to_seq(derived_cast(system)), first, n, result);
#endif
}

return ret;
CUB_CDP_DISPATCH(
(result = __copy::device_to_device(system,
first,
thrust::next(first, n),
result);),
(result =
thrust::copy_n(cvt_to_seq(derived_cast(system)), first, n, result);));
return result;
} // end copy_n()
#endif

Expand Down
103 changes: 40 additions & 63 deletions thrust/system/cuda/detail/copy_if.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,19 +29,20 @@
#include <thrust/detail/config.h>

#if THRUST_DEVICE_COMPILER == THRUST_DEVICE_COMPILER_NVCC
#include <thrust/system/cuda/config.h>

#include <thrust/detail/alignment.h>
#include <thrust/detail/cstdint.h>
#include <thrust/detail/function.h>
#include <thrust/detail/temporary_array.h>
#include <thrust/system/cuda/detail/util.h>
#include <cub/device/device_select.cuh>
#include <thrust/distance.h>
#include <thrust/system/cuda/config.h>
#include <thrust/system/cuda/detail/core/agent_launcher.h>
#include <thrust/system/cuda/detail/core/util.h>
#include <thrust/system/cuda/detail/par_to_seq.h>
#include <thrust/detail/function.h>
#include <thrust/distance.h>
#include <thrust/detail/alignment.h>
#include <thrust/system/cuda/detail/util.h>

#include <cub/detail/cdp_dispatch.cuh>
#include <cub/device/device_select.cuh>
#include <cub/util_math.cuh>

THRUST_NAMESPACE_BEGIN
Expand Down Expand Up @@ -598,17 +599,17 @@ namespace __copy_if {
class Predicate,
class Size,
class NumSelectedOutIt>
static cudaError_t THRUST_RUNTIME_FUNCTION
doit_step(void * d_temp_storage,
size_t & temp_storage_bytes,
ItemsIt items,
StencilIt stencil,
OutputIt output_it,
Predicate predicate,
NumSelectedOutIt num_selected_out,
Size num_items,
cudaStream_t stream,
bool debug_sync)
CUB_RUNTIME_FUNCTION
static cudaError_t doit_step(void * d_temp_storage,
size_t & temp_storage_bytes,
ItemsIt items,
StencilIt stencil,
OutputIt output_it,
Predicate predicate,
NumSelectedOutIt num_selected_out,
Size num_items,
cudaStream_t stream,
bool debug_sync)
{
if (num_items == 0)
return cudaSuccess;
Expand Down Expand Up @@ -695,7 +696,7 @@ namespace __copy_if {
typename StencilIt,
typename OutputIt,
typename Predicate>
THRUST_RUNTIME_FUNCTION
CUB_RUNTIME_FUNCTION
OutputIt copy_if(execution_policy<Derived>& policy,
InputIt first,
InputIt last,
Expand Down Expand Up @@ -789,28 +790,18 @@ copy_if(execution_policy<Derived> &policy,
OutputIterator result,
Predicate pred)
{
OutputIterator ret = result;

if (__THRUST_HAS_CUDART__)
{
ret = __copy_if::copy_if(policy,
first,
last,
__copy_if::no_stencil_tag(),
result,
pred);
}
else
{
#if !__THRUST_HAS_CUDART__
ret = thrust::copy_if(cvt_to_seq(derived_cast(policy)),
first,
last,
result,
pred);
#endif
}
return ret;
CUB_CDP_DISPATCH((result = __copy_if::copy_if(policy,
first,
last,
__copy_if::no_stencil_tag(),
result,
pred);),
(result = thrust::copy_if(cvt_to_seq(derived_cast(policy)),
first,
last,
result,
pred);));
return result;
} // func copy_if

__thrust_exec_check_disable__
Expand All @@ -827,29 +818,15 @@ copy_if(execution_policy<Derived> &policy,
OutputIterator result,
Predicate pred)
{
OutputIterator ret = result;

if (__THRUST_HAS_CUDART__)
{
ret = __copy_if::copy_if(policy,
first,
last,
stencil,
result,
pred);
}
else
{
#if !__THRUST_HAS_CUDART__
ret = thrust::copy_if(cvt_to_seq(derived_cast(policy)),
first,
last,
stencil,
result,
pred);
#endif
}
return ret;
CUB_CDP_DISPATCH(
(result = __copy_if::copy_if(policy, first, last, stencil, result, pred);),
(result = thrust::copy_if(cvt_to_seq(derived_cast(policy)),
first,
last,
stencil,
result,
pred);));
return result;
} // func copy_if

} // namespace cuda_cub
Expand Down
Loading

0 comments on commit c2d5261

Please sign in to comment.