diff --git a/.gitmodules b/.gitmodules index 1d8e604ef..0bb39f302 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,6 @@ [submodule "cub"] path = dependencies/cub url = ../cub.git +[submodule "libcudacxx"] + path = dependencies/libcudacxx + url = ../libcudacxx.git diff --git a/cmake/ThrustInstallRules.cmake b/cmake/ThrustInstallRules.cmake index 93084c11d..993dba153 100644 --- a/cmake/ThrustInstallRules.cmake +++ b/cmake/ThrustInstallRules.cmake @@ -24,27 +24,35 @@ configure_file("${Thrust_SOURCE_DIR}/thrust/cmake/thrust-header-search.cmake.in" install(FILES "${Thrust_BINARY_DIR}/thrust/cmake/thrust-header-search.cmake" DESTINATION "${install_location}") -# Depending on how Thrust is configured, CUB's CMake scripts may or may not be -# included, so maintain a set of CUB install rules in both projects. By default -# CUB headers are installed alongside Thrust -- this may be disabled by turning -# off THRUST_INSTALL_CUB_HEADERS. -option(THRUST_INSTALL_CUB_HEADERS "Include cub headers when installing." ON) +# Depending on how Thrust is configured, libcudacxx and CUB's CMake scripts may +# or may not be include()'d, so force include their install rules when requested. +# By default, these projects are installed alongside Thrust. This is controlled by +# THRUST_INSTALL_CUB_HEADERS and THRUST_INSTALL_LIBCUDACXX_HEADERS. +option(THRUST_INSTALL_CUB_HEADERS "Include CUB headers when installing." ON) if (THRUST_INSTALL_CUB_HEADERS) - install(DIRECTORY "${Thrust_SOURCE_DIR}/dependencies/cub/cub" - DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}" - FILES_MATCHING - PATTERN "*.cuh" - ) + # Use a function to limit scope of the CUB_*_DIR vars: + function(_thrust_install_cub_headers) + # Fake these for the logic in CUBInstallRules.cmake: + set(CUB_SOURCE_DIR "${Thrust_SOURCE_DIR}/dependencies/cub/") + set(CUB_BINARY_DIR "${Thrust_BINARY_DIR}/cub-config/") + set(CUB_ENABLE_INSTALL_RULES ON) + set(CUB_IN_THRUST OFF) + include("${Thrust_SOURCE_DIR}/dependencies/cub/cmake/CubInstallRules.cmake") + endfunction() - # Need to configure a file to store THRUST_INSTALL_HEADER_INFIX - install(DIRECTORY "${Thrust_SOURCE_DIR}/dependencies/cub/cub/cmake/" - DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/cub" - PATTERN cub-header-search EXCLUDE - ) - set(install_location "${CMAKE_INSTALL_LIBDIR}/cmake/cub") - configure_file("${Thrust_SOURCE_DIR}/dependencies/cub/cub/cmake/cub-header-search.cmake.in" - "${Thrust_BINARY_DIR}/dependencies/cub/cub/cmake/cub-header-search.cmake" - @ONLY) - install(FILES "${Thrust_BINARY_DIR}/dependencies/cub/cub/cmake/cub-header-search.cmake" - DESTINATION "${install_location}") + _thrust_install_cub_headers() +endif() + +option(THRUST_INSTALL_LIBCUDACXX_HEADERS "Include libcudacxx headers when installing." ON) +if (THRUST_INSTALL_LIBCUDACXX_HEADERS) + # Use a function to limit scope of the libcudacxx_*_DIR vars: + function(_thrust_install_libcudacxx_headers) + # Fake these for the logic in libcudacxxInstallRules.cmake: + set(libcudacxx_SOURCE_DIR "${Thrust_SOURCE_DIR}/dependencies/libcudacxx/") + set(libcudacxx_BINARY_DIR "${Thrust_BINARY_DIR}/libcudacxx-config/") + set(libcudacxx_ENABLE_INSTALL_RULES ON) + include("${Thrust_SOURCE_DIR}/dependencies/libcudacxx/cmake/libcudacxxInstallRules.cmake") + endfunction() + + _thrust_install_libcudacxx_headers() endif() diff --git a/dependencies/cub b/dependencies/cub index 56dcb06d0..4de961aee 160000 --- a/dependencies/cub +++ b/dependencies/cub @@ -1 +1 @@ -Subproject commit 56dcb06d0cd7f923c373a27a7f9993722e0f50b4 +Subproject commit 4de961aee49c894e9c380d7c2f7e750016976f00 diff --git a/dependencies/libcudacxx b/dependencies/libcudacxx new file mode 160000 index 000000000..05d48aaa1 --- /dev/null +++ b/dependencies/libcudacxx @@ -0,0 +1 @@ +Subproject commit 05d48aaa12a3c310c333298331c41a9214f08f22 diff --git a/testing/allocator.cu b/testing/allocator.cu index a29408de9..175685ed0 100644 --- a/testing/allocator.cu +++ b/testing/allocator.cu @@ -2,6 +2,9 @@ #include #include #include + +#include + #include template @@ -60,9 +63,12 @@ DECLARE_VARIABLE_UNITTEST(TestAllocatorCustomCopyConstruct); template struct my_allocator_with_custom_destroy { - typedef T value_type; - typedef T & reference; - typedef const T & const_reference; + // This is only used with thrust::cpp::vector: + using system_type = thrust::cpp::tag; + + using value_type = T; + using reference = T &; + using const_reference = const T &; static bool g_state; @@ -80,9 +86,7 @@ struct my_allocator_with_custom_destroy __host__ __device__ void destroy(T *) { -#if !__CUDA_ARCH__ - g_state = true; -#endif + NV_IF_TARGET(NV_IS_HOST, (g_state = true;)); } value_type *allocate(std::ptrdiff_t n) @@ -119,12 +123,14 @@ bool my_allocator_with_custom_destroy::g_state = false; template void TestAllocatorCustomDestroy(size_t n) { + my_allocator_with_custom_destroy::g_state = false; + { thrust::cpp::vector > vec(n); } // destroy everything - if (0 < n) - ASSERT_EQUAL(true, my_allocator_with_custom_destroy::g_state); + // state should only be true when there are values to destroy: + ASSERT_EQUAL(n > 0, my_allocator_with_custom_destroy::g_state); } DECLARE_VARIABLE_UNITTEST(TestAllocatorCustomDestroy); @@ -203,7 +209,6 @@ void TestAllocatorTraitsRebind() } DECLARE_UNITTEST(TestAllocatorTraitsRebind); -#if THRUST_CPP_DIALECT >= 2011 void TestAllocatorTraitsRebindCpp11() { ASSERT_EQUAL( @@ -251,5 +256,3 @@ void TestAllocatorTraitsRebindCpp11() ); } DECLARE_UNITTEST(TestAllocatorTraitsRebindCpp11); -#endif // C++11 - diff --git a/testing/cuda/pair_sort.cu b/testing/cuda/pair_sort.cu index 87838e429..35a6b67e3 100644 --- a/testing/cuda/pair_sort.cu +++ b/testing/cuda/pair_sort.cu @@ -4,16 +4,11 @@ #include -template +template __global__ -void stable_sort_kernel(ExecutionPolicy exec, Iterator1 first, Iterator1 last, Iterator2 is_supported) +void stable_sort_kernel(ExecutionPolicy exec, Iterator first, Iterator last) { -#if (__CUDA_ARCH__ >= 200) - *is_supported = true; thrust::stable_sort(exec, first, last); -#else - *is_supported = false; -#endif } @@ -43,19 +38,14 @@ void TestPairStableSortDevice(ExecutionPolicy exec) thrust::device_vector

d_pairs = h_pairs; - thrust::device_vector is_supported(1); - - stable_sort_kernel<<<1,1>>>(exec, d_pairs.begin(), d_pairs.end(), is_supported.begin()); + stable_sort_kernel<<<1,1>>>(exec, d_pairs.begin(), d_pairs.end()); cudaError_t const err = cudaDeviceSynchronize(); ASSERT_EQUAL(cudaSuccess, err); - if(is_supported[0]) - { - // sort on the host - thrust::stable_sort(h_pairs.begin(), h_pairs.end()); + // sort on the host + thrust::stable_sort(h_pairs.begin(), h_pairs.end()); - ASSERT_EQUAL_QUIET(h_pairs, d_pairs); - } + ASSERT_EQUAL_QUIET(h_pairs, d_pairs); }; diff --git a/testing/cuda/pair_sort_by_key.cu b/testing/cuda/pair_sort_by_key.cu index 19996e5a2..59908eef4 100644 --- a/testing/cuda/pair_sort_by_key.cu +++ b/testing/cuda/pair_sort_by_key.cu @@ -6,16 +6,11 @@ #include -template +template __global__ -void stable_sort_by_key_kernel(ExecutionPolicy exec, Iterator1 keys_first, Iterator1 keys_last, Iterator2 values_first, Iterator3 is_supported) +void stable_sort_by_key_kernel(ExecutionPolicy exec, Iterator1 keys_first, Iterator1 keys_last, Iterator2 values_first) { -#if (__CUDA_ARCH__ >= 200) - *is_supported = true; thrust::stable_sort_by_key(exec, keys_first, keys_last, values_first); -#else - *is_supported = false; -#endif } @@ -51,21 +46,16 @@ void TestPairStableSortByKeyDevice(ExecutionPolicy exec) thrust::device_vector

d_pairs = h_pairs; thrust::device_vector d_values = h_values; - thrust::device_vector is_supported(1); - // sort on the device - stable_sort_by_key_kernel<<<1,1>>>(exec, d_pairs.begin(), d_pairs.end(), d_values.begin(), is_supported.begin()); + stable_sort_by_key_kernel<<<1,1>>>(exec, d_pairs.begin(), d_pairs.end(), d_values.begin()); cudaError_t const err = cudaDeviceSynchronize(); ASSERT_EQUAL(cudaSuccess, err); - if(is_supported[0]) - { - // sort on the host - thrust::stable_sort_by_key(h_pairs.begin(), h_pairs.end(), h_values.begin()); + // sort on the host + thrust::stable_sort_by_key(h_pairs.begin(), h_pairs.end(), h_values.begin()); - ASSERT_EQUAL_QUIET(h_pairs, d_pairs); - ASSERT_EQUAL(h_values, d_values); - } + ASSERT_EQUAL_QUIET(h_pairs, d_pairs); + ASSERT_EQUAL(h_values, d_values); }; diff --git a/testing/cuda/partition.cu b/testing/cuda/partition.cu index 2da7d35d2..f9ec48600 100644 --- a/testing/cuda/partition.cu +++ b/testing/cuda/partition.cu @@ -286,16 +286,11 @@ void TestPartitionCopyStencilDeviceNoSync() DECLARE_UNITTEST(TestPartitionCopyStencilDeviceNoSync); -template +template __global__ -void stable_partition_kernel(ExecutionPolicy exec, Iterator1 first, Iterator1 last, Predicate pred, Iterator2 result, Iterator3 is_supported) +void stable_partition_kernel(ExecutionPolicy exec, Iterator1 first, Iterator1 last, Predicate pred, Iterator2 result) { -#if (__CUDA_ARCH__ >= 200) - *is_supported = true; *result = thrust::stable_partition(exec, first, last, pred); -#else - *is_supported = false; -#endif } @@ -313,24 +308,20 @@ void TestStablePartitionDevice(ExecutionPolicy exec) data[4] = 2; thrust::device_vector result(1); - thrust::device_vector is_supported(1); - - stable_partition_kernel<<<1,1>>>(exec, data.begin(), data.end(), is_even(), result.begin(), is_supported.begin()); + + stable_partition_kernel<<<1,1>>>(exec, data.begin(), data.end(), is_even(), result.begin()); cudaError_t const err = cudaDeviceSynchronize(); ASSERT_EQUAL(cudaSuccess, err); - if(is_supported[0]) - { - thrust::device_vector ref(5); - ref[0] = 2; - ref[1] = 2; - ref[2] = 1; - ref[3] = 1; - ref[4] = 1; + thrust::device_vector ref(5); + ref[0] = 2; + ref[1] = 2; + ref[2] = 1; + ref[3] = 1; + ref[4] = 1; - ASSERT_EQUAL(2, (iterator)result[0] - data.begin()); - ASSERT_EQUAL(ref, data); - } + ASSERT_EQUAL(2, (iterator)result[0] - data.begin()); + ASSERT_EQUAL(ref, data); } @@ -355,16 +346,11 @@ void TestStablePartitionDeviceNoSync() DECLARE_UNITTEST(TestStablePartitionDeviceNoSync); -template +template __global__ -void stable_partition_kernel(ExecutionPolicy exec, Iterator1 first, Iterator1 last, Iterator2 stencil_first, Predicate pred, Iterator3 result, Iterator4 is_supported) +void stable_partition_kernel(ExecutionPolicy exec, Iterator1 first, Iterator1 last, Iterator2 stencil_first, Predicate pred, Iterator3 result) { -#if (__CUDA_ARCH__ >= 200) - *is_supported = true; *result = thrust::stable_partition(exec, first, last, stencil_first, pred); -#else - *is_supported = false; -#endif } @@ -389,24 +375,20 @@ void TestStablePartitionStencilDevice(ExecutionPolicy exec) stencil[4] = 2; thrust::device_vector result(1); - thrust::device_vector is_supported(1); - - stable_partition_kernel<<<1,1>>>(exec, data.begin(), data.end(), stencil.begin(), is_even(), result.begin(), is_supported.begin()); + + stable_partition_kernel<<<1,1>>>(exec, data.begin(), data.end(), stencil.begin(), is_even(), result.begin()); cudaError_t const err = cudaDeviceSynchronize(); ASSERT_EQUAL(cudaSuccess, err); - if(is_supported[0]) - { - thrust::device_vector ref(5); - ref[0] = 1; - ref[1] = 1; - ref[2] = 0; - ref[3] = 0; - ref[4] = 0; + thrust::device_vector ref(5); + ref[0] = 1; + ref[1] = 1; + ref[2] = 0; + ref[3] = 0; + ref[4] = 0; - ASSERT_EQUAL(2, (iterator)result[0] - data.begin()); - ASSERT_EQUAL(ref, data); - } + ASSERT_EQUAL(2, (iterator)result[0] - data.begin()); + ASSERT_EQUAL(ref, data); } diff --git a/testing/cuda/sort.cu b/testing/cuda/sort.cu index 7f3d6413c..1d341011f 100644 --- a/testing/cuda/sort.cu +++ b/testing/cuda/sort.cu @@ -4,16 +4,11 @@ #include -template +template __global__ -void sort_kernel(ExecutionPolicy exec, Iterator first, Iterator last, Compare comp, Iterator2 is_supported) +void sort_kernel(ExecutionPolicy exec, Iterator first, Iterator last, Compare comp) { -#if (__CUDA_ARCH__ >= 200) - *is_supported = true; thrust::sort(exec, first, last, comp); -#else - *is_supported = false; -#endif } @@ -34,19 +29,14 @@ void TestComparisonSortDevice(ExecutionPolicy exec, const size_t n, Compare comp thrust::host_vector h_data = unittest::random_integers(n); thrust::device_vector d_data = h_data; - thrust::device_vector is_supported(1); - - sort_kernel<<<1,1>>>(exec, d_data.begin(), d_data.end(), comp, is_supported.begin()); + sort_kernel<<<1,1>>>(exec, d_data.begin(), d_data.end(), comp); cudaError_t const err = cudaDeviceSynchronize(); ASSERT_EQUAL(cudaSuccess, err); - if(is_supported[0]) - { - thrust::sort(h_data.begin(), h_data.end(), comp); - - ASSERT_EQUAL(h_data, d_data); - } + thrust::sort(h_data.begin(), h_data.end(), comp); + + ASSERT_EQUAL(h_data, d_data); }; @@ -163,7 +153,7 @@ void TestComparisonSortCudaStreams() cudaStreamSynchronize(s); ASSERT_EQUAL(true, thrust::is_sorted(keys.begin(), keys.end(), my_less())); - + cudaStreamDestroy(s); } DECLARE_UNITTEST(TestComparisonSortCudaStreams); diff --git a/testing/cuda/sort_by_key.cu b/testing/cuda/sort_by_key.cu index 1e848879b..8863be27a 100644 --- a/testing/cuda/sort_by_key.cu +++ b/testing/cuda/sort_by_key.cu @@ -4,16 +4,11 @@ #include -template +template __global__ -void sort_by_key_kernel(ExecutionPolicy exec, Iterator1 keys_first, Iterator1 keys_last, Iterator2 values_first, Compare comp, Iterator3 is_supported) +void sort_by_key_kernel(ExecutionPolicy exec, Iterator1 keys_first, Iterator1 keys_last, Iterator2 values_first, Compare comp) { -#if (__CUDA_ARCH__ >= 200) - *is_supported = true; thrust::sort_by_key(exec, keys_first, keys_last, values_first, comp); -#else - *is_supported = false; -#endif } @@ -36,19 +31,15 @@ void TestComparisonSortByKeyDevice(ExecutionPolicy exec, const size_t n, Compare thrust::host_vector h_values = h_keys; thrust::device_vector d_values = d_keys; - - thrust::device_vector is_supported(1); - sort_by_key_kernel<<<1,1>>>(exec, d_keys.begin(), d_keys.end(), d_values.begin(), comp, is_supported.begin()); + + sort_by_key_kernel<<<1,1>>>(exec, d_keys.begin(), d_keys.end(), d_values.begin(), comp); cudaError_t const err = cudaDeviceSynchronize(); ASSERT_EQUAL(cudaSuccess, err); - if(is_supported[0]) - { - thrust::sort_by_key(h_keys.begin(), h_keys.end(), h_values.begin(), comp); - - ASSERT_EQUAL(h_keys, d_keys); - ASSERT_EQUAL(h_values, d_values); - } + thrust::sort_by_key(h_keys.begin(), h_keys.end(), h_values.begin(), comp); + + ASSERT_EQUAL(h_keys, d_keys); + ASSERT_EQUAL(h_values, d_values); }; @@ -139,7 +130,7 @@ void TestComparisonSortByKeyCudaStreams() ASSERT_EQUAL(true, thrust::is_sorted(keys.begin(), keys.end())); ASSERT_EQUAL(true, thrust::is_sorted(vals.begin(), vals.end())); - + cudaStreamDestroy(s); } DECLARE_UNITTEST(TestComparisonSortByKeyCudaStreams); @@ -169,7 +160,7 @@ void TestSortByKeyCudaStreams() ASSERT_EQUAL(true, thrust::is_sorted(keys.begin(), keys.end())); ASSERT_EQUAL(true, thrust::is_sorted(vals.begin(), vals.end())); - + cudaStreamDestroy(s); } DECLARE_UNITTEST(TestSortByKeyCudaStreams); diff --git a/testing/device_delete.cu b/testing/device_delete.cu index 6684cb2b5..12f757fa4 100644 --- a/testing/device_delete.cu +++ b/testing/device_delete.cu @@ -4,21 +4,23 @@ #include #include +#include + struct Foo { __host__ __device__ Foo(void) - :set_me_upon_destruction(0) + : set_me_upon_destruction{nullptr} {} __host__ __device__ ~Foo(void) { -#ifdef __CUDA_ARCH__ - // __device__ overload - if(set_me_upon_destruction != 0) - *set_me_upon_destruction = true; -#endif + NV_IF_TARGET(NV_IS_DEVICE, ( + if (set_me_upon_destruction != nullptr) + { + *set_me_upon_destruction = true; + })); } bool *set_me_upon_destruction; diff --git a/testing/uninitialized_copy.cu b/testing/uninitialized_copy.cu index 7455d8c81..62a79cdc9 100644 --- a/testing/uninitialized_copy.cu +++ b/testing/uninitialized_copy.cu @@ -3,6 +3,7 @@ #include #include +#include template ForwardIterator uninitialized_copy(my_system &system, @@ -147,13 +148,13 @@ struct CopyConstructTest __host__ __device__ CopyConstructTest(const CopyConstructTest &) { -#if __CUDA_ARCH__ - copy_constructed_on_device = true; - copy_constructed_on_host = false; -#else - copy_constructed_on_device = false; - copy_constructed_on_device = true; -#endif + NV_IF_TARGET(NV_IS_DEVICE, ( + copy_constructed_on_device = true; + copy_constructed_on_host = false; + ), ( + copy_constructed_on_device = false; + copy_constructed_on_host = true; + )); } __host__ __device__ diff --git a/testing/uninitialized_fill.cu b/testing/uninitialized_fill.cu index facd6fe6f..8fbb97002 100644 --- a/testing/uninitialized_fill.cu +++ b/testing/uninitialized_fill.cu @@ -3,6 +3,7 @@ #include #include +#include template void uninitialized_fill(my_system &system, @@ -156,13 +157,13 @@ struct CopyConstructTest __host__ __device__ CopyConstructTest(const CopyConstructTest &) { -#if __CUDA_ARCH__ - copy_constructed_on_device = true; - copy_constructed_on_host = false; -#else - copy_constructed_on_device = false; - copy_constructed_on_host = true; -#endif + NV_IF_TARGET(NV_IS_DEVICE, ( + copy_constructed_on_device = true; + copy_constructed_on_host = false; + ), ( + copy_constructed_on_device = false; + copy_constructed_on_host = true; + )); } __host__ __device__ diff --git a/testing/unittest/cuda/testframework.cu b/testing/unittest/cuda/testframework.cu index d5bc4aaba..ff30f368c 100644 --- a/testing/unittest/cuda/testframework.cu +++ b/testing/unittest/cuda/testframework.cu @@ -137,7 +137,6 @@ bool CUDATestDriver::run_tests(const ArgumentSet &args, const ArgumentMap &kwarg { std::cout << "--verbose and --concise cannot be used together" << std::endl; exit(EXIT_FAILURE); - return false; } // check error status before doing anything diff --git a/testing/unittest/runtime_static_assert.h b/testing/unittest/runtime_static_assert.h index 3e7b60290..d53bd3b20 100644 --- a/testing/unittest/runtime_static_assert.h +++ b/testing/unittest/runtime_static_assert.h @@ -18,8 +18,11 @@ namespace unittest #include #include +#include + #if THRUST_DEVICE_SYSTEM == THRUST_DEVICE_SYSTEM_CUDA + #define ASSERT_STATIC_ASSERT(X) \ { \ bool triggered = false; \ @@ -86,11 +89,9 @@ namespace unittest { static_assert_exception ex(filename, lineno); -#ifdef __CUDA_ARCH__ - *detail::device_exception = ex; -#else - throw ex; -#endif + NV_IF_TARGET(NV_IS_DEVICE, + (*detail::device_exception = ex;), + (throw ex;)); } } } diff --git a/thrust/cmake/README.md b/thrust/cmake/README.md index c85a8c857..ae296b635 100644 --- a/thrust/cmake/README.md +++ b/thrust/cmake/README.md @@ -101,7 +101,7 @@ find_package(Thrust 1.9.10.1 EXACT) would only match the 1.9.10.1 release. -#### Using a Specific TBB or OpenMP Environment +#### Using an Explicit TBB or OpenMP CMake Target When `thrust_create_target` is called, it will lazily load the requested systems on-demand through internal `find_package` calls. If a project already @@ -112,9 +112,20 @@ thrust_set_TBB_target(MyTBBTarget) thrust_set_OMP_target(MyOMPTarget) ``` -These functions must be called **before** `thrust_create_target`, and will -have no effect if the dependency is loaded as a -`find_package(Thrust COMPONENT [...])` component. +These functions must be called **before** the corresponding system is loaded +through `thrust_create_target` or `find_package(Thrust COMPONENT [OMP|TBB])`. + +#### Using an Explicit libcu++ CMake Target + +In contrast to the optional TBB/OMP dependencies, there is no +`thrust_set_libcudacxx_target` function that specifies an explicit libcu++ +target. This is because libcu++ is always required and must be found during the +initial `find_target(Thrust)` call that defines these functions. + +To force Thrust to use a specific libcu++ target, ensure that either the +`Thrust::libcudacxx` or `libcudacxx::libcudacxx` targets are defined prior to +the first invocation of `find_package(Thrust)`. Thrust will automatically use +these, giving preference to the `Thrust::libcudacxx` target. #### Testing for Systems diff --git a/thrust/cmake/thrust-config.cmake b/thrust/cmake/thrust-config.cmake index f7589f6cc..fe88a961c 100644 --- a/thrust/cmake/thrust-config.cmake +++ b/thrust/cmake/thrust-config.cmake @@ -77,6 +77,9 @@ cmake_minimum_required(VERSION 3.15) +# Minimum supported libcudacxx version: +set(thrust_libcudacxx_version 1.8.0) + ################################################################################ # User variables and APIs. Users can rely on these: # @@ -85,19 +88,21 @@ cmake_minimum_required(VERSION 3.15) set(THRUST_HOST_SYSTEM_OPTIONS CPP OMP TBB CACHE INTERNAL "Valid Thrust host systems." + FORCE ) set(THRUST_DEVICE_SYSTEM_OPTIONS CUDA CPP OMP TBB CACHE INTERNAL "Valid Thrust device systems" + FORCE ) # Workaround cmake issue #20670 https://gitlab.kitware.com/cmake/cmake/-/issues/20670 -set(THRUST_VERSION ${${CMAKE_FIND_PACKAGE_NAME}_VERSION} CACHE INTERNAL "") -set(THRUST_VERSION_MAJOR ${${CMAKE_FIND_PACKAGE_NAME}_VERSION_MAJOR} CACHE INTERNAL "") -set(THRUST_VERSION_MINOR ${${CMAKE_FIND_PACKAGE_NAME}_VERSION_MINOR} CACHE INTERNAL "") -set(THRUST_VERSION_PATCH ${${CMAKE_FIND_PACKAGE_NAME}_VERSION_PATCH} CACHE INTERNAL "") -set(THRUST_VERSION_TWEAK ${${CMAKE_FIND_PACKAGE_NAME}_VERSION_TWEAK} CACHE INTERNAL "") -set(THRUST_VERSION_COUNT ${${CMAKE_FIND_PACKAGE_NAME}_VERSION_COUNT} CACHE INTERNAL "") +set(THRUST_VERSION ${${CMAKE_FIND_PACKAGE_NAME}_VERSION} CACHE INTERNAL "" FORCE) +set(THRUST_VERSION_MAJOR ${${CMAKE_FIND_PACKAGE_NAME}_VERSION_MAJOR} CACHE INTERNAL "" FORCE) +set(THRUST_VERSION_MINOR ${${CMAKE_FIND_PACKAGE_NAME}_VERSION_MINOR} CACHE INTERNAL "" FORCE) +set(THRUST_VERSION_PATCH ${${CMAKE_FIND_PACKAGE_NAME}_VERSION_PATCH} CACHE INTERNAL "" FORCE) +set(THRUST_VERSION_TWEAK ${${CMAKE_FIND_PACKAGE_NAME}_VERSION_TWEAK} CACHE INTERNAL "" FORCE) +set(THRUST_VERSION_COUNT ${${CMAKE_FIND_PACKAGE_NAME}_VERSION_COUNT} CACHE INTERNAL "" FORCE) function(thrust_create_target target_name) thrust_debug("Assembling target ${target_name}. Options: ${ARGN}" internal) @@ -109,7 +114,7 @@ function(thrust_create_target target_name) IGNORE_DEPRECATED_COMPILER IGNORE_DEPRECATED_CPP_11 IGNORE_DEPRECATED_CPP_DIALECT - ) + ) set(keys DEVICE DEVICE_OPTION @@ -117,13 +122,13 @@ function(thrust_create_target target_name) HOST HOST_OPTION HOST_OPTION_DOC - ) + ) cmake_parse_arguments(TCT "${options}" "${keys}" "" ${ARGN}) if (TCT_UNPARSED_ARGUMENTS) message(AUTHOR_WARNING "Unrecognized arguments passed to thrust_create_target: " ${TCT_UNPARSED_ARGUMENTS} - ) + ) endif() # Check that the main Thrust internal target is available @@ -133,7 +138,7 @@ function(thrust_create_target target_name) message(AUTHOR_WARNING "The `thrust_create_target` function was called outside the scope of the " "thrust targets. Call find_package again to recreate targets." - ) + ) endif() _thrust_set_if_undefined(TCT_HOST CPP) @@ -145,12 +150,14 @@ function(thrust_create_target target_name) if (NOT TCT_HOST IN_LIST THRUST_HOST_SYSTEM_OPTIONS) message(FATAL_ERROR - "Requested HOST=${TCT_HOST}; must be one of ${THRUST_HOST_SYSTEM_OPTIONS}") + "Requested HOST=${TCT_HOST}; must be one of ${THRUST_HOST_SYSTEM_OPTIONS}" + ) endif() if (NOT TCT_DEVICE IN_LIST THRUST_DEVICE_SYSTEM_OPTIONS) message(FATAL_ERROR - "Requested DEVICE=${TCT_DEVICE}; must be one of ${THRUST_DEVICE_SYSTEM_OPTIONS}") + "Requested DEVICE=${TCT_DEVICE}; must be one of ${THRUST_DEVICE_SYSTEM_OPTIONS}" + ) endif() if (TCT_FROM_OPTIONS) @@ -172,7 +179,7 @@ function(thrust_create_target target_name) # We can just create an INTERFACE IMPORTED target here instead of going # through _thrust_declare_interface_alias as long as we aren't hanging any - # Thrust/CUB include paths on ${target_name}. + # Thrust/CUB include paths directly on ${target_name}. add_library(${target_name} INTERFACE IMPORTED) target_link_libraries(${target_name} INTERFACE @@ -346,14 +353,15 @@ function(thrust_debug_internal_targets) _thrust_debug_backend_targets(CPP "Thrust ${THRUST_VERSION}") - _thrust_debug_backend_targets(CUDA "CUB ${THRUST_CUB_VERSION}") - thrust_debug_target(CUB::CUB "${THRUST_CUB_VERSION}") + _thrust_debug_backend_targets(OMP "${THRUST_OMP_VERSION}") + thrust_debug_target(OpenMP::OpenMP_CXX "${THRUST_OMP_VERSION}") _thrust_debug_backend_targets(TBB "${THRUST_TBB_VERSION}") thrust_debug_target(TBB:tbb "${THRUST_TBB_VERSION}") - _thrust_debug_backend_targets(OMP "${THRUST_OMP_VERSION}") - thrust_debug_target(OpenMP::OpenMP_CXX "${THRUST_OMP_VERSION}") + _thrust_debug_backend_targets(CUDA "CUB ${THRUST_CUB_VERSION}") + thrust_debug_target(CUB::CUB "${THRUST_CUB_VERSION}") + thrust_debug_target(libcudacxx::libcudacxx "${THRUST_libcudacxx_VERSION}") endfunction() ################################################################################ @@ -434,18 +442,38 @@ function(_thrust_setup_system backend) endif() endfunction() -# Use the provided cub_target for the CUDA backend. If Thrust::CUDA already +# Use the provided cub_target for the CUDA backend. If Thrust::CUB already # exists, this call has no effect. function(thrust_set_CUB_target cub_target) - if (NOT TARGET Thrust::CUDA) + if (NOT TARGET Thrust::CUB) thrust_debug("Setting CUB target to ${cub_target}" internal) # Workaround cmake issue #20670 https://gitlab.kitware.com/cmake/cmake/-/issues/20670 - set(THRUST_CUB_VERSION ${CUB_VERSION} CACHE INTERNAL "CUB version used by Thrust") - _thrust_declare_interface_alias(Thrust::CUDA _Thrust_CUDA) - target_link_libraries(_Thrust_CUDA INTERFACE Thrust::Thrust ${cub_target}) + set(THRUST_CUB_VERSION ${CUB_VERSION} CACHE INTERNAL + "CUB version used by Thrust" + FORCE + ) + _thrust_declare_interface_alias(Thrust::CUB _Thrust_CUB) + target_link_libraries(_Thrust_CUB INTERFACE ${cub_target}) thrust_debug_target(${cub_target} "${THRUST_CUB_VERSION}" internal) - thrust_debug_target(Thrust::CUDA "CUB ${THRUST_CUB_VERSION}" internal) - _thrust_setup_system(CUDA) + thrust_debug_target(Thrust::CUB "CUB ${THRUST_CUB_VERSION}" internal) + endif() +endfunction() + +# Internal use only -- libcudacxx must be found during the initial +# `find_package(Thrust)` call and cannot be set afterwards. See README.md in +# this directory for details on using a specific libcudacxx target. +function(_thrust_set_libcudacxx_target libcudacxx_target) + if (NOT TARGET Thrust::libcudacxx) + thrust_debug("Setting libcudacxx target to ${libcudacxx_target}" internal) + # Workaround cmake issue #20670 https://gitlab.kitware.com/cmake/cmake/-/issues/20670 + set(THRUST_libcudacxx_VERSION ${libcudacxx_VERSION} CACHE INTERNAL + "libcudacxx version used by Thrust" + FORCE + ) + _thrust_declare_interface_alias(Thrust::libcudacxx _Thrust_libcudacxx) + target_link_libraries(_Thrust_libcudacxx INTERFACE ${libcudacxx_target}) + thrust_debug_target(${libcudacxx_target} "${THRUST_libcudacxx_VERSION}" internal) + thrust_debug_target(Thrust::libcudacxx "libcudacxx ${THRUST_libcudacxx_VERSION}" internal) endif() endfunction() @@ -455,7 +483,10 @@ function(thrust_set_TBB_target tbb_target) if (NOT TARGET Thrust::TBB) thrust_debug("Setting TBB target to ${tbb_target}" internal) # Workaround cmake issue #20670 https://gitlab.kitware.com/cmake/cmake/-/issues/20670 - set(THRUST_TBB_VERSION ${TBB_VERSION} CACHE INTERNAL "TBB version used by Thrust") + set(THRUST_TBB_VERSION ${TBB_VERSION} CACHE INTERNAL + "TBB version used by Thrust" + FORCE + ) _thrust_declare_interface_alias(Thrust::TBB _Thrust_TBB) target_link_libraries(_Thrust_TBB INTERFACE Thrust::Thrust ${tbb_target}) thrust_debug_target(${tbb_target} "${THRUST_TBB_VERSION}" internal) @@ -470,7 +501,10 @@ function(thrust_set_OMP_target omp_target) if (NOT TARGET Thrust::OMP) thrust_debug("Setting OMP target to ${omp_target}" internal) # Workaround cmake issue #20670 https://gitlab.kitware.com/cmake/cmake/-/issues/20670 - set(THRUST_OMP_VERSION ${OpenMP_CXX_VERSION} CACHE INTERNAL "OpenMP version used by Thrust") + set(THRUST_OMP_VERSION ${OpenMP_CXX_VERSION} CACHE INTERNAL + "OpenMP version used by Thrust" + FORCE + ) _thrust_declare_interface_alias(Thrust::OMP _Thrust_OMP) target_link_libraries(_Thrust_OMP INTERFACE Thrust::Thrust ${omp_target}) thrust_debug_target(${omp_target} "${THRUST_OMP_VERSION}" internal) @@ -495,7 +529,7 @@ endfunction() # #20670 -- otherwise variables like CUB_VERSION, etc won't be in the caller's # scope. macro(_thrust_find_CUDA required) - if (NOT TARGET Thrust::CUDA) + if (NOT TARGET Thrust::CUB) thrust_debug("Searching for CUB ${required}" internal) find_package(CUB ${THRUST_VERSION} CONFIG ${_THRUST_QUIET_FLAG} @@ -513,6 +547,16 @@ macro(_thrust_find_CUDA required) thrust_debug("CUB not found!" internal) endif() endif() + + if (NOT TARGET Thrust::CUDA) + _thrust_declare_interface_alias(Thrust::CUDA _Thrust_CUDA) + _thrust_setup_system(CUDA) + target_link_libraries(_Thrust_CUDA INTERFACE + Thrust::Thrust + Thrust::CUB + ) + thrust_debug_target(Thrust::CUDA "" internal) + endif() endmacro() # This must be a macro instead of a function to ensure that backends passed to @@ -619,14 +663,17 @@ endmacro() # if (${CMAKE_FIND_PACKAGE_NAME}_FIND_QUIETLY) - set(_THRUST_QUIET ON CACHE INTERNAL "Quiet mode enabled for Thrust find_package calls.") - set(_THRUST_QUIET_FLAG "QUIET" CACHE INTERNAL "") + set(_THRUST_QUIET ON CACHE INTERNAL "Quiet mode enabled for Thrust find_package calls." FORCE) + set(_THRUST_QUIET_FLAG "QUIET" CACHE INTERNAL "" FORCE) else() unset(_THRUST_QUIET CACHE) unset(_THRUST_QUIET_FLAG CACHE) endif() -set(_THRUST_CMAKE_DIR "${CMAKE_CURRENT_LIST_DIR}" CACHE INTERNAL "Location of thrust-config.cmake") +set(_THRUST_CMAKE_DIR "${CMAKE_CURRENT_LIST_DIR}" CACHE INTERNAL + "Location of thrust-config.cmake" + FORCE +) # Internal target that actually holds the Thrust interface. Used by all other Thrust targets. if (NOT TARGET Thrust::Thrust) @@ -634,12 +681,45 @@ if (NOT TARGET Thrust::Thrust) # Pull in the include dir detected by thrust-config-version.cmake set(_THRUST_INCLUDE_DIR "${_THRUST_VERSION_INCLUDE_DIR}" CACHE INTERNAL "Location of Thrust headers." + FORCE ) unset(_THRUST_VERSION_INCLUDE_DIR CACHE) # Clear tmp variable from cache target_include_directories(_Thrust_Thrust INTERFACE "${_THRUST_INCLUDE_DIR}") thrust_debug_target(Thrust::Thrust "${THRUST_VERSION}" internal) endif() +# Find libcudacxx prior to locating backend-specific deps. This ensures that CUB +# finds the same package. +if (NOT TARGET Thrust::libcudacxx) + thrust_debug("Searching for libcudacxx REQUIRED" internal) + + # First do a non-required search for any co-packaged versions. + # These are preferred. + find_package(libcudacxx ${thrust_libcudacxx_version} CONFIG + ${_THRUST_QUIET_FLAG} + NO_DEFAULT_PATH # Only check the explicit HINTS below: + HINTS + "${_THRUST_INCLUDE_DIR}/dependencies/libcudacxx" # Source layout (GitHub) + "${_THRUST_INCLUDE_DIR}/../libcudacxx" # Source layout (Perforce) + "${_THRUST_CMAKE_DIR}/.." # Install layout + ) + + # A second required search allows externally packaged to be used and fails if + # no suitable package exists. + find_package(libcudacxx ${thrust_libcudacxx_version} CONFIG + REQUIRED + ${_THRUST_QUIET_FLAG} + ) + + if (TARGET libcudacxx::libcudacxx) + _thrust_set_libcudacxx_target(libcudacxx::libcudacxx) + else() + thrust_debug("Expected libcudacxx::libcudacxx target not found!" internal) + endif() + + target_link_libraries(_Thrust_Thrust INTERFACE Thrust::libcudacxx) +endif() + # Handle find_package COMPONENT requests: foreach(component ${${CMAKE_FIND_PACKAGE_NAME}_FIND_COMPONENTS}) if (NOT component IN_LIST THRUST_HOST_SYSTEM_OPTIONS AND diff --git a/thrust/detail/allocator/no_throw_allocator.h b/thrust/detail/allocator/no_throw_allocator.h index ea158d77f..a6c16985b 100644 --- a/thrust/detail/allocator/no_throw_allocator.h +++ b/thrust/detail/allocator/no_throw_allocator.h @@ -18,6 +18,8 @@ #include +#include + THRUST_NAMESPACE_BEGIN namespace detail { @@ -43,18 +45,18 @@ template __host__ __device__ void deallocate(typename super_t::pointer p, typename super_t::size_type n) { -#ifndef __CUDA_ARCH__ - try - { + NV_IF_TARGET(NV_IS_HOST, ( + try + { + super_t::deallocate(p, n); + } // end try + catch(...) + { + // catch anything + } // end catch + ), ( super_t::deallocate(p, n); - } // end try - catch(...) - { - // catch anything - } // end catch -#else - super_t::deallocate(p, n); -#endif + )); } // end deallocate() inline __host__ __device__ diff --git a/thrust/detail/allocator/temporary_allocator.inl b/thrust/detail/allocator/temporary_allocator.inl index 75aa7b9dc..609b0d318 100644 --- a/thrust/detail/allocator/temporary_allocator.inl +++ b/thrust/detail/allocator/temporary_allocator.inl @@ -22,10 +22,13 @@ #include #include -#if (defined(_NVHPC_CUDA) || defined(__CUDA_ARCH__)) && \ - THRUST_DEVICE_SYSTEM == THRUST_DEVICE_SYSTEM_CUDA +#include + +#if THRUST_DEVICE_SYSTEM == THRUST_DEVICE_SYSTEM_CUDA +#if (defined(_NVHPC_CUDA) || defined(__CUDA_ARCH__)) #include -#endif +#endif // NVCC device pass or NVC++ +#endif // CUDA THRUST_NAMESPACE_BEGIN namespace detail @@ -47,15 +50,11 @@ __host__ __device__ // note that we pass cnt to deallocate, not a value derived from result.second deallocate(result.first, cnt); - if (THRUST_IS_HOST_CODE) { - #if THRUST_INCLUDE_HOST_CODE - throw thrust::system::detail::bad_alloc("temporary_buffer::allocate: get_temporary_buffer failed"); - #endif - } else { - #if THRUST_INCLUDE_DEVICE_CODE && THRUST_DEVICE_SYSTEM == THRUST_DEVICE_SYSTEM_CUDA - thrust::system::cuda::detail::terminate_with_message("temporary_buffer::allocate: get_temporary_buffer failed"); - #endif - } + NV_IF_TARGET(NV_IS_HOST, ( + throw thrust::system::detail::bad_alloc("temporary_buffer::allocate: get_temporary_buffer failed"); + ), ( // NV_IS_DEVICE + thrust::system::cuda::detail::terminate_with_message("temporary_buffer::allocate: get_temporary_buffer failed"); + )); } // end if return result.first; diff --git a/thrust/detail/config/cpp_compatibility.h b/thrust/detail/config/cpp_compatibility.h index d924f79cf..18b9cbdcf 100644 --- a/thrust/detail/config/cpp_compatibility.h +++ b/thrust/detail/config/cpp_compatibility.h @@ -73,20 +73,29 @@ # endif #endif -#if defined(_NVHPC_CUDA) -# define THRUST_IS_DEVICE_CODE __builtin_is_device_code() -# define THRUST_IS_HOST_CODE (!__builtin_is_device_code()) -# define THRUST_INCLUDE_DEVICE_CODE 1 -# define THRUST_INCLUDE_HOST_CODE 1 -#elif defined(__CUDA_ARCH__) -# define THRUST_IS_DEVICE_CODE 1 -# define THRUST_IS_HOST_CODE 0 -# define THRUST_INCLUDE_DEVICE_CODE 1 -# define THRUST_INCLUDE_HOST_CODE 0 -#else -# define THRUST_IS_DEVICE_CODE 0 -# define THRUST_IS_HOST_CODE 1 -# define THRUST_INCLUDE_DEVICE_CODE 0 -# define THRUST_INCLUDE_HOST_CODE 1 -#endif - +// These definitions were intended for internal use only and are now obsolete. +// If you relied on them, consider porting your code to use the functionality +// in libcu++'s header. +// For a temporary workaround, define THRUST_PROVIDE_LEGACY_ARCH_MACROS to make +// them available again. These should be considered deprecated and will be +// fully removed in a future version. +#ifdef THRUST_PROVIDE_LEGACY_ARCH_MACROS + #ifndef THRUST_IS_DEVICE_CODE + #if defined(_NVHPC_CUDA) + #define THRUST_IS_DEVICE_CODE __builtin_is_device_code() + #define THRUST_IS_HOST_CODE (!__builtin_is_device_code()) + #define THRUST_INCLUDE_DEVICE_CODE 1 + #define THRUST_INCLUDE_HOST_CODE 1 + #elif defined(__CUDA_ARCH__) + #define THRUST_IS_DEVICE_CODE 1 + #define THRUST_IS_HOST_CODE 0 + #define THRUST_INCLUDE_DEVICE_CODE 1 + #define THRUST_INCLUDE_HOST_CODE 0 + #else + #define THRUST_IS_DEVICE_CODE 0 + #define THRUST_IS_HOST_CODE 1 + #define THRUST_INCLUDE_DEVICE_CODE 0 + #define THRUST_INCLUDE_HOST_CODE 1 + #endif + #endif +#endif // THRUST_PROVIDE_LEGACY_ARCH_MACROS diff --git a/thrust/detail/contiguous_storage.inl b/thrust/detail/contiguous_storage.inl index b82b83399..7ae8657f0 100644 --- a/thrust/detail/contiguous_storage.inl +++ b/thrust/detail/contiguous_storage.inl @@ -25,6 +25,8 @@ #include #include +#include + #include // for std::runtime_error #include // for use of std::swap in the WAR below @@ -432,19 +434,16 @@ __host__ __device__ void contiguous_storage ::swap_allocators(false_type, Alloc &other) { - if (THRUST_IS_DEVICE_CODE) { - #if THRUST_INCLUDE_DEVICE_CODE - // allocators must be equal when swapping containers with allocators that propagate on swap - assert(!is_allocator_not_equal(other)); - #endif - } else { - #if THRUST_INCLUDE_HOST_CODE - if (is_allocator_not_equal(other)) - { - throw allocator_mismatch_on_swap(); - } - #endif - } + NV_IF_TARGET(NV_IS_DEVICE, ( + // allocators must be equal when swapping containers with allocators that propagate on swap + assert(!is_allocator_not_equal(other)); + ), ( + if (is_allocator_not_equal(other)) + { + throw allocator_mismatch_on_swap(); + } + )); + thrust::swap(m_allocator, other); } // end contiguous_storage::swap_allocators() diff --git a/thrust/detail/integer_math.h b/thrust/detail/integer_math.h index 76887a1ea..0f8c8aac1 100644 --- a/thrust/detail/integer_math.h +++ b/thrust/detail/integer_math.h @@ -17,14 +17,13 @@ #pragma once #include -#include +#include -#if THRUST_CPP_DIALECT >= 2011 - #include -#endif +#include -THRUST_NAMESPACE_BEGIN +#include +THRUST_NAMESPACE_BEGIN namespace detail { @@ -33,25 +32,23 @@ __host__ __device__ __thrust_forceinline__ Integer clz(Integer x) { Integer result; - if (THRUST_IS_DEVICE_CODE) { - #if THRUST_INCLUDE_DEVICE_CODE - result = ::__clz(x); - #endif - } else { - #if THRUST_INCLUDE_HOST_CODE - int num_bits = 8 * sizeof(Integer); - int num_bits_minus_one = num_bits - 1; - result = num_bits; - for (int i = num_bits_minus_one; i >= 0; --i) + + NV_IF_TARGET(NV_IS_DEVICE, ( + result = ::__clz(x); + ), ( + int num_bits = 8 * sizeof(Integer); + int num_bits_minus_one = num_bits - 1; + result = num_bits; + for (int i = num_bits_minus_one; i >= 0; --i) + { + if ((Integer(1) << i) & x) { - if ((Integer(1) << i) & x) - { - result = num_bits_minus_one - i; - break; - } + result = num_bits_minus_one - i; + break; } - #endif - } + } + )); + return result; } diff --git a/thrust/detail/memory_algorithms.h b/thrust/detail/memory_algorithms.h index bc50f307c..2f6b3a81d 100644 --- a/thrust/detail/memory_algorithms.h +++ b/thrust/detail/memory_algorithms.h @@ -12,11 +12,14 @@ #include #include #include +#include #include +#include + #include #include -#include + THRUST_NAMESPACE_BEGIN @@ -102,7 +105,6 @@ ForwardIt destroy_n(Allocator const& alloc, ForwardIt first, Size n) return first; } -#if THRUST_CPP_DIALECT >= 2011 template __host__ __device__ void uninitialized_construct( @@ -112,17 +114,24 @@ void uninitialized_construct( using T = typename iterator_traits::value_type; ForwardIt current = first; - #if !__CUDA_ARCH__ // No exceptions in CUDA. - try { - #endif + + // No exceptions in CUDA. + NV_IF_TARGET(NV_IS_HOST, ( + try { + for (; current != last; ++current) + { + ::new (static_cast(addressof(*current))) T(args...); + } + } catch (...) { + destroy(first, current); + throw; + } + ), ( for (; current != last; ++current) + { ::new (static_cast(addressof(*current))) T(args...); - #if !__CUDA_ARCH__ // No exceptions in CUDA. - } catch (...) { - destroy(first, current); - throw; - } - #endif + } + )); } template @@ -140,17 +149,24 @@ void uninitialized_construct_with_allocator( typename traits::allocator_type alloc_T(alloc); ForwardIt current = first; - #if !__CUDA_ARCH__ // No exceptions in CUDA. - try { - #endif + + // No exceptions in CUDA. + NV_IF_TARGET(NV_IS_HOST, ( + try { + for (; current != last; ++current) + { + traits::construct(alloc_T, addressof(*current), args...); + } + } catch (...) { + destroy(alloc_T, first, current); + throw; + } + ), ( for (; current != last; ++current) + { traits::construct(alloc_T, addressof(*current), args...); - #if !__CUDA_ARCH__ // No exceptions in CUDA. - } catch (...) { - destroy(alloc_T, first, current); - throw; - } - #endif + } + )); } template @@ -161,17 +177,24 @@ void uninitialized_construct_n( using T = typename iterator_traits::value_type; ForwardIt current = first; - #if !__CUDA_ARCH__ // No exceptions in CUDA. - try { - #endif - for (; n > 0; (void) ++current, --n) + + // No exceptions in CUDA. + NV_IF_TARGET(NV_IS_HOST, ( + try { + for (; n > 0; ++current, --n) + { + ::new (static_cast(addressof(*current))) T(args...); + } + } catch (...) { + destroy(first, current); + throw; + } + ), ( + for (; n > 0; ++current, --n) + { ::new (static_cast(addressof(*current))) T(args...); - #if !__CUDA_ARCH__ // No exceptions in CUDA. - } catch (...) { - destroy(first, current); - throw; - } - #endif + } + )); } template @@ -189,19 +212,25 @@ void uninitialized_construct_n_with_allocator( typename traits::allocator_type alloc_T(alloc); ForwardIt current = first; - #if !__CUDA_ARCH__ // No exceptions in CUDA. - try { - #endif + + // No exceptions in CUDA. + NV_IF_TARGET(NV_IS_HOST, ( + try { + for (; n > 0; (void) ++current, --n) + { + traits::construct(alloc_T, addressof(*current), args...); + } + } catch (...) { + destroy(alloc_T, first, current); + throw; + } + ), ( for (; n > 0; (void) ++current, --n) + { traits::construct(alloc_T, addressof(*current), args...); - #if !__CUDA_ARCH__ // No exceptions in CUDA. - } catch (...) { - destroy(alloc_T, first, current); - throw; - } - #endif + } + )); } -#endif /////////////////////////////////////////////////////////////////////////////// diff --git a/thrust/detail/type_traits.h b/thrust/detail/type_traits.h index d147f8328..5596f569e 100644 --- a/thrust/detail/type_traits.h +++ b/thrust/detail/type_traits.h @@ -568,15 +568,7 @@ template struct largest_available_float { -#if defined(__CUDA_ARCH__) -# if (__CUDA_ARCH__ < 130) - typedef float type; -# else typedef double type; -# endif -#else - typedef double type; -#endif }; // T1 wins if they are both the same size diff --git a/thrust/device_allocator.h b/thrust/device_allocator.h index bce4d947b..f64c3854f 100644 --- a/thrust/device_allocator.h +++ b/thrust/device_allocator.h @@ -115,7 +115,7 @@ class device_allocator }; /*! Default constructor has no effect. */ - __host__ + __host__ __device__ device_allocator() {} /*! Copy constructor has no effect. */ @@ -124,15 +124,13 @@ class device_allocator /*! Constructor from other \p device_allocator has no effect. */ template - __host__ + __host__ __device__ device_allocator(const device_allocator& other) : base(other) {} -#if THRUST_CPP_DIALECT >= 2011 device_allocator & operator=(const device_allocator &) = default; -#endif /*! Destructor has no effect. */ - __host__ + __host__ __device__ ~device_allocator() {} }; diff --git a/thrust/mr/allocator.h b/thrust/mr/allocator.h index b907c09db..67adbe87c 100644 --- a/thrust/mr/allocator.h +++ b/thrust/mr/allocator.h @@ -219,7 +219,8 @@ class stateless_resource_allocator : public thrust::mr::allocator /*! Default constructor. Uses \p get_global_resource to get the global instance of \p Upstream and initializes the * \p allocator base subobject with that resource. */ - __host__ + __thrust_exec_check_disable__ + __host__ __device__ stateless_resource_allocator() : base(get_global_resource()) { } diff --git a/thrust/system/cuda/config.h b/thrust/system/cuda/config.h index 734e47bad..251f8d180 100644 --- a/thrust/system/cuda/config.h +++ b/thrust/system/cuda/config.h @@ -33,7 +33,7 @@ #include #if defined(__CUDACC__) || defined(_NVHPC_CUDA) -# if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__>= 350 && defined(__CUDACC_RDC__)) +# if !defined(__CUDA_ARCH__) || defined(__CUDACC_RDC__) # define __THRUST_HAS_CUDART__ 1 # define THRUST_RUNTIME_FUNCTION __host__ __device__ __forceinline__ # else @@ -45,9 +45,17 @@ # define THRUST_RUNTIME_FUNCTION __host__ __forceinline__ #endif +// These definitions were intended for internal use only and are now obsolete. +// If you relied on them, consider porting your code to use the functionality +// in libcu++'s header. +// For a temporary workaround, define THRUST_PROVIDE_LEGACY_ARCH_MACROS to make +// them available again. These should be considered deprecated and will be +// fully removed in a future version. +#ifdef THRUST_PROVIDE_LEGACY_ARCH_MACROS #ifdef __CUDA_ARCH__ #define THRUST_DEVICE_CODE -#endif +#endif // __CUDA_ARCH__ +#endif // THRUST_PROVIDE_LEGACY_ARCH_MACROS #ifdef THRUST_AGENT_ENTRY_NOINLINE #define THRUST_AGENT_ENTRY_INLINE_ATTR __noinline__ diff --git a/thrust/system/cuda/detail/assign_value.h b/thrust/system/cuda/detail/assign_value.h index 195493a4f..8945f1cac 100644 --- a/thrust/system/cuda/detail/assign_value.h +++ b/thrust/system/cuda/detail/assign_value.h @@ -24,6 +24,7 @@ #include #include +#include THRUST_NAMESPACE_BEGIN namespace cuda_cub { @@ -47,15 +48,12 @@ inline __host__ __device__ } }; - if (THRUST_IS_HOST_CODE) { - #if THRUST_INCLUDE_HOST_CODE - war_nvbugs_881631::host_path(exec,dst,src); - #endif - } else { - #if THRUST_INCLUDE_DEVICE_CODE - war_nvbugs_881631::device_path(exec,dst,src); - #endif - } + NV_IF_TARGET(NV_IS_HOST, ( + war_nvbugs_881631::host_path(exec,dst,src); + ), ( + war_nvbugs_881631::device_path(exec,dst,src); + )); + } // end assign_value() @@ -83,20 +81,14 @@ inline __host__ __device__ } }; - if (THRUST_IS_HOST_CODE) { - #if THRUST_INCLUDE_HOST_CODE - war_nvbugs_881631::host_path(systems,dst,src); - #endif - } else { - #if THRUST_INCLUDE_DEVICE_CODE - war_nvbugs_881631::device_path(systems,dst,src); - #endif - } + NV_IF_TARGET(NV_IS_HOST, ( + war_nvbugs_881631::host_path(systems,dst,src); + ), ( + war_nvbugs_881631::device_path(systems,dst,src); + )); } // end assign_value() - - } // end cuda_cub THRUST_NAMESPACE_END #endif diff --git a/thrust/system/cuda/detail/core/agent_launcher.h b/thrust/system/cuda/detail/core/agent_launcher.h index 8a79a87c7..4cdd7ff46 100644 --- a/thrust/system/cuda/detail/core/agent_launcher.h +++ b/thrust/system/cuda/detail/core/agent_launcher.h @@ -36,14 +36,7 @@ #include #include -#if 0 -#define __THRUST__TEMPLATE_DEBUG -#endif - -#if __THRUST__TEMPLATE_DEBUG -template class ID_impl; -template class Foo { ID_impl t;}; -#endif +#include THRUST_NAMESPACE_BEGIN namespace cuda_cub { @@ -521,15 +514,9 @@ namespace core { { if (debug_sync) { - if (THRUST_IS_DEVICE_CODE) { - #if THRUST_INCLUDE_DEVICE_CODE - cub::detail::device_synchronize(); - #endif - } else { - #if THRUST_INCLUDE_HOST_CODE - cudaStreamSynchronize(stream); - #endif - } + NV_IF_TARGET(NV_IS_HOST, + (cudaStreamSynchronize(stream);), + (cub::detail::device_synchronize();)); } } @@ -747,16 +734,6 @@ namespace core { void THRUST_RUNTIME_FUNCTION launch(Args... args) const { -#if __THRUST__TEMPLATE_DEBUG -#ifdef __CUDA_ARCH__ - typedef typename Foo< - shm1::v1, - shm1::v2, - shm1::v3, - shm1::v4, - shm1::v5>::t tt; -#endif -#endif launch_impl(has_enough_shmem_t(),args...); sync(); } diff --git a/thrust/system/cuda/detail/core/util.h b/thrust/system/cuda/detail/core/util.h index 83c05fd61..4e014ccc6 100644 --- a/thrust/system/cuda/detail/core/util.h +++ b/thrust/system/cuda/detail/core/util.h @@ -26,15 +26,17 @@ ******************************************************************************/ #pragma once -#include #include -#include -#include #include +#include #include +#include + #include -#include #include +#include + +#include THRUST_NAMESPACE_BEGIN @@ -356,27 +358,20 @@ namespace core { // Use one path, with Agent::ptx_plan, for device code where device-side // kernel launches are supported. The other path, with // get_agent_plan_impl::get(version), is for host code and for device - // code without device-side kernel launches. NVCC and Feta check for - // these situations differently. - #ifdef _NVHPC_CUDA - #ifdef __THRUST_HAS_CUDART__ - if (CUB_IS_DEVICE_CODE) { - return typename get_plan::type(typename Agent::ptx_plan()); - } else - #endif - { - return get_agent_plan_impl::get(ptx_version); - } - #else - #if (CUB_PTX_ARCH > 0) && defined(__THRUST_HAS_CUDART__) - typedef typename get_plan::type Plan; + // code without device-side kernel launches. +#ifdef __THRUST_HAS_CUDART__ + NV_IF_TARGET( + NV_IS_DEVICE, + ( THRUST_UNUSED_VAR(ptx_version); - // We're on device, use default policy - return Plan(typename Agent::ptx_plan()); - #else - return get_agent_plan_impl::get(ptx_version); - #endif - #endif + using plan_type = typename get_plan::type; + using ptx_plan = typename Agent::ptx_plan; + return plan_type{ptx_plan{}}; + ), // NV_IS_HOST: + ( return get_agent_plan_impl::get(ptx_version); )); +#else + return get_agent_plan_impl::get(ptx_version); +#endif } // XXX keep this dead-code for now as a gentle reminder @@ -606,12 +601,11 @@ namespace core { template class cuda_optional { - cudaError_t status_; - T value_; + cudaError_t status_{cudaSuccess}; + T value_{}; public: - __host__ __device__ - cuda_optional() : status_(cudaSuccess) {} + cuda_optional() = default; __host__ __device__ cuda_optional(T v, cudaError_t status = cudaSuccess) : status_(status), value_(v) {} diff --git a/thrust/system/cuda/detail/get_value.h b/thrust/system/cuda/detail/get_value.h index ebca7b5e7..9065f773a 100644 --- a/thrust/system/cuda/detail/get_value.h +++ b/thrust/system/cuda/detail/get_value.h @@ -24,6 +24,8 @@ #include #include +#include + THRUST_NAMESPACE_BEGIN namespace cuda_cub { @@ -62,24 +64,10 @@ inline __host__ __device__ } }; - // The usual pattern for separating host and device code doesn't work here - // because it would result in a compiler warning, either about falling off - // the end of a non-void function, or about result_type's default constructor - // being a host-only function. - #ifdef _NVHPC_CUDA - if (THRUST_IS_HOST_CODE) { - return war_nvbugs_881631::host_path(exec, ptr); - } else { - return war_nvbugs_881631::device_path(exec, ptr); - } - #else - #ifndef __CUDA_ARCH__ - return war_nvbugs_881631::host_path(exec, ptr); - #else - return war_nvbugs_881631::device_path(exec, ptr); - #endif // __CUDA_ARCH__ - #endif - } // end get_value_msvc2005_war() + NV_IF_TARGET(NV_IS_HOST, + (return war_nvbugs_881631::host_path(exec, ptr);), + (return war_nvbugs_881631::device_path(exec, ptr);)) +} // end get_value_msvc2005_war() } // end anon namespace diff --git a/thrust/system/cuda/detail/iter_swap.h b/thrust/system/cuda/detail/iter_swap.h index 60c40231c..c0628610a 100644 --- a/thrust/system/cuda/detail/iter_swap.h +++ b/thrust/system/cuda/detail/iter_swap.h @@ -26,6 +26,8 @@ #include #include +#include + THRUST_NAMESPACE_BEGIN namespace cuda_cub { @@ -50,15 +52,12 @@ void iter_swap(thrust::cuda::execution_policy &, Pointer1 a, Poin } }; - if (THRUST_IS_HOST_CODE) { - #if THRUST_INCLUDE_HOST_CODE - war_nvbugs_881631::host_path(a, b); - #endif - } else { - #if THRUST_INCLUDE_DEVICE_CODE - war_nvbugs_881631::device_path(a, b); - #endif - } + NV_IF_TARGET(NV_IS_HOST, ( + war_nvbugs_881631::host_path(a, b); + ), ( + war_nvbugs_881631::device_path(a, b); + )); + } // end iter_swap() diff --git a/thrust/system/cuda/detail/malloc_and_free.h b/thrust/system/cuda/detail/malloc_and_free.h index ac5b0f871..1b12e2cc3 100644 --- a/thrust/system/cuda/detail/malloc_and_free.h +++ b/thrust/system/cuda/detail/malloc_and_free.h @@ -23,13 +23,16 @@ #include #include #include -#ifdef THRUST_CACHING_DEVICE_MALLOC -#include -#endif #include #include #include +#ifdef THRUST_CACHING_DEVICE_MALLOC +#include +#endif + +#include + THRUST_NAMESPACE_BEGIN namespace cuda_cub { @@ -53,26 +56,35 @@ void *malloc(execution_policy &, std::size_t n) { void *result = 0; - if (THRUST_IS_HOST_CODE) { - #if THRUST_INCLUDE_HOST_CODE - #ifdef __CUB_CACHING_MALLOC - cub::CachingDeviceAllocator &alloc = get_allocator(); - cudaError_t status = alloc.DeviceAllocate(&result, n); - #else - cudaError_t status = cudaMalloc(&result, n); - #endif - - if(status != cudaSuccess) - { - cudaGetLastError(); // Clear global CUDA error state. - throw thrust::system::detail::bad_alloc(thrust::cuda_category().message(status).c_str()); - } - #endif - } else { - #if THRUST_INCLUDE_DEVICE_CODE - result = thrust::raw_pointer_cast(thrust::malloc(thrust::seq, n)); - #endif - } + // need to repeat a lot of code here because we can't use #if inside of the + // NV_IF_TARGET macro. + // The device path is the same either way, but the host allocations differ. +#ifdef __CUB_CACHING_MALLOC + NV_IF_TARGET(NV_IS_HOST, ( + cub::CachingDeviceAllocator &alloc = get_allocator(); + cudaError_t status = alloc.DeviceAllocate(&result, n); + + if (status != cudaSuccess) + { + cudaGetLastError(); // Clear global CUDA error state. + throw thrust::system::detail::bad_alloc(thrust::cuda_category().message(status).c_str()); + } + ), ( // NV_IS_DEVICE + result = thrust::raw_pointer_cast(thrust::malloc(thrust::seq, n)); + )); +#else // not __CUB_CACHING_MALLOC + NV_IF_TARGET(NV_IS_HOST, ( + cudaError_t status = cudaMalloc(&result, n); + + if (status != cudaSuccess) + { + cudaGetLastError(); // Clear global CUDA error state. + throw thrust::system::detail::bad_alloc(thrust::cuda_category().message(status).c_str()); + } + ), ( // NV_IS_DEVICE + result = thrust::raw_pointer_cast(thrust::malloc(thrust::seq, n)); + )); +#endif return result; } // end malloc() @@ -82,21 +94,25 @@ template __host__ __device__ void free(execution_policy &, Pointer ptr) { - if (THRUST_IS_HOST_CODE) { - #if THRUST_INCLUDE_HOST_CODE - #ifdef __CUB_CACHING_MALLOC - cub::CachingDeviceAllocator &alloc = get_allocator(); - cudaError_t status = alloc.DeviceFree(thrust::raw_pointer_cast(ptr)); - #else - cudaError_t status = cudaFree(thrust::raw_pointer_cast(ptr)); - #endif - cuda_cub::throw_on_error(status, "device free failed"); - #endif - } else { - #if THRUST_INCLUDE_DEVICE_CODE - thrust::free(thrust::seq, ptr); - #endif - } + // need to repeat a lot of code here because we can't use #if inside of the + // NV_IF_TARGET macro. + // The device path is the same either way, but the host deallocations differ. +#ifdef __CUB_CACHING_MALLOC + NV_IF_TARGET(NV_IS_HOST, ( + cub::CachingDeviceAllocator &alloc = get_allocator(); + cudaError_t status = alloc.DeviceFree(thrust::raw_pointer_cast(ptr)); + cuda_cub::throw_on_error(status, "device free failed"); + ), ( // NV_IS_DEVICE + thrust::free(thrust::seq, ptr); + )); +#else // not __CUB_CACHING_MALLOC + NV_IF_TARGET(NV_IS_HOST, ( + cudaError_t status = cudaFree(thrust::raw_pointer_cast(ptr)); + cuda_cub::throw_on_error(status, "device free failed"); + ), ( // NV_IS_DEVICE + thrust::free(thrust::seq, ptr); + )); +#endif } // end free() } // namespace cuda_cub diff --git a/thrust/system/cuda/detail/par.h b/thrust/system/cuda/detail/par.h index bd5953139..42c701ca7 100644 --- a/thrust/system/cuda/detail/par.h +++ b/thrust/system/cuda/detail/par.h @@ -48,6 +48,7 @@ struct execute_on_stream_base : execution_policy cudaStream_t stream; public: + __thrust_exec_check_disable__ __host__ __device__ execute_on_stream_base(cudaStream_t stream_ = default_stream()) : stream(stream_){} diff --git a/thrust/system/cuda/detail/util.h b/thrust/system/cuda/detail/util.h index 5c564dc98..1b6580271 100644 --- a/thrust/system/cuda/detail/util.h +++ b/thrust/system/cuda/detail/util.h @@ -35,9 +35,11 @@ #include #include +#include -THRUST_NAMESPACE_BEGIN +#include +THRUST_NAMESPACE_BEGIN namespace cuda_cub { inline __host__ __device__ @@ -94,25 +96,7 @@ __host__ __device__ cudaError_t synchronize_stream(execution_policy &policy) { - cudaError_t result; - if (THRUST_IS_HOST_CODE) { - #if THRUST_INCLUDE_HOST_CODE - cudaStreamSynchronize(stream(policy)); - result = cudaGetLastError(); - #endif - } else { - #if THRUST_INCLUDE_DEVICE_CODE - #if __THRUST_HAS_CUDART__ - THRUST_UNUSED_VAR(policy); - cub::detail::device_synchronize(); - result = cudaGetLastError(); - #else - THRUST_UNUSED_VAR(policy); - result = cudaSuccess; - #endif - #endif - } - return result; + return cub::SyncStream(stream(policy)); } // Entry point/interface. @@ -132,30 +116,16 @@ cudaError_t synchronize_stream_optional(execution_policy &policy) { cudaError_t result; - if (THRUST_IS_HOST_CODE) { - #if THRUST_INCLUDE_HOST_CODE - if(must_perform_optional_synchronization(policy)){ - cudaStreamSynchronize(stream(policy)); - result = cudaGetLastError(); - }else{ - result = cudaSuccess; - } - #endif - } else { - #if THRUST_INCLUDE_DEVICE_CODE - #if __THRUST_HAS_CUDART__ - if(must_perform_optional_synchronization(policy)){ - cub::detail::device_synchronize(); - result = cudaGetLastError(); - }else{ - result = cudaSuccess; - } - #else - THRUST_UNUSED_VAR(policy); - result = cudaSuccess; - #endif - #endif + + if (must_perform_optional_synchronization(policy)) + { + result = synchronize_stream(policy); + } + else + { + result = cudaSuccess; } + return result; } @@ -230,15 +200,7 @@ trivial_copy_device_to_device(Policy & policy, inline void __host__ __device__ terminate() { - if (THRUST_IS_DEVICE_CODE) { - #if THRUST_INCLUDE_DEVICE_CODE - asm("trap;"); - #endif - } else { - #if THRUST_INCLUDE_HOST_CODE - std::terminate(); - #endif - } + NV_IF_TARGET(NV_IS_HOST, (std::terminate();), (asm("trap;");)); } __host__ __device__ @@ -252,23 +214,33 @@ inline void throw_on_error(cudaError_t status) if (cudaSuccess != status) { - if (THRUST_IS_HOST_CODE) { - #if THRUST_INCLUDE_HOST_CODE - throw thrust::system_error(status, thrust::cuda_category()); - #endif - } else { - #if THRUST_INCLUDE_DEVICE_CODE - #if __THRUST_HAS_CUDART__ - printf("Thrust CUDA backend error: %s: %s\n", - cudaGetErrorName(status), - cudaGetErrorString(status)); - #else - printf("Thrust CUDA backend error: %d\n", - static_cast(status)); - #endif - cuda_cub::terminate(); - #endif - } + + // Can't use #if inside NV_IF_TARGET, use a temp macro to hoist the device + // instructions out of the target logic. +#if __THRUST_HAS_CUDART__ + +#define THRUST_TEMP_DEVICE_CODE \ + printf("Thrust CUDA backend error: %s: %s\n", \ + cudaGetErrorName(status), \ + cudaGetErrorString(status)) + +#else + +#define THRUST_TEMP_DEVICE_CODE \ + printf("Thrust CUDA backend error: %d\n", \ + static_cast(status)) + +#endif + + NV_IF_TARGET(NV_IS_HOST, ( + throw thrust::system_error(status, thrust::cuda_category()); + ), ( + THRUST_TEMP_DEVICE_CODE; + cuda_cub::terminate(); + )); + +#undef THRUST_TEMP_DEVICE_CODE + } } @@ -283,25 +255,34 @@ inline void throw_on_error(cudaError_t status, char const *msg) if (cudaSuccess != status) { - if (THRUST_IS_HOST_CODE) { - #if THRUST_INCLUDE_HOST_CODE - throw thrust::system_error(status, thrust::cuda_category(), msg); - #endif - } else { - #if THRUST_INCLUDE_DEVICE_CODE - #if __THRUST_HAS_CUDART__ - printf("Thrust CUDA backend error: %s: %s: %s\n", - cudaGetErrorName(status), - cudaGetErrorString(status), - msg); - #else - printf("Thrust CUDA backend error: %d: %s \n", - static_cast(status), - msg); - #endif - cuda_cub::terminate(); - #endif - } + // Can't use #if inside NV_IF_TARGET, use a temp macro to hoist the device + // instructions out of the target logic. +#if __THRUST_HAS_CUDART__ + +#define THRUST_TEMP_DEVICE_CODE \ + printf("Thrust CUDA backend error: %s: %s: %s\n", \ + cudaGetErrorName(status), \ + cudaGetErrorString(status),\ + msg) + +#else + +#define THRUST_TEMP_DEVICE_CODE \ + printf("Thrust CUDA backend error: %d: %s\n", \ + static_cast(status), \ + msg) + +#endif + + NV_IF_TARGET(NV_IS_HOST, ( + throw thrust::system_error(status, thrust::cuda_category(), msg); + ), ( + THRUST_TEMP_DEVICE_CODE; + cuda_cub::terminate(); + )); + +#undef THRUST_TEMP_DEVICE_CODE + } } diff --git a/thrust/system/detail/sequential/sort.inl b/thrust/system/detail/sequential/sort.inl index 01920aa6e..241a860af 100644 --- a/thrust/system/detail/sequential/sort.inl +++ b/thrust/system/detail/sequential/sort.inl @@ -24,6 +24,8 @@ #include #include +#include + THRUST_NAMESPACE_BEGIN namespace system { @@ -164,14 +166,14 @@ void stable_sort(sequential::execution_policy &exec, { // the compilation time of stable_primitive_sort is too expensive to use within a single CUDA thread -#ifndef __CUDA_ARCH__ - typedef typename thrust::iterator_traits::value_type KeyType; - sort_detail::use_primitive_sort use_primitive_sort; -#else - thrust::detail::false_type use_primitive_sort; -#endif - - sort_detail::stable_sort(exec, first, last, comp, use_primitive_sort); + NV_IF_TARGET(NV_IS_HOST, ( + using KeyType = thrust::iterator_value_t; + sort_detail::use_primitive_sort use_primitive_sort; + sort_detail::stable_sort(exec, first, last, comp, use_primitive_sort); + ), ( // NV_IS_DEVICE: + thrust::detail::false_type use_primitive_sort; + sort_detail::stable_sort(exec, first, last, comp, use_primitive_sort); + )); } @@ -188,14 +190,14 @@ void stable_sort_by_key(sequential::execution_policy &exec, { // the compilation time of stable_primitive_sort_by_key is too expensive to use within a single CUDA thread -#ifndef __CUDA_ARCH__ - typedef typename thrust::iterator_traits::value_type KeyType; - sort_detail::use_primitive_sort use_primitive_sort; -#else - thrust::detail::false_type use_primitive_sort; -#endif - - sort_detail::stable_sort_by_key(exec, first1, last1, first2, comp, use_primitive_sort); + NV_IF_TARGET(NV_IS_HOST, ( + using KeyType = thrust::iterator_value_t; + sort_detail::use_primitive_sort use_primitive_sort; + sort_detail::stable_sort_by_key(exec, first1, last1, first2, comp, use_primitive_sort); + ), ( // NV_IS_DEVICE: + thrust::detail::false_type use_primitive_sort; + sort_detail::stable_sort_by_key(exec, first1, last1, first2, comp, use_primitive_sort); + )); } diff --git a/thrust/system/detail/sequential/stable_merge_sort.inl b/thrust/system/detail/sequential/stable_merge_sort.inl index 7dcf03f59..02f384afb 100644 --- a/thrust/system/detail/sequential/stable_merge_sort.inl +++ b/thrust/system/detail/sequential/stable_merge_sort.inl @@ -24,6 +24,8 @@ #include #include +#include + THRUST_NAMESPACE_BEGIN namespace system { @@ -355,16 +357,12 @@ void stable_merge_sort(sequential::execution_policy &exec, RandomAccessIterator last, StrictWeakOrdering comp) { - if (THRUST_IS_DEVICE_CODE) { - #if THRUST_INCLUDE_DEVICE_CODE - // avoid recursion in CUDA threads - stable_merge_sort_detail::iterative_stable_merge_sort(exec, first, last, comp); - #endif - } else { - #if THRUST_INCLUDE_HOST_CODE - stable_merge_sort_detail::recursive_stable_merge_sort(exec, first, last, comp); - #endif - } + NV_IF_TARGET(NV_IS_DEVICE, ( + // avoid recursion in CUDA threads + stable_merge_sort_detail::iterative_stable_merge_sort(exec, first, last, comp); + ), ( + stable_merge_sort_detail::recursive_stable_merge_sort(exec, first, last, comp); + )); } @@ -379,16 +377,12 @@ void stable_merge_sort_by_key(sequential::execution_policy &exec, RandomAccessIterator2 first2, StrictWeakOrdering comp) { - if (THRUST_IS_DEVICE_CODE) { - #if THRUST_INCLUDE_DEVICE_CODE - // avoid recursion in CUDA threads - stable_merge_sort_detail::iterative_stable_merge_sort_by_key(exec, first1, last1, first2, comp); - #endif - } else { - #if THRUST_INCLUDE_HOST_CODE - stable_merge_sort_detail::recursive_stable_merge_sort_by_key(exec, first1, last1, first2, comp); - #endif - } + NV_IF_TARGET(NV_IS_DEVICE, ( + // avoid recursion in CUDA threads + stable_merge_sort_detail::iterative_stable_merge_sort_by_key(exec, first1, last1, first2, comp); + ), ( + stable_merge_sort_detail::recursive_stable_merge_sort_by_key(exec, first1, last1, first2, comp); + )); } diff --git a/thrust/system/detail/sequential/trivial_copy.h b/thrust/system/detail/sequential/trivial_copy.h index cefb18938..ea55c8fd2 100644 --- a/thrust/system/detail/sequential/trivial_copy.h +++ b/thrust/system/detail/sequential/trivial_copy.h @@ -24,6 +24,8 @@ #include #include +#include + THRUST_NAMESPACE_BEGIN namespace system { @@ -40,16 +42,14 @@ __host__ __device__ T *result) { T* return_value = NULL; - if (THRUST_IS_HOST_CODE) { - #if THRUST_INCLUDE_HOST_CODE - std::memmove(result, first, n * sizeof(T)); - return_value = result + n; - #endif - } else { - #if THRUST_INCLUDE_DEVICE_CODE - return_value = thrust::system::detail::sequential::general_copy_n(first, n, result); - #endif - } + + NV_IF_TARGET(NV_IS_HOST, ( + std::memmove(result, first, n * sizeof(T)); + return_value = result + n; + ), ( // NV_IS_DEVICE: + return_value = thrust::system::detail::sequential::general_copy_n(first, n, result); + )); + return return_value; } // end trivial_copy_n()