From 97e63f9a046a1b767b81bda5650ed94d38011e87 Mon Sep 17 00:00:00 2001 From: Allison Vacanti Date: Fri, 14 Jan 2022 14:55:48 -0500 Subject: [PATCH 01/10] Add libcudacxx submodule, initialized to version 1.8.0. --- .gitmodules | 3 + cmake/ThrustInstallRules.cmake | 50 ++++++++++------- dependencies/libcudacxx | 1 + thrust/cmake/thrust-config.cmake | 94 +++++++++++++++++++++++++++----- 4 files changed, 113 insertions(+), 35 deletions(-) create mode 160000 dependencies/libcudacxx diff --git a/.gitmodules b/.gitmodules index 1d8e604ef..0bb39f302 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,6 @@ [submodule "cub"] path = dependencies/cub url = ../cub.git +[submodule "libcudacxx"] + path = dependencies/libcudacxx + url = ../libcudacxx.git diff --git a/cmake/ThrustInstallRules.cmake b/cmake/ThrustInstallRules.cmake index 93084c11d..993dba153 100644 --- a/cmake/ThrustInstallRules.cmake +++ b/cmake/ThrustInstallRules.cmake @@ -24,27 +24,35 @@ configure_file("${Thrust_SOURCE_DIR}/thrust/cmake/thrust-header-search.cmake.in" install(FILES "${Thrust_BINARY_DIR}/thrust/cmake/thrust-header-search.cmake" DESTINATION "${install_location}") -# Depending on how Thrust is configured, CUB's CMake scripts may or may not be -# included, so maintain a set of CUB install rules in both projects. By default -# CUB headers are installed alongside Thrust -- this may be disabled by turning -# off THRUST_INSTALL_CUB_HEADERS. -option(THRUST_INSTALL_CUB_HEADERS "Include cub headers when installing." ON) +# Depending on how Thrust is configured, libcudacxx and CUB's CMake scripts may +# or may not be include()'d, so force include their install rules when requested. +# By default, these projects are installed alongside Thrust. This is controlled by +# THRUST_INSTALL_CUB_HEADERS and THRUST_INSTALL_LIBCUDACXX_HEADERS. +option(THRUST_INSTALL_CUB_HEADERS "Include CUB headers when installing." ON) if (THRUST_INSTALL_CUB_HEADERS) - install(DIRECTORY "${Thrust_SOURCE_DIR}/dependencies/cub/cub" - DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}" - FILES_MATCHING - PATTERN "*.cuh" - ) + # Use a function to limit scope of the CUB_*_DIR vars: + function(_thrust_install_cub_headers) + # Fake these for the logic in CUBInstallRules.cmake: + set(CUB_SOURCE_DIR "${Thrust_SOURCE_DIR}/dependencies/cub/") + set(CUB_BINARY_DIR "${Thrust_BINARY_DIR}/cub-config/") + set(CUB_ENABLE_INSTALL_RULES ON) + set(CUB_IN_THRUST OFF) + include("${Thrust_SOURCE_DIR}/dependencies/cub/cmake/CubInstallRules.cmake") + endfunction() - # Need to configure a file to store THRUST_INSTALL_HEADER_INFIX - install(DIRECTORY "${Thrust_SOURCE_DIR}/dependencies/cub/cub/cmake/" - DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/cub" - PATTERN cub-header-search EXCLUDE - ) - set(install_location "${CMAKE_INSTALL_LIBDIR}/cmake/cub") - configure_file("${Thrust_SOURCE_DIR}/dependencies/cub/cub/cmake/cub-header-search.cmake.in" - "${Thrust_BINARY_DIR}/dependencies/cub/cub/cmake/cub-header-search.cmake" - @ONLY) - install(FILES "${Thrust_BINARY_DIR}/dependencies/cub/cub/cmake/cub-header-search.cmake" - DESTINATION "${install_location}") + _thrust_install_cub_headers() +endif() + +option(THRUST_INSTALL_LIBCUDACXX_HEADERS "Include libcudacxx headers when installing." ON) +if (THRUST_INSTALL_LIBCUDACXX_HEADERS) + # Use a function to limit scope of the libcudacxx_*_DIR vars: + function(_thrust_install_libcudacxx_headers) + # Fake these for the logic in libcudacxxInstallRules.cmake: + set(libcudacxx_SOURCE_DIR "${Thrust_SOURCE_DIR}/dependencies/libcudacxx/") + set(libcudacxx_BINARY_DIR "${Thrust_BINARY_DIR}/libcudacxx-config/") + set(libcudacxx_ENABLE_INSTALL_RULES ON) + include("${Thrust_SOURCE_DIR}/dependencies/libcudacxx/cmake/libcudacxxInstallRules.cmake") + endfunction() + + _thrust_install_libcudacxx_headers() endif() diff --git a/dependencies/libcudacxx b/dependencies/libcudacxx new file mode 160000 index 000000000..05d48aaa1 --- /dev/null +++ b/dependencies/libcudacxx @@ -0,0 +1 @@ +Subproject commit 05d48aaa12a3c310c333298331c41a9214f08f22 diff --git a/thrust/cmake/thrust-config.cmake b/thrust/cmake/thrust-config.cmake index f7589f6cc..71180b3a4 100644 --- a/thrust/cmake/thrust-config.cmake +++ b/thrust/cmake/thrust-config.cmake @@ -37,14 +37,15 @@ # [ADVANCED] # Optionally mark options as advanced # ) # -# # Use a custom TBB, CUB, and/or OMP +# # Use a custom TBB, CUB, libcudacxx, and/or OMP # # (Note that once set, these cannot be changed. This includes COMPONENT # # preloading and lazy lookups in thrust_create_target) # find_package(Thrust REQUIRED) # thrust_set_CUB_target(MyCUBTarget) # MyXXXTarget contains an existing # thrust_set_TBB_target(MyTBBTarget) # interface to XXX for Thrust to use. +# thrust_set_libcudacxx_target(MyLibcudacxxTarget) # thrust_set_OMP_target(MyOMPTarget) -# thrust_create_target(ThrustWithMyCUB DEVICE CUDA) +# thrust_create_target(ThrustWithMyCUBAndLibcudacxx DEVICE CUDA) # thrust_create_target(ThrustWithMyTBB DEVICE TBB) # thrust_create_target(ThrustWithMyOMP DEVICE OMP) # @@ -77,6 +78,9 @@ cmake_minimum_required(VERSION 3.15) +# Minimum supported libcudacxx version: +set(thrust_libcudacxx_version 1.8.0) + ################################################################################ # User variables and APIs. Users can rely on these: # @@ -346,14 +350,15 @@ function(thrust_debug_internal_targets) _thrust_debug_backend_targets(CPP "Thrust ${THRUST_VERSION}") - _thrust_debug_backend_targets(CUDA "CUB ${THRUST_CUB_VERSION}") - thrust_debug_target(CUB::CUB "${THRUST_CUB_VERSION}") + _thrust_debug_backend_targets(OMP "${THRUST_OMP_VERSION}") + thrust_debug_target(OpenMP::OpenMP_CXX "${THRUST_OMP_VERSION}") _thrust_debug_backend_targets(TBB "${THRUST_TBB_VERSION}") thrust_debug_target(TBB:tbb "${THRUST_TBB_VERSION}") - _thrust_debug_backend_targets(OMP "${THRUST_OMP_VERSION}") - thrust_debug_target(OpenMP::OpenMP_CXX "${THRUST_OMP_VERSION}") + _thrust_debug_backend_targets(CUDA "CUB ${THRUST_CUB_VERSION}") + thrust_debug_target(CUB::CUB "${THRUST_CUB_VERSION}") + thrust_debug_target(libcudacxx::libcudacxx "${THRUST_libcudacxx_VERSION}") endfunction() ################################################################################ @@ -434,18 +439,37 @@ function(_thrust_setup_system backend) endif() endfunction() -# Use the provided cub_target for the CUDA backend. If Thrust::CUDA already +# Use the provided cub_target for the CUDA backend. If Thrust::CUB already # exists, this call has no effect. function(thrust_set_CUB_target cub_target) - if (NOT TARGET Thrust::CUDA) + if (NOT TARGET Thrust::CUB) thrust_debug("Setting CUB target to ${cub_target}" internal) # Workaround cmake issue #20670 https://gitlab.kitware.com/cmake/cmake/-/issues/20670 - set(THRUST_CUB_VERSION ${CUB_VERSION} CACHE INTERNAL "CUB version used by Thrust") - _thrust_declare_interface_alias(Thrust::CUDA _Thrust_CUDA) - target_link_libraries(_Thrust_CUDA INTERFACE Thrust::Thrust ${cub_target}) + set(THRUST_CUB_VERSION ${CUB_VERSION} CACHE INTERNAL + "CUB version used by Thrust" + FORCE + ) + _thrust_declare_interface_alias(Thrust::CUB _Thrust_CUB) + target_link_libraries(_Thrust_CUB INTERFACE ${cub_target}) thrust_debug_target(${cub_target} "${THRUST_CUB_VERSION}" internal) - thrust_debug_target(Thrust::CUDA "CUB ${THRUST_CUB_VERSION}" internal) - _thrust_setup_system(CUDA) + thrust_debug_target(Thrust::CUB "CUB ${THRUST_CUB_VERSION}" internal) + endif() +endfunction() + +# Use the provided libcudacxx_target for the CUDA backend. If Thrust::libcudacxx +# already exists, this call has no effect. +function(thrust_set_libcudacxx_target libcudacxx_target) + if (NOT TARGET Thrust::libcudacxx) + thrust_debug("Setting libcudacxx target to ${libcudacxx_target}" internal) + # Workaround cmake issue #20670 https://gitlab.kitware.com/cmake/cmake/-/issues/20670 + set(THRUST_libcudacxx_VERSION ${libcudacxx_VERSION} CACHE INTERNAL + "libcudacxx version used by Thrust" + FORCE + ) + _thrust_declare_interface_alias(Thrust::libcudacxx _Thrust_libcudacxx) + target_link_libraries(_Thrust_libcudacxx INTERFACE ${libcudacxx_target}) + thrust_debug_target(${libcudacxx_target} "${THRUST_libcudacxx_VERSION}" internal) + thrust_debug_target(Thrust::libcudacxx "libcudacxx ${THRUST_libcudacxx_VERSION}" internal) endif() endfunction() @@ -495,7 +519,7 @@ endfunction() # #20670 -- otherwise variables like CUB_VERSION, etc won't be in the caller's # scope. macro(_thrust_find_CUDA required) - if (NOT TARGET Thrust::CUDA) + if (NOT TARGET Thrust::CUB) thrust_debug("Searching for CUB ${required}" internal) find_package(CUB ${THRUST_VERSION} CONFIG ${_THRUST_QUIET_FLAG} @@ -513,6 +537,16 @@ macro(_thrust_find_CUDA required) thrust_debug("CUB not found!" internal) endif() endif() + + if (NOT TARGET Thrust::CUDA) + _thrust_declare_interface_alias(Thrust::CUDA _Thrust_CUDA) + _thrust_setup_system(CUDA) + target_link_libraries(_Thrust_CUDA INTERFACE + Thrust::Thrust + Thrust::CUB + ) + thrust_debug_target(Thrust::CUDA "" internal) + endif() endmacro() # This must be a macro instead of a function to ensure that backends passed to @@ -640,6 +674,38 @@ if (NOT TARGET Thrust::Thrust) thrust_debug_target(Thrust::Thrust "${THRUST_VERSION}" internal) endif() +# Find libcudacxx prior to locating backend-specific deps. This ensures that CUB +# finds the same package. +if (NOT TARGET Thrust::libcudacxx) + thrust_debug("Searching for libcudacxx REQUIRED" internal) + + # First do a non-required search for any co-packaged versions. + # These are preferred. + find_package(libcudacxx ${thrust_libcudacxx_version} CONFIG + ${_THRUST_QUIET_FLAG} + NO_DEFAULT_PATH # Only check the explicit HINTS below: + HINTS + "${_THRUST_INCLUDE_DIR}/dependencies/libcudacxx" # Source layout (GitHub) + "${_THRUST_INCLUDE_DIR}/../libcudacxx" # Source layout (Perforce) + "${_THRUST_CMAKE_DIR}/.." # Install layout + ) + + # A second required search allows externally packaged to be used and fails if + # no suitable package exists. + find_package(libcudacxx ${thrust_libcudacxx_version} CONFIG + REQUIRED + ${_THRUST_QUIET_FLAG} + ) + + if (TARGET libcudacxx::libcudacxx) + thrust_set_libcudacxx_target(libcudacxx::libcudacxx) + else() + thrust_debug("Expected libcudacxx::libcudacxx target not found!" internal) + endif() + + target_link_libraries(_Thrust_Thrust INTERFACE Thrust::libcudacxx) +endif() + # Handle find_package COMPONENT requests: foreach(component ${${CMAKE_FIND_PACKAGE_NAME}_FIND_COMPONENTS}) if (NOT component IN_LIST THRUST_HOST_SYSTEM_OPTIONS AND From b19385ab8b8998e372811283e243222c04714305 Mon Sep 17 00:00:00 2001 From: Allison Vacanti Date: Wed, 23 Mar 2022 15:37:32 -0400 Subject: [PATCH 02/10] Style fixes for thrust-config.cmake. --- thrust/cmake/thrust-config.cmake | 50 ++++++++++++++++++++------------ 1 file changed, 32 insertions(+), 18 deletions(-) diff --git a/thrust/cmake/thrust-config.cmake b/thrust/cmake/thrust-config.cmake index 71180b3a4..b9efd2676 100644 --- a/thrust/cmake/thrust-config.cmake +++ b/thrust/cmake/thrust-config.cmake @@ -89,19 +89,21 @@ set(thrust_libcudacxx_version 1.8.0) set(THRUST_HOST_SYSTEM_OPTIONS CPP OMP TBB CACHE INTERNAL "Valid Thrust host systems." + FORCE ) set(THRUST_DEVICE_SYSTEM_OPTIONS CUDA CPP OMP TBB CACHE INTERNAL "Valid Thrust device systems" + FORCE ) # Workaround cmake issue #20670 https://gitlab.kitware.com/cmake/cmake/-/issues/20670 -set(THRUST_VERSION ${${CMAKE_FIND_PACKAGE_NAME}_VERSION} CACHE INTERNAL "") -set(THRUST_VERSION_MAJOR ${${CMAKE_FIND_PACKAGE_NAME}_VERSION_MAJOR} CACHE INTERNAL "") -set(THRUST_VERSION_MINOR ${${CMAKE_FIND_PACKAGE_NAME}_VERSION_MINOR} CACHE INTERNAL "") -set(THRUST_VERSION_PATCH ${${CMAKE_FIND_PACKAGE_NAME}_VERSION_PATCH} CACHE INTERNAL "") -set(THRUST_VERSION_TWEAK ${${CMAKE_FIND_PACKAGE_NAME}_VERSION_TWEAK} CACHE INTERNAL "") -set(THRUST_VERSION_COUNT ${${CMAKE_FIND_PACKAGE_NAME}_VERSION_COUNT} CACHE INTERNAL "") +set(THRUST_VERSION ${${CMAKE_FIND_PACKAGE_NAME}_VERSION} CACHE INTERNAL "" FORCE) +set(THRUST_VERSION_MAJOR ${${CMAKE_FIND_PACKAGE_NAME}_VERSION_MAJOR} CACHE INTERNAL "" FORCE) +set(THRUST_VERSION_MINOR ${${CMAKE_FIND_PACKAGE_NAME}_VERSION_MINOR} CACHE INTERNAL "" FORCE) +set(THRUST_VERSION_PATCH ${${CMAKE_FIND_PACKAGE_NAME}_VERSION_PATCH} CACHE INTERNAL "" FORCE) +set(THRUST_VERSION_TWEAK ${${CMAKE_FIND_PACKAGE_NAME}_VERSION_TWEAK} CACHE INTERNAL "" FORCE) +set(THRUST_VERSION_COUNT ${${CMAKE_FIND_PACKAGE_NAME}_VERSION_COUNT} CACHE INTERNAL "" FORCE) function(thrust_create_target target_name) thrust_debug("Assembling target ${target_name}. Options: ${ARGN}" internal) @@ -113,7 +115,7 @@ function(thrust_create_target target_name) IGNORE_DEPRECATED_COMPILER IGNORE_DEPRECATED_CPP_11 IGNORE_DEPRECATED_CPP_DIALECT - ) + ) set(keys DEVICE DEVICE_OPTION @@ -121,13 +123,13 @@ function(thrust_create_target target_name) HOST HOST_OPTION HOST_OPTION_DOC - ) + ) cmake_parse_arguments(TCT "${options}" "${keys}" "" ${ARGN}) if (TCT_UNPARSED_ARGUMENTS) message(AUTHOR_WARNING "Unrecognized arguments passed to thrust_create_target: " ${TCT_UNPARSED_ARGUMENTS} - ) + ) endif() # Check that the main Thrust internal target is available @@ -137,7 +139,7 @@ function(thrust_create_target target_name) message(AUTHOR_WARNING "The `thrust_create_target` function was called outside the scope of the " "thrust targets. Call find_package again to recreate targets." - ) + ) endif() _thrust_set_if_undefined(TCT_HOST CPP) @@ -149,12 +151,14 @@ function(thrust_create_target target_name) if (NOT TCT_HOST IN_LIST THRUST_HOST_SYSTEM_OPTIONS) message(FATAL_ERROR - "Requested HOST=${TCT_HOST}; must be one of ${THRUST_HOST_SYSTEM_OPTIONS}") + "Requested HOST=${TCT_HOST}; must be one of ${THRUST_HOST_SYSTEM_OPTIONS}" + ) endif() if (NOT TCT_DEVICE IN_LIST THRUST_DEVICE_SYSTEM_OPTIONS) message(FATAL_ERROR - "Requested DEVICE=${TCT_DEVICE}; must be one of ${THRUST_DEVICE_SYSTEM_OPTIONS}") + "Requested DEVICE=${TCT_DEVICE}; must be one of ${THRUST_DEVICE_SYSTEM_OPTIONS}" + ) endif() if (TCT_FROM_OPTIONS) @@ -176,7 +180,7 @@ function(thrust_create_target target_name) # We can just create an INTERFACE IMPORTED target here instead of going # through _thrust_declare_interface_alias as long as we aren't hanging any - # Thrust/CUB include paths on ${target_name}. + # Thrust/CUB include paths directly on ${target_name}. add_library(${target_name} INTERFACE IMPORTED) target_link_libraries(${target_name} INTERFACE @@ -479,7 +483,10 @@ function(thrust_set_TBB_target tbb_target) if (NOT TARGET Thrust::TBB) thrust_debug("Setting TBB target to ${tbb_target}" internal) # Workaround cmake issue #20670 https://gitlab.kitware.com/cmake/cmake/-/issues/20670 - set(THRUST_TBB_VERSION ${TBB_VERSION} CACHE INTERNAL "TBB version used by Thrust") + set(THRUST_TBB_VERSION ${TBB_VERSION} CACHE INTERNAL + "TBB version used by Thrust" + FORCE + ) _thrust_declare_interface_alias(Thrust::TBB _Thrust_TBB) target_link_libraries(_Thrust_TBB INTERFACE Thrust::Thrust ${tbb_target}) thrust_debug_target(${tbb_target} "${THRUST_TBB_VERSION}" internal) @@ -494,7 +501,10 @@ function(thrust_set_OMP_target omp_target) if (NOT TARGET Thrust::OMP) thrust_debug("Setting OMP target to ${omp_target}" internal) # Workaround cmake issue #20670 https://gitlab.kitware.com/cmake/cmake/-/issues/20670 - set(THRUST_OMP_VERSION ${OpenMP_CXX_VERSION} CACHE INTERNAL "OpenMP version used by Thrust") + set(THRUST_OMP_VERSION ${OpenMP_CXX_VERSION} CACHE INTERNAL + "OpenMP version used by Thrust" + FORCE + ) _thrust_declare_interface_alias(Thrust::OMP _Thrust_OMP) target_link_libraries(_Thrust_OMP INTERFACE Thrust::Thrust ${omp_target}) thrust_debug_target(${omp_target} "${THRUST_OMP_VERSION}" internal) @@ -653,14 +663,17 @@ endmacro() # if (${CMAKE_FIND_PACKAGE_NAME}_FIND_QUIETLY) - set(_THRUST_QUIET ON CACHE INTERNAL "Quiet mode enabled for Thrust find_package calls.") - set(_THRUST_QUIET_FLAG "QUIET" CACHE INTERNAL "") + set(_THRUST_QUIET ON CACHE INTERNAL "Quiet mode enabled for Thrust find_package calls." FORCE) + set(_THRUST_QUIET_FLAG "QUIET" CACHE INTERNAL "" FORCE) else() unset(_THRUST_QUIET CACHE) unset(_THRUST_QUIET_FLAG CACHE) endif() -set(_THRUST_CMAKE_DIR "${CMAKE_CURRENT_LIST_DIR}" CACHE INTERNAL "Location of thrust-config.cmake") +set(_THRUST_CMAKE_DIR "${CMAKE_CURRENT_LIST_DIR}" CACHE INTERNAL + "Location of thrust-config.cmake" + FORCE +) # Internal target that actually holds the Thrust interface. Used by all other Thrust targets. if (NOT TARGET Thrust::Thrust) @@ -668,6 +681,7 @@ if (NOT TARGET Thrust::Thrust) # Pull in the include dir detected by thrust-config-version.cmake set(_THRUST_INCLUDE_DIR "${_THRUST_VERSION_INCLUDE_DIR}" CACHE INTERNAL "Location of Thrust headers." + FORCE ) unset(_THRUST_VERSION_INCLUDE_DIR CACHE) # Clear tmp variable from cache target_include_directories(_Thrust_Thrust INTERFACE "${_THRUST_INCLUDE_DIR}") From 807e9e0775705012e1db5d77ec4b5669d64def2e Mon Sep 17 00:00:00 2001 From: Allison Vacanti Date: Fri, 26 Mar 2021 16:06:34 -0400 Subject: [PATCH 03/10] Bump CUB for NV_IF_TARGET refactor. --- dependencies/cub | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dependencies/cub b/dependencies/cub index 56dcb06d0..4de961aee 160000 --- a/dependencies/cub +++ b/dependencies/cub @@ -1 +1 @@ -Subproject commit 56dcb06d0cd7f923c373a27a7f9993722e0f50b4 +Subproject commit 4de961aee49c894e9c380d7c2f7e750016976f00 From 539c9fa221620bd53d6d6c08d2a99ec866faab9a Mon Sep 17 00:00:00 2001 From: Allison Vacanti Date: Tue, 10 May 2022 16:54:09 -0400 Subject: [PATCH 04/10] Remove `thrust_set_libcudacxx_target` function from CMake user API. There's no way for a user to meaningfully use this, since libcudacxx is a required dependency. It is checked during the initial `find_package(Thrust)` call, before the user would have access to Thrust's CMake API. Updated the CMake README.md with instructions for using an explicit libcudacxx target. --- thrust/cmake/README.md | 19 +++++++++++++++---- thrust/cmake/thrust-config.cmake | 14 +++++++------- 2 files changed, 22 insertions(+), 11 deletions(-) diff --git a/thrust/cmake/README.md b/thrust/cmake/README.md index c85a8c857..ae296b635 100644 --- a/thrust/cmake/README.md +++ b/thrust/cmake/README.md @@ -101,7 +101,7 @@ find_package(Thrust 1.9.10.1 EXACT) would only match the 1.9.10.1 release. -#### Using a Specific TBB or OpenMP Environment +#### Using an Explicit TBB or OpenMP CMake Target When `thrust_create_target` is called, it will lazily load the requested systems on-demand through internal `find_package` calls. If a project already @@ -112,9 +112,20 @@ thrust_set_TBB_target(MyTBBTarget) thrust_set_OMP_target(MyOMPTarget) ``` -These functions must be called **before** `thrust_create_target`, and will -have no effect if the dependency is loaded as a -`find_package(Thrust COMPONENT [...])` component. +These functions must be called **before** the corresponding system is loaded +through `thrust_create_target` or `find_package(Thrust COMPONENT [OMP|TBB])`. + +#### Using an Explicit libcu++ CMake Target + +In contrast to the optional TBB/OMP dependencies, there is no +`thrust_set_libcudacxx_target` function that specifies an explicit libcu++ +target. This is because libcu++ is always required and must be found during the +initial `find_target(Thrust)` call that defines these functions. + +To force Thrust to use a specific libcu++ target, ensure that either the +`Thrust::libcudacxx` or `libcudacxx::libcudacxx` targets are defined prior to +the first invocation of `find_package(Thrust)`. Thrust will automatically use +these, giving preference to the `Thrust::libcudacxx` target. #### Testing for Systems diff --git a/thrust/cmake/thrust-config.cmake b/thrust/cmake/thrust-config.cmake index b9efd2676..fe88a961c 100644 --- a/thrust/cmake/thrust-config.cmake +++ b/thrust/cmake/thrust-config.cmake @@ -37,15 +37,14 @@ # [ADVANCED] # Optionally mark options as advanced # ) # -# # Use a custom TBB, CUB, libcudacxx, and/or OMP +# # Use a custom TBB, CUB, and/or OMP # # (Note that once set, these cannot be changed. This includes COMPONENT # # preloading and lazy lookups in thrust_create_target) # find_package(Thrust REQUIRED) # thrust_set_CUB_target(MyCUBTarget) # MyXXXTarget contains an existing # thrust_set_TBB_target(MyTBBTarget) # interface to XXX for Thrust to use. -# thrust_set_libcudacxx_target(MyLibcudacxxTarget) # thrust_set_OMP_target(MyOMPTarget) -# thrust_create_target(ThrustWithMyCUBAndLibcudacxx DEVICE CUDA) +# thrust_create_target(ThrustWithMyCUB DEVICE CUDA) # thrust_create_target(ThrustWithMyTBB DEVICE TBB) # thrust_create_target(ThrustWithMyOMP DEVICE OMP) # @@ -460,9 +459,10 @@ function(thrust_set_CUB_target cub_target) endif() endfunction() -# Use the provided libcudacxx_target for the CUDA backend. If Thrust::libcudacxx -# already exists, this call has no effect. -function(thrust_set_libcudacxx_target libcudacxx_target) +# Internal use only -- libcudacxx must be found during the initial +# `find_package(Thrust)` call and cannot be set afterwards. See README.md in +# this directory for details on using a specific libcudacxx target. +function(_thrust_set_libcudacxx_target libcudacxx_target) if (NOT TARGET Thrust::libcudacxx) thrust_debug("Setting libcudacxx target to ${libcudacxx_target}" internal) # Workaround cmake issue #20670 https://gitlab.kitware.com/cmake/cmake/-/issues/20670 @@ -712,7 +712,7 @@ if (NOT TARGET Thrust::libcudacxx) ) if (TARGET libcudacxx::libcudacxx) - thrust_set_libcudacxx_target(libcudacxx::libcudacxx) + _thrust_set_libcudacxx_target(libcudacxx::libcudacxx) else() thrust_debug("Expected libcudacxx::libcudacxx target not found!" internal) endif() From 9e4f0a338236eba4043623c2d416039eab56c9c8 Mon Sep 17 00:00:00 2001 From: Allison Vacanti Date: Fri, 26 Mar 2021 16:10:15 -0400 Subject: [PATCH 05/10] Remove checks for obsolete architectures. --- testing/cuda/pair_sort.cu | 22 +++-------- testing/cuda/pair_sort_by_key.cu | 24 ++++-------- testing/cuda/partition.cu | 66 ++++++++++++-------------------- testing/cuda/sort.cu | 24 ++++-------- testing/cuda/sort_by_key.cu | 29 +++++--------- thrust/detail/type_traits.h | 8 ---- thrust/system/cuda/config.h | 2 +- 7 files changed, 55 insertions(+), 120 deletions(-) diff --git a/testing/cuda/pair_sort.cu b/testing/cuda/pair_sort.cu index 87838e429..35a6b67e3 100644 --- a/testing/cuda/pair_sort.cu +++ b/testing/cuda/pair_sort.cu @@ -4,16 +4,11 @@ #include -template +template __global__ -void stable_sort_kernel(ExecutionPolicy exec, Iterator1 first, Iterator1 last, Iterator2 is_supported) +void stable_sort_kernel(ExecutionPolicy exec, Iterator first, Iterator last) { -#if (__CUDA_ARCH__ >= 200) - *is_supported = true; thrust::stable_sort(exec, first, last); -#else - *is_supported = false; -#endif } @@ -43,19 +38,14 @@ void TestPairStableSortDevice(ExecutionPolicy exec) thrust::device_vector

d_pairs = h_pairs; - thrust::device_vector is_supported(1); - - stable_sort_kernel<<<1,1>>>(exec, d_pairs.begin(), d_pairs.end(), is_supported.begin()); + stable_sort_kernel<<<1,1>>>(exec, d_pairs.begin(), d_pairs.end()); cudaError_t const err = cudaDeviceSynchronize(); ASSERT_EQUAL(cudaSuccess, err); - if(is_supported[0]) - { - // sort on the host - thrust::stable_sort(h_pairs.begin(), h_pairs.end()); + // sort on the host + thrust::stable_sort(h_pairs.begin(), h_pairs.end()); - ASSERT_EQUAL_QUIET(h_pairs, d_pairs); - } + ASSERT_EQUAL_QUIET(h_pairs, d_pairs); }; diff --git a/testing/cuda/pair_sort_by_key.cu b/testing/cuda/pair_sort_by_key.cu index 19996e5a2..59908eef4 100644 --- a/testing/cuda/pair_sort_by_key.cu +++ b/testing/cuda/pair_sort_by_key.cu @@ -6,16 +6,11 @@ #include -template +template __global__ -void stable_sort_by_key_kernel(ExecutionPolicy exec, Iterator1 keys_first, Iterator1 keys_last, Iterator2 values_first, Iterator3 is_supported) +void stable_sort_by_key_kernel(ExecutionPolicy exec, Iterator1 keys_first, Iterator1 keys_last, Iterator2 values_first) { -#if (__CUDA_ARCH__ >= 200) - *is_supported = true; thrust::stable_sort_by_key(exec, keys_first, keys_last, values_first); -#else - *is_supported = false; -#endif } @@ -51,21 +46,16 @@ void TestPairStableSortByKeyDevice(ExecutionPolicy exec) thrust::device_vector

d_pairs = h_pairs; thrust::device_vector d_values = h_values; - thrust::device_vector is_supported(1); - // sort on the device - stable_sort_by_key_kernel<<<1,1>>>(exec, d_pairs.begin(), d_pairs.end(), d_values.begin(), is_supported.begin()); + stable_sort_by_key_kernel<<<1,1>>>(exec, d_pairs.begin(), d_pairs.end(), d_values.begin()); cudaError_t const err = cudaDeviceSynchronize(); ASSERT_EQUAL(cudaSuccess, err); - if(is_supported[0]) - { - // sort on the host - thrust::stable_sort_by_key(h_pairs.begin(), h_pairs.end(), h_values.begin()); + // sort on the host + thrust::stable_sort_by_key(h_pairs.begin(), h_pairs.end(), h_values.begin()); - ASSERT_EQUAL_QUIET(h_pairs, d_pairs); - ASSERT_EQUAL(h_values, d_values); - } + ASSERT_EQUAL_QUIET(h_pairs, d_pairs); + ASSERT_EQUAL(h_values, d_values); }; diff --git a/testing/cuda/partition.cu b/testing/cuda/partition.cu index 2da7d35d2..f9ec48600 100644 --- a/testing/cuda/partition.cu +++ b/testing/cuda/partition.cu @@ -286,16 +286,11 @@ void TestPartitionCopyStencilDeviceNoSync() DECLARE_UNITTEST(TestPartitionCopyStencilDeviceNoSync); -template +template __global__ -void stable_partition_kernel(ExecutionPolicy exec, Iterator1 first, Iterator1 last, Predicate pred, Iterator2 result, Iterator3 is_supported) +void stable_partition_kernel(ExecutionPolicy exec, Iterator1 first, Iterator1 last, Predicate pred, Iterator2 result) { -#if (__CUDA_ARCH__ >= 200) - *is_supported = true; *result = thrust::stable_partition(exec, first, last, pred); -#else - *is_supported = false; -#endif } @@ -313,24 +308,20 @@ void TestStablePartitionDevice(ExecutionPolicy exec) data[4] = 2; thrust::device_vector result(1); - thrust::device_vector is_supported(1); - - stable_partition_kernel<<<1,1>>>(exec, data.begin(), data.end(), is_even(), result.begin(), is_supported.begin()); + + stable_partition_kernel<<<1,1>>>(exec, data.begin(), data.end(), is_even(), result.begin()); cudaError_t const err = cudaDeviceSynchronize(); ASSERT_EQUAL(cudaSuccess, err); - if(is_supported[0]) - { - thrust::device_vector ref(5); - ref[0] = 2; - ref[1] = 2; - ref[2] = 1; - ref[3] = 1; - ref[4] = 1; + thrust::device_vector ref(5); + ref[0] = 2; + ref[1] = 2; + ref[2] = 1; + ref[3] = 1; + ref[4] = 1; - ASSERT_EQUAL(2, (iterator)result[0] - data.begin()); - ASSERT_EQUAL(ref, data); - } + ASSERT_EQUAL(2, (iterator)result[0] - data.begin()); + ASSERT_EQUAL(ref, data); } @@ -355,16 +346,11 @@ void TestStablePartitionDeviceNoSync() DECLARE_UNITTEST(TestStablePartitionDeviceNoSync); -template +template __global__ -void stable_partition_kernel(ExecutionPolicy exec, Iterator1 first, Iterator1 last, Iterator2 stencil_first, Predicate pred, Iterator3 result, Iterator4 is_supported) +void stable_partition_kernel(ExecutionPolicy exec, Iterator1 first, Iterator1 last, Iterator2 stencil_first, Predicate pred, Iterator3 result) { -#if (__CUDA_ARCH__ >= 200) - *is_supported = true; *result = thrust::stable_partition(exec, first, last, stencil_first, pred); -#else - *is_supported = false; -#endif } @@ -389,24 +375,20 @@ void TestStablePartitionStencilDevice(ExecutionPolicy exec) stencil[4] = 2; thrust::device_vector result(1); - thrust::device_vector is_supported(1); - - stable_partition_kernel<<<1,1>>>(exec, data.begin(), data.end(), stencil.begin(), is_even(), result.begin(), is_supported.begin()); + + stable_partition_kernel<<<1,1>>>(exec, data.begin(), data.end(), stencil.begin(), is_even(), result.begin()); cudaError_t const err = cudaDeviceSynchronize(); ASSERT_EQUAL(cudaSuccess, err); - if(is_supported[0]) - { - thrust::device_vector ref(5); - ref[0] = 1; - ref[1] = 1; - ref[2] = 0; - ref[3] = 0; - ref[4] = 0; + thrust::device_vector ref(5); + ref[0] = 1; + ref[1] = 1; + ref[2] = 0; + ref[3] = 0; + ref[4] = 0; - ASSERT_EQUAL(2, (iterator)result[0] - data.begin()); - ASSERT_EQUAL(ref, data); - } + ASSERT_EQUAL(2, (iterator)result[0] - data.begin()); + ASSERT_EQUAL(ref, data); } diff --git a/testing/cuda/sort.cu b/testing/cuda/sort.cu index 7f3d6413c..1d341011f 100644 --- a/testing/cuda/sort.cu +++ b/testing/cuda/sort.cu @@ -4,16 +4,11 @@ #include -template +template __global__ -void sort_kernel(ExecutionPolicy exec, Iterator first, Iterator last, Compare comp, Iterator2 is_supported) +void sort_kernel(ExecutionPolicy exec, Iterator first, Iterator last, Compare comp) { -#if (__CUDA_ARCH__ >= 200) - *is_supported = true; thrust::sort(exec, first, last, comp); -#else - *is_supported = false; -#endif } @@ -34,19 +29,14 @@ void TestComparisonSortDevice(ExecutionPolicy exec, const size_t n, Compare comp thrust::host_vector h_data = unittest::random_integers(n); thrust::device_vector d_data = h_data; - thrust::device_vector is_supported(1); - - sort_kernel<<<1,1>>>(exec, d_data.begin(), d_data.end(), comp, is_supported.begin()); + sort_kernel<<<1,1>>>(exec, d_data.begin(), d_data.end(), comp); cudaError_t const err = cudaDeviceSynchronize(); ASSERT_EQUAL(cudaSuccess, err); - if(is_supported[0]) - { - thrust::sort(h_data.begin(), h_data.end(), comp); - - ASSERT_EQUAL(h_data, d_data); - } + thrust::sort(h_data.begin(), h_data.end(), comp); + + ASSERT_EQUAL(h_data, d_data); }; @@ -163,7 +153,7 @@ void TestComparisonSortCudaStreams() cudaStreamSynchronize(s); ASSERT_EQUAL(true, thrust::is_sorted(keys.begin(), keys.end(), my_less())); - + cudaStreamDestroy(s); } DECLARE_UNITTEST(TestComparisonSortCudaStreams); diff --git a/testing/cuda/sort_by_key.cu b/testing/cuda/sort_by_key.cu index 1e848879b..8863be27a 100644 --- a/testing/cuda/sort_by_key.cu +++ b/testing/cuda/sort_by_key.cu @@ -4,16 +4,11 @@ #include -template +template __global__ -void sort_by_key_kernel(ExecutionPolicy exec, Iterator1 keys_first, Iterator1 keys_last, Iterator2 values_first, Compare comp, Iterator3 is_supported) +void sort_by_key_kernel(ExecutionPolicy exec, Iterator1 keys_first, Iterator1 keys_last, Iterator2 values_first, Compare comp) { -#if (__CUDA_ARCH__ >= 200) - *is_supported = true; thrust::sort_by_key(exec, keys_first, keys_last, values_first, comp); -#else - *is_supported = false; -#endif } @@ -36,19 +31,15 @@ void TestComparisonSortByKeyDevice(ExecutionPolicy exec, const size_t n, Compare thrust::host_vector h_values = h_keys; thrust::device_vector d_values = d_keys; - - thrust::device_vector is_supported(1); - sort_by_key_kernel<<<1,1>>>(exec, d_keys.begin(), d_keys.end(), d_values.begin(), comp, is_supported.begin()); + + sort_by_key_kernel<<<1,1>>>(exec, d_keys.begin(), d_keys.end(), d_values.begin(), comp); cudaError_t const err = cudaDeviceSynchronize(); ASSERT_EQUAL(cudaSuccess, err); - if(is_supported[0]) - { - thrust::sort_by_key(h_keys.begin(), h_keys.end(), h_values.begin(), comp); - - ASSERT_EQUAL(h_keys, d_keys); - ASSERT_EQUAL(h_values, d_values); - } + thrust::sort_by_key(h_keys.begin(), h_keys.end(), h_values.begin(), comp); + + ASSERT_EQUAL(h_keys, d_keys); + ASSERT_EQUAL(h_values, d_values); }; @@ -139,7 +130,7 @@ void TestComparisonSortByKeyCudaStreams() ASSERT_EQUAL(true, thrust::is_sorted(keys.begin(), keys.end())); ASSERT_EQUAL(true, thrust::is_sorted(vals.begin(), vals.end())); - + cudaStreamDestroy(s); } DECLARE_UNITTEST(TestComparisonSortByKeyCudaStreams); @@ -169,7 +160,7 @@ void TestSortByKeyCudaStreams() ASSERT_EQUAL(true, thrust::is_sorted(keys.begin(), keys.end())); ASSERT_EQUAL(true, thrust::is_sorted(vals.begin(), vals.end())); - + cudaStreamDestroy(s); } DECLARE_UNITTEST(TestSortByKeyCudaStreams); diff --git a/thrust/detail/type_traits.h b/thrust/detail/type_traits.h index d147f8328..5596f569e 100644 --- a/thrust/detail/type_traits.h +++ b/thrust/detail/type_traits.h @@ -568,15 +568,7 @@ template struct largest_available_float { -#if defined(__CUDA_ARCH__) -# if (__CUDA_ARCH__ < 130) - typedef float type; -# else typedef double type; -# endif -#else - typedef double type; -#endif }; // T1 wins if they are both the same size diff --git a/thrust/system/cuda/config.h b/thrust/system/cuda/config.h index 734e47bad..c0ba0d77b 100644 --- a/thrust/system/cuda/config.h +++ b/thrust/system/cuda/config.h @@ -33,7 +33,7 @@ #include #if defined(__CUDACC__) || defined(_NVHPC_CUDA) -# if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__>= 350 && defined(__CUDACC_RDC__)) +# if !defined(__CUDA_ARCH__) || defined(__CUDACC_RDC__) # define __THRUST_HAS_CUDART__ 1 # define THRUST_RUNTIME_FUNCTION __host__ __device__ __forceinline__ # else From 3ea8940359d5ca3d5657447d917b400c0486a71f Mon Sep 17 00:00:00 2001 From: Allison Vacanti Date: Fri, 26 Mar 2021 16:10:35 -0400 Subject: [PATCH 06/10] Refactor to use NV_IF_TARGET. --- testing/allocator.cu | 10 +- testing/device_delete.cu | 14 +- testing/uninitialized_copy.cu | 15 +- testing/uninitialized_fill.cu | 15 +- testing/unittest/runtime_static_assert.h | 11 +- thrust/detail/allocator/no_throw_allocator.h | 24 +-- .../detail/allocator/temporary_allocator.inl | 23 ++- thrust/detail/config/cpp_compatibility.h | 43 +++-- thrust/detail/contiguous_storage.inl | 25 ++- thrust/detail/integer_math.h | 41 +++-- thrust/detail/memory_algorithms.h | 109 +++++++----- thrust/system/cuda/config.h | 10 +- thrust/system/cuda/detail/assign_value.h | 32 ++-- .../system/cuda/detail/core/agent_launcher.h | 31 +--- thrust/system/cuda/detail/core/util.h | 43 +++-- thrust/system/cuda/detail/get_value.h | 24 +-- thrust/system/cuda/detail/iter_swap.h | 17 +- thrust/system/cuda/detail/malloc_and_free.h | 92 +++++----- thrust/system/cuda/detail/util.h | 157 ++++++++---------- thrust/system/detail/sequential/sort.inl | 34 ++-- .../detail/sequential/stable_merge_sort.inl | 34 ++-- .../system/detail/sequential/trivial_copy.h | 20 +-- 22 files changed, 407 insertions(+), 417 deletions(-) diff --git a/testing/allocator.cu b/testing/allocator.cu index a29408de9..0317a2b31 100644 --- a/testing/allocator.cu +++ b/testing/allocator.cu @@ -2,6 +2,9 @@ #include #include #include + +#include + #include template @@ -80,9 +83,7 @@ struct my_allocator_with_custom_destroy __host__ __device__ void destroy(T *) { -#if !__CUDA_ARCH__ - g_state = true; -#endif + NV_IF_TARGET(NV_IS_HOST, (g_state = true;)); } value_type *allocate(std::ptrdiff_t n) @@ -203,7 +204,6 @@ void TestAllocatorTraitsRebind() } DECLARE_UNITTEST(TestAllocatorTraitsRebind); -#if THRUST_CPP_DIALECT >= 2011 void TestAllocatorTraitsRebindCpp11() { ASSERT_EQUAL( @@ -251,5 +251,3 @@ void TestAllocatorTraitsRebindCpp11() ); } DECLARE_UNITTEST(TestAllocatorTraitsRebindCpp11); -#endif // C++11 - diff --git a/testing/device_delete.cu b/testing/device_delete.cu index 6684cb2b5..12f757fa4 100644 --- a/testing/device_delete.cu +++ b/testing/device_delete.cu @@ -4,21 +4,23 @@ #include #include +#include + struct Foo { __host__ __device__ Foo(void) - :set_me_upon_destruction(0) + : set_me_upon_destruction{nullptr} {} __host__ __device__ ~Foo(void) { -#ifdef __CUDA_ARCH__ - // __device__ overload - if(set_me_upon_destruction != 0) - *set_me_upon_destruction = true; -#endif + NV_IF_TARGET(NV_IS_DEVICE, ( + if (set_me_upon_destruction != nullptr) + { + *set_me_upon_destruction = true; + })); } bool *set_me_upon_destruction; diff --git a/testing/uninitialized_copy.cu b/testing/uninitialized_copy.cu index 7455d8c81..62a79cdc9 100644 --- a/testing/uninitialized_copy.cu +++ b/testing/uninitialized_copy.cu @@ -3,6 +3,7 @@ #include #include +#include template ForwardIterator uninitialized_copy(my_system &system, @@ -147,13 +148,13 @@ struct CopyConstructTest __host__ __device__ CopyConstructTest(const CopyConstructTest &) { -#if __CUDA_ARCH__ - copy_constructed_on_device = true; - copy_constructed_on_host = false; -#else - copy_constructed_on_device = false; - copy_constructed_on_device = true; -#endif + NV_IF_TARGET(NV_IS_DEVICE, ( + copy_constructed_on_device = true; + copy_constructed_on_host = false; + ), ( + copy_constructed_on_device = false; + copy_constructed_on_host = true; + )); } __host__ __device__ diff --git a/testing/uninitialized_fill.cu b/testing/uninitialized_fill.cu index facd6fe6f..8fbb97002 100644 --- a/testing/uninitialized_fill.cu +++ b/testing/uninitialized_fill.cu @@ -3,6 +3,7 @@ #include #include +#include template void uninitialized_fill(my_system &system, @@ -156,13 +157,13 @@ struct CopyConstructTest __host__ __device__ CopyConstructTest(const CopyConstructTest &) { -#if __CUDA_ARCH__ - copy_constructed_on_device = true; - copy_constructed_on_host = false; -#else - copy_constructed_on_device = false; - copy_constructed_on_host = true; -#endif + NV_IF_TARGET(NV_IS_DEVICE, ( + copy_constructed_on_device = true; + copy_constructed_on_host = false; + ), ( + copy_constructed_on_device = false; + copy_constructed_on_host = true; + )); } __host__ __device__ diff --git a/testing/unittest/runtime_static_assert.h b/testing/unittest/runtime_static_assert.h index 3e7b60290..d53bd3b20 100644 --- a/testing/unittest/runtime_static_assert.h +++ b/testing/unittest/runtime_static_assert.h @@ -18,8 +18,11 @@ namespace unittest #include #include +#include + #if THRUST_DEVICE_SYSTEM == THRUST_DEVICE_SYSTEM_CUDA + #define ASSERT_STATIC_ASSERT(X) \ { \ bool triggered = false; \ @@ -86,11 +89,9 @@ namespace unittest { static_assert_exception ex(filename, lineno); -#ifdef __CUDA_ARCH__ - *detail::device_exception = ex; -#else - throw ex; -#endif + NV_IF_TARGET(NV_IS_DEVICE, + (*detail::device_exception = ex;), + (throw ex;)); } } } diff --git a/thrust/detail/allocator/no_throw_allocator.h b/thrust/detail/allocator/no_throw_allocator.h index ea158d77f..a6c16985b 100644 --- a/thrust/detail/allocator/no_throw_allocator.h +++ b/thrust/detail/allocator/no_throw_allocator.h @@ -18,6 +18,8 @@ #include +#include + THRUST_NAMESPACE_BEGIN namespace detail { @@ -43,18 +45,18 @@ template __host__ __device__ void deallocate(typename super_t::pointer p, typename super_t::size_type n) { -#ifndef __CUDA_ARCH__ - try - { + NV_IF_TARGET(NV_IS_HOST, ( + try + { + super_t::deallocate(p, n); + } // end try + catch(...) + { + // catch anything + } // end catch + ), ( super_t::deallocate(p, n); - } // end try - catch(...) - { - // catch anything - } // end catch -#else - super_t::deallocate(p, n); -#endif + )); } // end deallocate() inline __host__ __device__ diff --git a/thrust/detail/allocator/temporary_allocator.inl b/thrust/detail/allocator/temporary_allocator.inl index 75aa7b9dc..609b0d318 100644 --- a/thrust/detail/allocator/temporary_allocator.inl +++ b/thrust/detail/allocator/temporary_allocator.inl @@ -22,10 +22,13 @@ #include #include -#if (defined(_NVHPC_CUDA) || defined(__CUDA_ARCH__)) && \ - THRUST_DEVICE_SYSTEM == THRUST_DEVICE_SYSTEM_CUDA +#include + +#if THRUST_DEVICE_SYSTEM == THRUST_DEVICE_SYSTEM_CUDA +#if (defined(_NVHPC_CUDA) || defined(__CUDA_ARCH__)) #include -#endif +#endif // NVCC device pass or NVC++ +#endif // CUDA THRUST_NAMESPACE_BEGIN namespace detail @@ -47,15 +50,11 @@ __host__ __device__ // note that we pass cnt to deallocate, not a value derived from result.second deallocate(result.first, cnt); - if (THRUST_IS_HOST_CODE) { - #if THRUST_INCLUDE_HOST_CODE - throw thrust::system::detail::bad_alloc("temporary_buffer::allocate: get_temporary_buffer failed"); - #endif - } else { - #if THRUST_INCLUDE_DEVICE_CODE && THRUST_DEVICE_SYSTEM == THRUST_DEVICE_SYSTEM_CUDA - thrust::system::cuda::detail::terminate_with_message("temporary_buffer::allocate: get_temporary_buffer failed"); - #endif - } + NV_IF_TARGET(NV_IS_HOST, ( + throw thrust::system::detail::bad_alloc("temporary_buffer::allocate: get_temporary_buffer failed"); + ), ( // NV_IS_DEVICE + thrust::system::cuda::detail::terminate_with_message("temporary_buffer::allocate: get_temporary_buffer failed"); + )); } // end if return result.first; diff --git a/thrust/detail/config/cpp_compatibility.h b/thrust/detail/config/cpp_compatibility.h index d924f79cf..18b9cbdcf 100644 --- a/thrust/detail/config/cpp_compatibility.h +++ b/thrust/detail/config/cpp_compatibility.h @@ -73,20 +73,29 @@ # endif #endif -#if defined(_NVHPC_CUDA) -# define THRUST_IS_DEVICE_CODE __builtin_is_device_code() -# define THRUST_IS_HOST_CODE (!__builtin_is_device_code()) -# define THRUST_INCLUDE_DEVICE_CODE 1 -# define THRUST_INCLUDE_HOST_CODE 1 -#elif defined(__CUDA_ARCH__) -# define THRUST_IS_DEVICE_CODE 1 -# define THRUST_IS_HOST_CODE 0 -# define THRUST_INCLUDE_DEVICE_CODE 1 -# define THRUST_INCLUDE_HOST_CODE 0 -#else -# define THRUST_IS_DEVICE_CODE 0 -# define THRUST_IS_HOST_CODE 1 -# define THRUST_INCLUDE_DEVICE_CODE 0 -# define THRUST_INCLUDE_HOST_CODE 1 -#endif - +// These definitions were intended for internal use only and are now obsolete. +// If you relied on them, consider porting your code to use the functionality +// in libcu++'s header. +// For a temporary workaround, define THRUST_PROVIDE_LEGACY_ARCH_MACROS to make +// them available again. These should be considered deprecated and will be +// fully removed in a future version. +#ifdef THRUST_PROVIDE_LEGACY_ARCH_MACROS + #ifndef THRUST_IS_DEVICE_CODE + #if defined(_NVHPC_CUDA) + #define THRUST_IS_DEVICE_CODE __builtin_is_device_code() + #define THRUST_IS_HOST_CODE (!__builtin_is_device_code()) + #define THRUST_INCLUDE_DEVICE_CODE 1 + #define THRUST_INCLUDE_HOST_CODE 1 + #elif defined(__CUDA_ARCH__) + #define THRUST_IS_DEVICE_CODE 1 + #define THRUST_IS_HOST_CODE 0 + #define THRUST_INCLUDE_DEVICE_CODE 1 + #define THRUST_INCLUDE_HOST_CODE 0 + #else + #define THRUST_IS_DEVICE_CODE 0 + #define THRUST_IS_HOST_CODE 1 + #define THRUST_INCLUDE_DEVICE_CODE 0 + #define THRUST_INCLUDE_HOST_CODE 1 + #endif + #endif +#endif // THRUST_PROVIDE_LEGACY_ARCH_MACROS diff --git a/thrust/detail/contiguous_storage.inl b/thrust/detail/contiguous_storage.inl index b82b83399..7ae8657f0 100644 --- a/thrust/detail/contiguous_storage.inl +++ b/thrust/detail/contiguous_storage.inl @@ -25,6 +25,8 @@ #include #include +#include + #include // for std::runtime_error #include // for use of std::swap in the WAR below @@ -432,19 +434,16 @@ __host__ __device__ void contiguous_storage ::swap_allocators(false_type, Alloc &other) { - if (THRUST_IS_DEVICE_CODE) { - #if THRUST_INCLUDE_DEVICE_CODE - // allocators must be equal when swapping containers with allocators that propagate on swap - assert(!is_allocator_not_equal(other)); - #endif - } else { - #if THRUST_INCLUDE_HOST_CODE - if (is_allocator_not_equal(other)) - { - throw allocator_mismatch_on_swap(); - } - #endif - } + NV_IF_TARGET(NV_IS_DEVICE, ( + // allocators must be equal when swapping containers with allocators that propagate on swap + assert(!is_allocator_not_equal(other)); + ), ( + if (is_allocator_not_equal(other)) + { + throw allocator_mismatch_on_swap(); + } + )); + thrust::swap(m_allocator, other); } // end contiguous_storage::swap_allocators() diff --git a/thrust/detail/integer_math.h b/thrust/detail/integer_math.h index 76887a1ea..0f8c8aac1 100644 --- a/thrust/detail/integer_math.h +++ b/thrust/detail/integer_math.h @@ -17,14 +17,13 @@ #pragma once #include -#include +#include -#if THRUST_CPP_DIALECT >= 2011 - #include -#endif +#include -THRUST_NAMESPACE_BEGIN +#include +THRUST_NAMESPACE_BEGIN namespace detail { @@ -33,25 +32,23 @@ __host__ __device__ __thrust_forceinline__ Integer clz(Integer x) { Integer result; - if (THRUST_IS_DEVICE_CODE) { - #if THRUST_INCLUDE_DEVICE_CODE - result = ::__clz(x); - #endif - } else { - #if THRUST_INCLUDE_HOST_CODE - int num_bits = 8 * sizeof(Integer); - int num_bits_minus_one = num_bits - 1; - result = num_bits; - for (int i = num_bits_minus_one; i >= 0; --i) + + NV_IF_TARGET(NV_IS_DEVICE, ( + result = ::__clz(x); + ), ( + int num_bits = 8 * sizeof(Integer); + int num_bits_minus_one = num_bits - 1; + result = num_bits; + for (int i = num_bits_minus_one; i >= 0; --i) + { + if ((Integer(1) << i) & x) { - if ((Integer(1) << i) & x) - { - result = num_bits_minus_one - i; - break; - } + result = num_bits_minus_one - i; + break; } - #endif - } + } + )); + return result; } diff --git a/thrust/detail/memory_algorithms.h b/thrust/detail/memory_algorithms.h index bc50f307c..2f6b3a81d 100644 --- a/thrust/detail/memory_algorithms.h +++ b/thrust/detail/memory_algorithms.h @@ -12,11 +12,14 @@ #include #include #include +#include #include +#include + #include #include -#include + THRUST_NAMESPACE_BEGIN @@ -102,7 +105,6 @@ ForwardIt destroy_n(Allocator const& alloc, ForwardIt first, Size n) return first; } -#if THRUST_CPP_DIALECT >= 2011 template __host__ __device__ void uninitialized_construct( @@ -112,17 +114,24 @@ void uninitialized_construct( using T = typename iterator_traits::value_type; ForwardIt current = first; - #if !__CUDA_ARCH__ // No exceptions in CUDA. - try { - #endif + + // No exceptions in CUDA. + NV_IF_TARGET(NV_IS_HOST, ( + try { + for (; current != last; ++current) + { + ::new (static_cast(addressof(*current))) T(args...); + } + } catch (...) { + destroy(first, current); + throw; + } + ), ( for (; current != last; ++current) + { ::new (static_cast(addressof(*current))) T(args...); - #if !__CUDA_ARCH__ // No exceptions in CUDA. - } catch (...) { - destroy(first, current); - throw; - } - #endif + } + )); } template @@ -140,17 +149,24 @@ void uninitialized_construct_with_allocator( typename traits::allocator_type alloc_T(alloc); ForwardIt current = first; - #if !__CUDA_ARCH__ // No exceptions in CUDA. - try { - #endif + + // No exceptions in CUDA. + NV_IF_TARGET(NV_IS_HOST, ( + try { + for (; current != last; ++current) + { + traits::construct(alloc_T, addressof(*current), args...); + } + } catch (...) { + destroy(alloc_T, first, current); + throw; + } + ), ( for (; current != last; ++current) + { traits::construct(alloc_T, addressof(*current), args...); - #if !__CUDA_ARCH__ // No exceptions in CUDA. - } catch (...) { - destroy(alloc_T, first, current); - throw; - } - #endif + } + )); } template @@ -161,17 +177,24 @@ void uninitialized_construct_n( using T = typename iterator_traits::value_type; ForwardIt current = first; - #if !__CUDA_ARCH__ // No exceptions in CUDA. - try { - #endif - for (; n > 0; (void) ++current, --n) + + // No exceptions in CUDA. + NV_IF_TARGET(NV_IS_HOST, ( + try { + for (; n > 0; ++current, --n) + { + ::new (static_cast(addressof(*current))) T(args...); + } + } catch (...) { + destroy(first, current); + throw; + } + ), ( + for (; n > 0; ++current, --n) + { ::new (static_cast(addressof(*current))) T(args...); - #if !__CUDA_ARCH__ // No exceptions in CUDA. - } catch (...) { - destroy(first, current); - throw; - } - #endif + } + )); } template @@ -189,19 +212,25 @@ void uninitialized_construct_n_with_allocator( typename traits::allocator_type alloc_T(alloc); ForwardIt current = first; - #if !__CUDA_ARCH__ // No exceptions in CUDA. - try { - #endif + + // No exceptions in CUDA. + NV_IF_TARGET(NV_IS_HOST, ( + try { + for (; n > 0; (void) ++current, --n) + { + traits::construct(alloc_T, addressof(*current), args...); + } + } catch (...) { + destroy(alloc_T, first, current); + throw; + } + ), ( for (; n > 0; (void) ++current, --n) + { traits::construct(alloc_T, addressof(*current), args...); - #if !__CUDA_ARCH__ // No exceptions in CUDA. - } catch (...) { - destroy(alloc_T, first, current); - throw; - } - #endif + } + )); } -#endif /////////////////////////////////////////////////////////////////////////////// diff --git a/thrust/system/cuda/config.h b/thrust/system/cuda/config.h index c0ba0d77b..251f8d180 100644 --- a/thrust/system/cuda/config.h +++ b/thrust/system/cuda/config.h @@ -45,9 +45,17 @@ # define THRUST_RUNTIME_FUNCTION __host__ __forceinline__ #endif +// These definitions were intended for internal use only and are now obsolete. +// If you relied on them, consider porting your code to use the functionality +// in libcu++'s header. +// For a temporary workaround, define THRUST_PROVIDE_LEGACY_ARCH_MACROS to make +// them available again. These should be considered deprecated and will be +// fully removed in a future version. +#ifdef THRUST_PROVIDE_LEGACY_ARCH_MACROS #ifdef __CUDA_ARCH__ #define THRUST_DEVICE_CODE -#endif +#endif // __CUDA_ARCH__ +#endif // THRUST_PROVIDE_LEGACY_ARCH_MACROS #ifdef THRUST_AGENT_ENTRY_NOINLINE #define THRUST_AGENT_ENTRY_INLINE_ATTR __noinline__ diff --git a/thrust/system/cuda/detail/assign_value.h b/thrust/system/cuda/detail/assign_value.h index 195493a4f..8945f1cac 100644 --- a/thrust/system/cuda/detail/assign_value.h +++ b/thrust/system/cuda/detail/assign_value.h @@ -24,6 +24,7 @@ #include #include +#include THRUST_NAMESPACE_BEGIN namespace cuda_cub { @@ -47,15 +48,12 @@ inline __host__ __device__ } }; - if (THRUST_IS_HOST_CODE) { - #if THRUST_INCLUDE_HOST_CODE - war_nvbugs_881631::host_path(exec,dst,src); - #endif - } else { - #if THRUST_INCLUDE_DEVICE_CODE - war_nvbugs_881631::device_path(exec,dst,src); - #endif - } + NV_IF_TARGET(NV_IS_HOST, ( + war_nvbugs_881631::host_path(exec,dst,src); + ), ( + war_nvbugs_881631::device_path(exec,dst,src); + )); + } // end assign_value() @@ -83,20 +81,14 @@ inline __host__ __device__ } }; - if (THRUST_IS_HOST_CODE) { - #if THRUST_INCLUDE_HOST_CODE - war_nvbugs_881631::host_path(systems,dst,src); - #endif - } else { - #if THRUST_INCLUDE_DEVICE_CODE - war_nvbugs_881631::device_path(systems,dst,src); - #endif - } + NV_IF_TARGET(NV_IS_HOST, ( + war_nvbugs_881631::host_path(systems,dst,src); + ), ( + war_nvbugs_881631::device_path(systems,dst,src); + )); } // end assign_value() - - } // end cuda_cub THRUST_NAMESPACE_END #endif diff --git a/thrust/system/cuda/detail/core/agent_launcher.h b/thrust/system/cuda/detail/core/agent_launcher.h index 8a79a87c7..4cdd7ff46 100644 --- a/thrust/system/cuda/detail/core/agent_launcher.h +++ b/thrust/system/cuda/detail/core/agent_launcher.h @@ -36,14 +36,7 @@ #include #include -#if 0 -#define __THRUST__TEMPLATE_DEBUG -#endif - -#if __THRUST__TEMPLATE_DEBUG -template class ID_impl; -template class Foo { ID_impl t;}; -#endif +#include THRUST_NAMESPACE_BEGIN namespace cuda_cub { @@ -521,15 +514,9 @@ namespace core { { if (debug_sync) { - if (THRUST_IS_DEVICE_CODE) { - #if THRUST_INCLUDE_DEVICE_CODE - cub::detail::device_synchronize(); - #endif - } else { - #if THRUST_INCLUDE_HOST_CODE - cudaStreamSynchronize(stream); - #endif - } + NV_IF_TARGET(NV_IS_HOST, + (cudaStreamSynchronize(stream);), + (cub::detail::device_synchronize();)); } } @@ -747,16 +734,6 @@ namespace core { void THRUST_RUNTIME_FUNCTION launch(Args... args) const { -#if __THRUST__TEMPLATE_DEBUG -#ifdef __CUDA_ARCH__ - typedef typename Foo< - shm1::v1, - shm1::v2, - shm1::v3, - shm1::v4, - shm1::v5>::t tt; -#endif -#endif launch_impl(has_enough_shmem_t(),args...); sync(); } diff --git a/thrust/system/cuda/detail/core/util.h b/thrust/system/cuda/detail/core/util.h index 83c05fd61..fd8821901 100644 --- a/thrust/system/cuda/detail/core/util.h +++ b/thrust/system/cuda/detail/core/util.h @@ -26,15 +26,17 @@ ******************************************************************************/ #pragma once -#include #include -#include -#include #include +#include #include +#include + #include -#include #include +#include + +#include THRUST_NAMESPACE_BEGIN @@ -356,27 +358,20 @@ namespace core { // Use one path, with Agent::ptx_plan, for device code where device-side // kernel launches are supported. The other path, with // get_agent_plan_impl::get(version), is for host code and for device - // code without device-side kernel launches. NVCC and Feta check for - // these situations differently. - #ifdef _NVHPC_CUDA - #ifdef __THRUST_HAS_CUDART__ - if (CUB_IS_DEVICE_CODE) { - return typename get_plan::type(typename Agent::ptx_plan()); - } else - #endif - { - return get_agent_plan_impl::get(ptx_version); - } - #else - #if (CUB_PTX_ARCH > 0) && defined(__THRUST_HAS_CUDART__) - typedef typename get_plan::type Plan; + // code without device-side kernel launches. +#ifdef __THRUST_HAS_CUDART__ + NV_IF_TARGET( + NV_IS_DEVICE, + ( THRUST_UNUSED_VAR(ptx_version); - // We're on device, use default policy - return Plan(typename Agent::ptx_plan()); - #else - return get_agent_plan_impl::get(ptx_version); - #endif - #endif + using plan_type = typename get_plan::type; + using ptx_plan = typename Agent::ptx_plan; + return plan_type{ptx_plan{}}; + ), // NV_IS_HOST: + ( return get_agent_plan_impl::get(ptx_version); )); +#else + return get_agent_plan_impl::get(ptx_version); +#endif } // XXX keep this dead-code for now as a gentle reminder diff --git a/thrust/system/cuda/detail/get_value.h b/thrust/system/cuda/detail/get_value.h index ebca7b5e7..9065f773a 100644 --- a/thrust/system/cuda/detail/get_value.h +++ b/thrust/system/cuda/detail/get_value.h @@ -24,6 +24,8 @@ #include #include +#include + THRUST_NAMESPACE_BEGIN namespace cuda_cub { @@ -62,24 +64,10 @@ inline __host__ __device__ } }; - // The usual pattern for separating host and device code doesn't work here - // because it would result in a compiler warning, either about falling off - // the end of a non-void function, or about result_type's default constructor - // being a host-only function. - #ifdef _NVHPC_CUDA - if (THRUST_IS_HOST_CODE) { - return war_nvbugs_881631::host_path(exec, ptr); - } else { - return war_nvbugs_881631::device_path(exec, ptr); - } - #else - #ifndef __CUDA_ARCH__ - return war_nvbugs_881631::host_path(exec, ptr); - #else - return war_nvbugs_881631::device_path(exec, ptr); - #endif // __CUDA_ARCH__ - #endif - } // end get_value_msvc2005_war() + NV_IF_TARGET(NV_IS_HOST, + (return war_nvbugs_881631::host_path(exec, ptr);), + (return war_nvbugs_881631::device_path(exec, ptr);)) +} // end get_value_msvc2005_war() } // end anon namespace diff --git a/thrust/system/cuda/detail/iter_swap.h b/thrust/system/cuda/detail/iter_swap.h index 60c40231c..c0628610a 100644 --- a/thrust/system/cuda/detail/iter_swap.h +++ b/thrust/system/cuda/detail/iter_swap.h @@ -26,6 +26,8 @@ #include #include +#include + THRUST_NAMESPACE_BEGIN namespace cuda_cub { @@ -50,15 +52,12 @@ void iter_swap(thrust::cuda::execution_policy &, Pointer1 a, Poin } }; - if (THRUST_IS_HOST_CODE) { - #if THRUST_INCLUDE_HOST_CODE - war_nvbugs_881631::host_path(a, b); - #endif - } else { - #if THRUST_INCLUDE_DEVICE_CODE - war_nvbugs_881631::device_path(a, b); - #endif - } + NV_IF_TARGET(NV_IS_HOST, ( + war_nvbugs_881631::host_path(a, b); + ), ( + war_nvbugs_881631::device_path(a, b); + )); + } // end iter_swap() diff --git a/thrust/system/cuda/detail/malloc_and_free.h b/thrust/system/cuda/detail/malloc_and_free.h index ac5b0f871..1b12e2cc3 100644 --- a/thrust/system/cuda/detail/malloc_and_free.h +++ b/thrust/system/cuda/detail/malloc_and_free.h @@ -23,13 +23,16 @@ #include #include #include -#ifdef THRUST_CACHING_DEVICE_MALLOC -#include -#endif #include #include #include +#ifdef THRUST_CACHING_DEVICE_MALLOC +#include +#endif + +#include + THRUST_NAMESPACE_BEGIN namespace cuda_cub { @@ -53,26 +56,35 @@ void *malloc(execution_policy &, std::size_t n) { void *result = 0; - if (THRUST_IS_HOST_CODE) { - #if THRUST_INCLUDE_HOST_CODE - #ifdef __CUB_CACHING_MALLOC - cub::CachingDeviceAllocator &alloc = get_allocator(); - cudaError_t status = alloc.DeviceAllocate(&result, n); - #else - cudaError_t status = cudaMalloc(&result, n); - #endif - - if(status != cudaSuccess) - { - cudaGetLastError(); // Clear global CUDA error state. - throw thrust::system::detail::bad_alloc(thrust::cuda_category().message(status).c_str()); - } - #endif - } else { - #if THRUST_INCLUDE_DEVICE_CODE - result = thrust::raw_pointer_cast(thrust::malloc(thrust::seq, n)); - #endif - } + // need to repeat a lot of code here because we can't use #if inside of the + // NV_IF_TARGET macro. + // The device path is the same either way, but the host allocations differ. +#ifdef __CUB_CACHING_MALLOC + NV_IF_TARGET(NV_IS_HOST, ( + cub::CachingDeviceAllocator &alloc = get_allocator(); + cudaError_t status = alloc.DeviceAllocate(&result, n); + + if (status != cudaSuccess) + { + cudaGetLastError(); // Clear global CUDA error state. + throw thrust::system::detail::bad_alloc(thrust::cuda_category().message(status).c_str()); + } + ), ( // NV_IS_DEVICE + result = thrust::raw_pointer_cast(thrust::malloc(thrust::seq, n)); + )); +#else // not __CUB_CACHING_MALLOC + NV_IF_TARGET(NV_IS_HOST, ( + cudaError_t status = cudaMalloc(&result, n); + + if (status != cudaSuccess) + { + cudaGetLastError(); // Clear global CUDA error state. + throw thrust::system::detail::bad_alloc(thrust::cuda_category().message(status).c_str()); + } + ), ( // NV_IS_DEVICE + result = thrust::raw_pointer_cast(thrust::malloc(thrust::seq, n)); + )); +#endif return result; } // end malloc() @@ -82,21 +94,25 @@ template __host__ __device__ void free(execution_policy &, Pointer ptr) { - if (THRUST_IS_HOST_CODE) { - #if THRUST_INCLUDE_HOST_CODE - #ifdef __CUB_CACHING_MALLOC - cub::CachingDeviceAllocator &alloc = get_allocator(); - cudaError_t status = alloc.DeviceFree(thrust::raw_pointer_cast(ptr)); - #else - cudaError_t status = cudaFree(thrust::raw_pointer_cast(ptr)); - #endif - cuda_cub::throw_on_error(status, "device free failed"); - #endif - } else { - #if THRUST_INCLUDE_DEVICE_CODE - thrust::free(thrust::seq, ptr); - #endif - } + // need to repeat a lot of code here because we can't use #if inside of the + // NV_IF_TARGET macro. + // The device path is the same either way, but the host deallocations differ. +#ifdef __CUB_CACHING_MALLOC + NV_IF_TARGET(NV_IS_HOST, ( + cub::CachingDeviceAllocator &alloc = get_allocator(); + cudaError_t status = alloc.DeviceFree(thrust::raw_pointer_cast(ptr)); + cuda_cub::throw_on_error(status, "device free failed"); + ), ( // NV_IS_DEVICE + thrust::free(thrust::seq, ptr); + )); +#else // not __CUB_CACHING_MALLOC + NV_IF_TARGET(NV_IS_HOST, ( + cudaError_t status = cudaFree(thrust::raw_pointer_cast(ptr)); + cuda_cub::throw_on_error(status, "device free failed"); + ), ( // NV_IS_DEVICE + thrust::free(thrust::seq, ptr); + )); +#endif } // end free() } // namespace cuda_cub diff --git a/thrust/system/cuda/detail/util.h b/thrust/system/cuda/detail/util.h index 5c564dc98..1b6580271 100644 --- a/thrust/system/cuda/detail/util.h +++ b/thrust/system/cuda/detail/util.h @@ -35,9 +35,11 @@ #include #include +#include -THRUST_NAMESPACE_BEGIN +#include +THRUST_NAMESPACE_BEGIN namespace cuda_cub { inline __host__ __device__ @@ -94,25 +96,7 @@ __host__ __device__ cudaError_t synchronize_stream(execution_policy &policy) { - cudaError_t result; - if (THRUST_IS_HOST_CODE) { - #if THRUST_INCLUDE_HOST_CODE - cudaStreamSynchronize(stream(policy)); - result = cudaGetLastError(); - #endif - } else { - #if THRUST_INCLUDE_DEVICE_CODE - #if __THRUST_HAS_CUDART__ - THRUST_UNUSED_VAR(policy); - cub::detail::device_synchronize(); - result = cudaGetLastError(); - #else - THRUST_UNUSED_VAR(policy); - result = cudaSuccess; - #endif - #endif - } - return result; + return cub::SyncStream(stream(policy)); } // Entry point/interface. @@ -132,30 +116,16 @@ cudaError_t synchronize_stream_optional(execution_policy &policy) { cudaError_t result; - if (THRUST_IS_HOST_CODE) { - #if THRUST_INCLUDE_HOST_CODE - if(must_perform_optional_synchronization(policy)){ - cudaStreamSynchronize(stream(policy)); - result = cudaGetLastError(); - }else{ - result = cudaSuccess; - } - #endif - } else { - #if THRUST_INCLUDE_DEVICE_CODE - #if __THRUST_HAS_CUDART__ - if(must_perform_optional_synchronization(policy)){ - cub::detail::device_synchronize(); - result = cudaGetLastError(); - }else{ - result = cudaSuccess; - } - #else - THRUST_UNUSED_VAR(policy); - result = cudaSuccess; - #endif - #endif + + if (must_perform_optional_synchronization(policy)) + { + result = synchronize_stream(policy); + } + else + { + result = cudaSuccess; } + return result; } @@ -230,15 +200,7 @@ trivial_copy_device_to_device(Policy & policy, inline void __host__ __device__ terminate() { - if (THRUST_IS_DEVICE_CODE) { - #if THRUST_INCLUDE_DEVICE_CODE - asm("trap;"); - #endif - } else { - #if THRUST_INCLUDE_HOST_CODE - std::terminate(); - #endif - } + NV_IF_TARGET(NV_IS_HOST, (std::terminate();), (asm("trap;");)); } __host__ __device__ @@ -252,23 +214,33 @@ inline void throw_on_error(cudaError_t status) if (cudaSuccess != status) { - if (THRUST_IS_HOST_CODE) { - #if THRUST_INCLUDE_HOST_CODE - throw thrust::system_error(status, thrust::cuda_category()); - #endif - } else { - #if THRUST_INCLUDE_DEVICE_CODE - #if __THRUST_HAS_CUDART__ - printf("Thrust CUDA backend error: %s: %s\n", - cudaGetErrorName(status), - cudaGetErrorString(status)); - #else - printf("Thrust CUDA backend error: %d\n", - static_cast(status)); - #endif - cuda_cub::terminate(); - #endif - } + + // Can't use #if inside NV_IF_TARGET, use a temp macro to hoist the device + // instructions out of the target logic. +#if __THRUST_HAS_CUDART__ + +#define THRUST_TEMP_DEVICE_CODE \ + printf("Thrust CUDA backend error: %s: %s\n", \ + cudaGetErrorName(status), \ + cudaGetErrorString(status)) + +#else + +#define THRUST_TEMP_DEVICE_CODE \ + printf("Thrust CUDA backend error: %d\n", \ + static_cast(status)) + +#endif + + NV_IF_TARGET(NV_IS_HOST, ( + throw thrust::system_error(status, thrust::cuda_category()); + ), ( + THRUST_TEMP_DEVICE_CODE; + cuda_cub::terminate(); + )); + +#undef THRUST_TEMP_DEVICE_CODE + } } @@ -283,25 +255,34 @@ inline void throw_on_error(cudaError_t status, char const *msg) if (cudaSuccess != status) { - if (THRUST_IS_HOST_CODE) { - #if THRUST_INCLUDE_HOST_CODE - throw thrust::system_error(status, thrust::cuda_category(), msg); - #endif - } else { - #if THRUST_INCLUDE_DEVICE_CODE - #if __THRUST_HAS_CUDART__ - printf("Thrust CUDA backend error: %s: %s: %s\n", - cudaGetErrorName(status), - cudaGetErrorString(status), - msg); - #else - printf("Thrust CUDA backend error: %d: %s \n", - static_cast(status), - msg); - #endif - cuda_cub::terminate(); - #endif - } + // Can't use #if inside NV_IF_TARGET, use a temp macro to hoist the device + // instructions out of the target logic. +#if __THRUST_HAS_CUDART__ + +#define THRUST_TEMP_DEVICE_CODE \ + printf("Thrust CUDA backend error: %s: %s: %s\n", \ + cudaGetErrorName(status), \ + cudaGetErrorString(status),\ + msg) + +#else + +#define THRUST_TEMP_DEVICE_CODE \ + printf("Thrust CUDA backend error: %d: %s\n", \ + static_cast(status), \ + msg) + +#endif + + NV_IF_TARGET(NV_IS_HOST, ( + throw thrust::system_error(status, thrust::cuda_category(), msg); + ), ( + THRUST_TEMP_DEVICE_CODE; + cuda_cub::terminate(); + )); + +#undef THRUST_TEMP_DEVICE_CODE + } } diff --git a/thrust/system/detail/sequential/sort.inl b/thrust/system/detail/sequential/sort.inl index 01920aa6e..241a860af 100644 --- a/thrust/system/detail/sequential/sort.inl +++ b/thrust/system/detail/sequential/sort.inl @@ -24,6 +24,8 @@ #include #include +#include + THRUST_NAMESPACE_BEGIN namespace system { @@ -164,14 +166,14 @@ void stable_sort(sequential::execution_policy &exec, { // the compilation time of stable_primitive_sort is too expensive to use within a single CUDA thread -#ifndef __CUDA_ARCH__ - typedef typename thrust::iterator_traits::value_type KeyType; - sort_detail::use_primitive_sort use_primitive_sort; -#else - thrust::detail::false_type use_primitive_sort; -#endif - - sort_detail::stable_sort(exec, first, last, comp, use_primitive_sort); + NV_IF_TARGET(NV_IS_HOST, ( + using KeyType = thrust::iterator_value_t; + sort_detail::use_primitive_sort use_primitive_sort; + sort_detail::stable_sort(exec, first, last, comp, use_primitive_sort); + ), ( // NV_IS_DEVICE: + thrust::detail::false_type use_primitive_sort; + sort_detail::stable_sort(exec, first, last, comp, use_primitive_sort); + )); } @@ -188,14 +190,14 @@ void stable_sort_by_key(sequential::execution_policy &exec, { // the compilation time of stable_primitive_sort_by_key is too expensive to use within a single CUDA thread -#ifndef __CUDA_ARCH__ - typedef typename thrust::iterator_traits::value_type KeyType; - sort_detail::use_primitive_sort use_primitive_sort; -#else - thrust::detail::false_type use_primitive_sort; -#endif - - sort_detail::stable_sort_by_key(exec, first1, last1, first2, comp, use_primitive_sort); + NV_IF_TARGET(NV_IS_HOST, ( + using KeyType = thrust::iterator_value_t; + sort_detail::use_primitive_sort use_primitive_sort; + sort_detail::stable_sort_by_key(exec, first1, last1, first2, comp, use_primitive_sort); + ), ( // NV_IS_DEVICE: + thrust::detail::false_type use_primitive_sort; + sort_detail::stable_sort_by_key(exec, first1, last1, first2, comp, use_primitive_sort); + )); } diff --git a/thrust/system/detail/sequential/stable_merge_sort.inl b/thrust/system/detail/sequential/stable_merge_sort.inl index 7dcf03f59..02f384afb 100644 --- a/thrust/system/detail/sequential/stable_merge_sort.inl +++ b/thrust/system/detail/sequential/stable_merge_sort.inl @@ -24,6 +24,8 @@ #include #include +#include + THRUST_NAMESPACE_BEGIN namespace system { @@ -355,16 +357,12 @@ void stable_merge_sort(sequential::execution_policy &exec, RandomAccessIterator last, StrictWeakOrdering comp) { - if (THRUST_IS_DEVICE_CODE) { - #if THRUST_INCLUDE_DEVICE_CODE - // avoid recursion in CUDA threads - stable_merge_sort_detail::iterative_stable_merge_sort(exec, first, last, comp); - #endif - } else { - #if THRUST_INCLUDE_HOST_CODE - stable_merge_sort_detail::recursive_stable_merge_sort(exec, first, last, comp); - #endif - } + NV_IF_TARGET(NV_IS_DEVICE, ( + // avoid recursion in CUDA threads + stable_merge_sort_detail::iterative_stable_merge_sort(exec, first, last, comp); + ), ( + stable_merge_sort_detail::recursive_stable_merge_sort(exec, first, last, comp); + )); } @@ -379,16 +377,12 @@ void stable_merge_sort_by_key(sequential::execution_policy &exec, RandomAccessIterator2 first2, StrictWeakOrdering comp) { - if (THRUST_IS_DEVICE_CODE) { - #if THRUST_INCLUDE_DEVICE_CODE - // avoid recursion in CUDA threads - stable_merge_sort_detail::iterative_stable_merge_sort_by_key(exec, first1, last1, first2, comp); - #endif - } else { - #if THRUST_INCLUDE_HOST_CODE - stable_merge_sort_detail::recursive_stable_merge_sort_by_key(exec, first1, last1, first2, comp); - #endif - } + NV_IF_TARGET(NV_IS_DEVICE, ( + // avoid recursion in CUDA threads + stable_merge_sort_detail::iterative_stable_merge_sort_by_key(exec, first1, last1, first2, comp); + ), ( + stable_merge_sort_detail::recursive_stable_merge_sort_by_key(exec, first1, last1, first2, comp); + )); } diff --git a/thrust/system/detail/sequential/trivial_copy.h b/thrust/system/detail/sequential/trivial_copy.h index cefb18938..ea55c8fd2 100644 --- a/thrust/system/detail/sequential/trivial_copy.h +++ b/thrust/system/detail/sequential/trivial_copy.h @@ -24,6 +24,8 @@ #include #include +#include + THRUST_NAMESPACE_BEGIN namespace system { @@ -40,16 +42,14 @@ __host__ __device__ T *result) { T* return_value = NULL; - if (THRUST_IS_HOST_CODE) { - #if THRUST_INCLUDE_HOST_CODE - std::memmove(result, first, n * sizeof(T)); - return_value = result + n; - #endif - } else { - #if THRUST_INCLUDE_DEVICE_CODE - return_value = thrust::system::detail::sequential::general_copy_n(first, n, result); - #endif - } + + NV_IF_TARGET(NV_IS_HOST, ( + std::memmove(result, first, n * sizeof(T)); + return_value = result + n; + ), ( // NV_IS_DEVICE: + return_value = thrust::system::detail::sequential::general_copy_n(first, n, result); + )); + return return_value; } // end trivial_copy_n() From fdcd8e1aebbf8feeaafbbd27515de0d447772f1a Mon Sep 17 00:00:00 2001 From: Allison Vacanti Date: Thu, 27 May 2021 18:18:20 -0400 Subject: [PATCH 07/10] Remove unreachable code. --- testing/unittest/cuda/testframework.cu | 1 - 1 file changed, 1 deletion(-) diff --git a/testing/unittest/cuda/testframework.cu b/testing/unittest/cuda/testframework.cu index d5bc4aaba..ff30f368c 100644 --- a/testing/unittest/cuda/testframework.cu +++ b/testing/unittest/cuda/testframework.cu @@ -137,7 +137,6 @@ bool CUDATestDriver::run_tests(const ArgumentSet &args, const ArgumentMap &kwarg { std::cout << "--verbose and --concise cannot be used together" << std::endl; exit(EXIT_FAILURE); - return false; } // check error status before doing anything From 59a72c05b575662b5ca4d68c3cf57398b0f741c5 Mon Sep 17 00:00:00 2001 From: Allison Vacanti Date: Fri, 28 May 2021 08:15:58 -0400 Subject: [PATCH 08/10] Initialize members in `cuda_optional` detail class. --- thrust/system/cuda/detail/core/util.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/thrust/system/cuda/detail/core/util.h b/thrust/system/cuda/detail/core/util.h index fd8821901..4e014ccc6 100644 --- a/thrust/system/cuda/detail/core/util.h +++ b/thrust/system/cuda/detail/core/util.h @@ -601,12 +601,11 @@ namespace core { template class cuda_optional { - cudaError_t status_; - T value_; + cudaError_t status_{cudaSuccess}; + T value_{}; public: - __host__ __device__ - cuda_optional() : status_(cudaSuccess) {} + cuda_optional() = default; __host__ __device__ cuda_optional(T v, cudaError_t status = cudaSuccess) : status_(status), value_(v) {} From dd561bf21f11c0c4389ed778b1ac0e572ea86318 Mon Sep 17 00:00:00 2001 From: Allison Vacanti Date: Wed, 28 Jul 2021 21:52:08 -0400 Subject: [PATCH 09/10] Fix some new and exciting exec_space `[subobject]` warnings. --- thrust/device_allocator.h | 8 +++----- thrust/mr/allocator.h | 3 ++- thrust/system/cuda/detail/par.h | 1 + 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/thrust/device_allocator.h b/thrust/device_allocator.h index bce4d947b..f64c3854f 100644 --- a/thrust/device_allocator.h +++ b/thrust/device_allocator.h @@ -115,7 +115,7 @@ class device_allocator }; /*! Default constructor has no effect. */ - __host__ + __host__ __device__ device_allocator() {} /*! Copy constructor has no effect. */ @@ -124,15 +124,13 @@ class device_allocator /*! Constructor from other \p device_allocator has no effect. */ template - __host__ + __host__ __device__ device_allocator(const device_allocator& other) : base(other) {} -#if THRUST_CPP_DIALECT >= 2011 device_allocator & operator=(const device_allocator &) = default; -#endif /*! Destructor has no effect. */ - __host__ + __host__ __device__ ~device_allocator() {} }; diff --git a/thrust/mr/allocator.h b/thrust/mr/allocator.h index b907c09db..67adbe87c 100644 --- a/thrust/mr/allocator.h +++ b/thrust/mr/allocator.h @@ -219,7 +219,8 @@ class stateless_resource_allocator : public thrust::mr::allocator /*! Default constructor. Uses \p get_global_resource to get the global instance of \p Upstream and initializes the * \p allocator base subobject with that resource. */ - __host__ + __thrust_exec_check_disable__ + __host__ __device__ stateless_resource_allocator() : base(get_global_resource()) { } diff --git a/thrust/system/cuda/detail/par.h b/thrust/system/cuda/detail/par.h index bd5953139..42c701ca7 100644 --- a/thrust/system/cuda/detail/par.h +++ b/thrust/system/cuda/detail/par.h @@ -48,6 +48,7 @@ struct execute_on_stream_base : execution_policy cudaStream_t stream; public: + __thrust_exec_check_disable__ __host__ __device__ execute_on_stream_base(cudaStream_t stream_ = default_stream()) : stream(stream_){} From 4cdf6deedda1ad2bd4fa1b37367c90cd16e2c7e5 Mon Sep 17 00:00:00 2001 From: Allison Vacanti Date: Thu, 14 Apr 2022 15:02:24 -0400 Subject: [PATCH 10/10] Fix issues in testing/allocator.cu. - The `g_state` flag wasn't reset between executions. - The `destroy` method was being invoke in the current host system, not the system that owned the allocated memory (always cpp). This broke on MSVC's OpenMP implementation, where it seemed to be asserting the `g_state` flag before it was updated by `destroy`. This only happened on MSVC when host system = OMP, and appears to be a bug/miscompile in MSVC (repro'd on 2019). Fixed by explicitly tagging the allocator system to cpp. - Added check that `destroy` is not invoked on empty vectors. --- testing/allocator.cu | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/testing/allocator.cu b/testing/allocator.cu index 0317a2b31..175685ed0 100644 --- a/testing/allocator.cu +++ b/testing/allocator.cu @@ -63,9 +63,12 @@ DECLARE_VARIABLE_UNITTEST(TestAllocatorCustomCopyConstruct); template struct my_allocator_with_custom_destroy { - typedef T value_type; - typedef T & reference; - typedef const T & const_reference; + // This is only used with thrust::cpp::vector: + using system_type = thrust::cpp::tag; + + using value_type = T; + using reference = T &; + using const_reference = const T &; static bool g_state; @@ -120,12 +123,14 @@ bool my_allocator_with_custom_destroy::g_state = false; template void TestAllocatorCustomDestroy(size_t n) { + my_allocator_with_custom_destroy::g_state = false; + { thrust::cpp::vector > vec(n); } // destroy everything - if (0 < n) - ASSERT_EQUAL(true, my_allocator_with_custom_destroy::g_state); + // state should only be true when there are values to destroy: + ASSERT_EQUAL(n > 0, my_allocator_with_custom_destroy::g_state); } DECLARE_VARIABLE_UNITTEST(TestAllocatorCustomDestroy);